"""Tests for GraphBuilder + InMemoryGraphDB.""" from __future__ import annotations import math import pytest from aucourt_ingest.models import CaseMeta, Chunk, Verdict, MatterType from aucourt_ingest.processing.graph_builder import ( GraphBuilder, _cosine_similarity, CORROBORATE_THRESHOLD, CONTRADICT_THRESHOLD, ) from aucourt_ingest.storage.in_memory_graph_db import InMemoryGraphDB from aucourt_ingest.storage.graph_db import _node_id, _rel_id def _make_meta( mnc="[2019] NSWSC 1234", court="NSWSC", judges=None, charges=None, inadmissible=None, is_appeal=False, appeal_of="", verdict=Verdict.GUILTY, exoneration_flag=False, ) -> CaseMeta: return CaseMeta( case_name=f"Test v State ({mnc})", mnc=mnc, court=court, judge=judges if judges is not None else ["Judge Smith"], charges=charges if charges is not None else ["murder"], inadmissible_evidence=inadmissible or [], is_appeal=is_appeal, appeal_of=appeal_of, verdict=verdict, exoneration_flag=exoneration_flag, matter_type=MatterType.CRIMINAL, date_delivered="2019-06-15", jurisdiction="NSW", ) def _make_chunk(chunk_id="c0", chunk_type="testimony", sequence=0, text="Some text", embedding=None, doc_id="[2019] NSWSC 1") -> Chunk: return Chunk( chunk_id=chunk_id, doc_id=doc_id, chunk_type=chunk_type, sequence=sequence, text=text, token_count=50, embedding=embedding, ) # ── Cosine similarity ── class TestCosineSimilarity: def test_identical_vectors(self): v = [0.5, 0.3, 0.8, 0.1] assert _cosine_similarity(v, v) == pytest.approx(1.0) def test_orthogonal_vectors(self): assert _cosine_similarity([1, 0], [0, 1]) == pytest.approx(0.0) def test_opposite_vectors(self): assert _cosine_similarity([1, 0], [-1, 0]) == pytest.approx(-1.0) def test_empty_vectors(self): assert _cosine_similarity([], []) == 0.0 def test_zero_vectors(self): assert _cosine_similarity([0, 0], [1, 0]) == 0.0 def test_unequal_lengths(self): assert _cosine_similarity([1, 2], [1, 2, 3]) == 0.0 def test_mismatched_lengths(self): assert _cosine_similarity([1, 2], []) == 0.0 # ── _node_id / _rel_id helpers ── class TestHelperFunctions: def test_node_id_mnc(self): assert _node_id("Case", {"mnc": "[2019] NSWSC 1234"}) == "Case:[2019] NSWSC 1234" def test_node_id_id(self): assert _node_id("Judge", {"id": "j1"}) == "Judge:j1" def test_node_id_hash_deterministic(self): props = {"name": "Smith", "court": "NSWSC"} a = _node_id("Judge", props) b = _node_id("Judge", props) assert a == b assert a.startswith("Judge:") assert len(a) > len("Judge:") def test_rel_id(self): rid = _rel_id("from123", "to456", "HEARD_BY") assert rid == "from123-[HEARD_BY]->to456" # ── InMemoryGraphDB ── class TestInMemoryGraphDB: @pytest.fixture def db(self): return InMemoryGraphDB() @pytest.mark.asyncio async def test_create_and_get_node(self, db): await db.create_node("Case", {"mnc": "[2019] NSWSC 1"}) node = await db.get_node("Case:[2019] NSWSC 1") assert node is not None assert node.label == "Case" assert node.properties["mnc"] == "[2019] NSWSC 1" @pytest.mark.asyncio async def test_create_node_dedup(self, db): id1 = await db.create_node("Case", {"mnc": "[2019] NSWSC 1"}) id2 = await db.create_node("Case", {"mnc": "[2019] NSWSC 1"}) assert id1 == id2 assert await db.node_count("Case") == 1 @pytest.mark.asyncio async def test_node_count_by_label(self, db): await db.create_node("Case", {"mnc": "[2019] NSWSC 1"}) await db.create_node("Case", {"mnc": "[2020] NSWSC 2"}) await db.create_node("Judge", {"name": "Smith"}) assert await db.node_count("Case") == 2 assert await db.node_count("Judge") == 1 assert await db.node_count() == 3 @pytest.mark.asyncio async def test_query_nodes(self, db): await db.create_node("Case", {"mnc": "[2019] NSWSC 1", "court": "NSWSC"}) await db.create_node("Case", {"mnc": "[2020] VSC 2", "court": "VSC"}) results = await db.query_nodes("Case", {"court": "NSWSC"}) assert len(results) == 1 assert results[0].properties["mnc"] == "[2019] NSWSC 1" @pytest.mark.asyncio async def test_relationship(self, db): case_id = await db.create_node("Case", {"mnc": "[2019] NSWSC 1"}) judge_id = await db.create_node("Judge", {"name": "Smith"}) await db.create_relationship(case_id, judge_id, "HEARD_BY") rels = await db.get_relationships(case_id, "HEARD_BY") assert len(rels) == 1 assert rels[0].to_id == judge_id assert rels[0].rel_type == "HEARD_BY" @pytest.mark.asyncio async def test_relationship_count(self, db): c1 = await db.create_node("Case", {"mnc": "[2019] NSWSC 1"}) j1 = await db.create_node("Judge", {"name": "Smith"}) j2 = await db.create_node("Judge", {"name": "Jones"}) await db.create_relationship(c1, j1, "HEARD_BY") await db.create_relationship(c1, j2, "HEARD_BY") assert await db.relationship_count("HEARD_BY") == 2 assert await db.relationship_count() == 2 @pytest.mark.asyncio async def test_neighbors(self, db): c1 = await db.create_node("Case", {"mnc": "[2019] NSWSC 1"}) j1 = await db.create_node("Judge", {"name": "Smith"}) await db.create_relationship(c1, j1, "HEARD_BY") nbrs = await db.neighbors(c1, "HEARD_BY") assert nbrs == [j1] @pytest.mark.asyncio async def test_close_clears(self, db): await db.create_node("Case", {"mnc": "[2019] NSWSC 1"}) await db.close() assert await db.node_count() == 0 # ── GraphBuilder.build_case ── class TestBuildCase: @pytest.fixture def builder(self): db = InMemoryGraphDB() return GraphBuilder(db), db @pytest.mark.asyncio async def test_single_judge_single_charge(self, builder): builder, db = builder meta = _make_meta(judges=["Judge Smith"], charges=["murder"]) case_id = await builder.build_case(meta) assert case_id.startswith("Case:") assert await db.node_count("Case") == 1 assert await db.node_count("Judge") == 1 assert await db.node_count("Charge") == 1 assert await db.relationship_count("HEARD_BY") == 1 assert await db.relationship_count("CHARGED_WITH") == 1 @pytest.mark.asyncio async def test_multiple_judges(self, builder): builder, db = builder meta = _make_meta(judges=["Judge Smith", "Judge Jones"]) await builder.build_case(meta) assert await db.node_count("Judge") == 2 assert await db.relationship_count("HEARD_BY") == 2 @pytest.mark.asyncio async def test_multiple_charges(self, builder): builder, db = builder meta = _make_meta(charges=["murder", "assault", "robbery"]) await builder.build_case(meta) assert await db.node_count("Charge") == 3 assert await db.relationship_count("CHARGED_WITH") == 3 @pytest.mark.asyncio async def test_inadmissible_evidence_creates_ruling(self, builder): builder, db = builder meta = _make_meta(inadmissible=["hearsay statement", "tainted identification"]) await builder.build_case(meta) assert await db.node_count("Ruling") == 2 assert await db.relationship_count("HAS_RULING") == 2 @pytest.mark.asyncio async def test_appeal_creates_appeals_edge(self, builder): builder, db = builder meta = _make_meta( mnc="[2020] NSWSC 5678", is_appeal=True, appeal_of="[2019] NSWSC 1234", ) case_id = await builder.build_case(meta) assert await db.node_count("Case") == 2 assert await db.relationship_count("APPEALS") == 1 rels = await db.get_relationships(case_id, "APPEALS") assert len(rels) == 1 target = await db.get_node(rels[0].to_id) assert target.properties["mnc"] == "[2019] NSWSC 1234" @pytest.mark.asyncio async def test_case_properties(self, builder): builder, db = builder meta = _make_meta() case_id = await builder.build_case(meta) node = await db.get_node(case_id) assert node.properties["mnc"] == "[2019] NSWSC 1234" assert node.properties["court"] == "NSWSC" assert node.properties["verdict"] == "guilty" assert node.properties["exoneration_flag"] is False @pytest.mark.asyncio async def test_no_judges_no_charges(self, builder): builder, db = builder meta = _make_meta(judges=[], charges=[]) await builder.build_case(meta) assert await db.node_count("Judge") == 0 assert await db.node_count("Charge") == 0 assert await db.relationship_count("HEARD_BY") == 0 assert await db.relationship_count("CHARGED_WITH") == 0 # ── GraphBuilder.build_chunks ── class TestBuildChunks: @pytest.fixture def builder(self): db = InMemoryGraphDB() return GraphBuilder(db), db @pytest.mark.asyncio async def test_chunk_nodes_created(self, builder): builder, db = builder chunks = [ _make_chunk("c0", "opening", 0, "Opening statement"), _make_chunk("c1", "testimony", 1, "Witness testimony"), _make_chunk("c2", "closing", 2, "Closing argument"), ] count = await builder.build_chunks("[2019] NSWSC 1", chunks) assert count == 3 assert await db.node_count("Chunk") == 3 @pytest.mark.asyncio async def test_follows_edges(self, builder): builder, db = builder chunks = [ _make_chunk("c0", "opening", 0), _make_chunk("c1", "testimony", 1), _make_chunk("c2", "closing", 2), ] await builder.build_chunks("[2019] NSWSC 1", chunks) assert await db.relationship_count("FOLLOWS") == 2 @pytest.mark.asyncio async def test_single_chunk_no_edges(self, builder): builder, db = builder chunks = [_make_chunk("c0", "opening", 0)] await builder.build_chunks("[2019] NSWSC 1", chunks) assert await db.relationship_count("FOLLOWS") == 0 @pytest.mark.asyncio async def test_no_chunks(self, builder): builder, db = builder count = await builder.build_chunks("[2019] NSWSC 1", []) assert count == 0 # ── GraphBuilder.build_similarity_edges ── class TestBuildSimilarityEdges: @pytest.fixture def builder(self): db = InMemoryGraphDB() return GraphBuilder(db), db @pytest.mark.asyncio async def test_corroborates_edge(self, builder): builder, db = builder # Nearly identical embeddings -> should corroborate emb = [0.1, 0.9, 0.3, 0.7] chunks = [ _make_chunk("c0", "testimony", 0, embedding=emb), _make_chunk("c1", "testimony", 1, embedding=[x + 0.001 for x in emb]), ] edges = await builder.build_similarity_edges("[2019] NSWSC 1", chunks) assert edges == 1 assert await db.relationship_count("CORROBORATES") == 1 @pytest.mark.asyncio async def test_contradicts_edge(self, builder): builder, db = builder chunks = [ _make_chunk("c0", "exhibit", 0, embedding=[1.0, 0.0, 0.0, 0.0]), _make_chunk("c1", "exhibit", 1, embedding=[0.0, 1.0, 0.0, 0.0]), ] edges = await builder.build_similarity_edges("[2019] NSWSC 1", chunks) assert edges == 1 assert await db.relationship_count("CONTRADICTS") == 1 @pytest.mark.asyncio async def test_no_edge_when_sim_between_thresholds(self, builder): builder, db = builder # Moderate similarity (~0.56) — neither corroborates nor contradicts chunks = [ _make_chunk("c0", "testimony", 0, embedding=[0.7, 0.3, 0.5, 0.0]), _make_chunk("c1", "testimony", 1, embedding=[0.3, 0.7, 0.1, 0.5]), ] edges = await builder.build_similarity_edges("[2019] NSWSC 1", chunks) assert edges == 0 @pytest.mark.asyncio async def test_type_filter(self, builder): builder, db = builder emb = [0.1, 0.9, 0.3, 0.7] chunks = [ _make_chunk("c0", "testimony", 0, embedding=emb), _make_chunk("c1", "testimony", 1, embedding=[x + 0.001 for x in emb]), _make_chunk("c2", "ruling", 2, embedding=[x + 0.001 for x in emb]), ] # Only testimony should be compared edges = await builder.build_similarity_edges( "[2019] NSWSC 1", chunks, types=["testimony"] ) assert edges == 1 @pytest.mark.asyncio async def test_no_embedding_skipped(self, builder): builder, db = builder chunks = [ _make_chunk("c0", "testimony", 0, embedding=[1, 0, 0, 0]), _make_chunk("c1", "testimony", 1, embedding=None), ] edges = await builder.build_similarity_edges("[2019] NSWSC 1", chunks) assert edges == 0 @pytest.mark.asyncio async def test_empty_chunks(self, builder): builder, db = builder edges = await builder.build_similarity_edges("[2019] NSWSC 1", []) assert edges == 0 @pytest.mark.asyncio async def test_weight_stored(self, builder): builder, db = builder emb = [0.1, 0.9, 0.3, 0.7] chunks = [ _make_chunk("c0", "testimony", 0, embedding=emb), _make_chunk("c1", "testimony", 1, embedding=[x + 0.001 for x in emb]), ] await builder.build_similarity_edges("[2019] NSWSC 1", chunks) rels = await db.get_relationships("c0", "CORROBORATES") assert len(rels) == 1 assert "weight" in rels[0].properties assert rels[0].properties["weight"] >= CORROBORATE_THRESHOLD # ── GraphBuilder.build_full ── class TestBuildFull: @pytest.mark.asyncio async def test_full_build_summary(self): db = InMemoryGraphDB() builder = GraphBuilder(db) meta = _make_meta( judges=["Judge Smith", "Judge Jones"], charges=["murder", "assault"], inadmissible=["hearsay"], ) emb = [0.1, 0.9, 0.3, 0.7] chunks = [ _make_chunk("c0", "testimony", 0, "Testimony text", embedding=emb), _make_chunk("c1", "testimony", 1, "Similar testimony", embedding=[x + 0.001 for x in emb]), _make_chunk("c2", "closing", 2, "Closing text"), ] summary = await builder.build_full(meta, chunks) assert summary["mnc"] == "[2019] NSWSC 1234" assert summary["nodes"]["Case"] == 1 assert summary["nodes"]["Judge"] == 2 assert summary["nodes"]["Charge"] == 2 assert summary["nodes"]["Ruling"] == 1 assert summary["nodes"]["Chunk"] == 3 assert summary["edges"]["HEARD_BY"] == 2 assert summary["edges"]["CHARGED_WITH"] == 2 assert summary["edges"]["HAS_RULING"] == 1 assert summary["edges"]["FOLLOWS"] == 2 assert summary["edges"]["CORROBORATES"] == 1 assert summary["edges"]["CONTRADICTS"] == 0 @pytest.mark.asyncio async def test_full_build_appeal(self): db = InMemoryGraphDB() builder = GraphBuilder(db) meta = _make_meta( mnc="[2020] NSWSC 5678", is_appeal=True, appeal_of="[2019] NSWSC 1234", ) summary = await builder.build_full(meta, []) assert summary["nodes"]["Case"] == 2 assert summary["edges"]["APPEALS"] == 1 # ── GraphDB Protocol conformance ── class TestProtocolConformance: def test_in_memory_conforms(self): from aucourt_ingest.storage.graph_db import GraphDB proto_methods = {m for m in dir(GraphDB) if not m.startswith("_")} impl_methods = set(dir(InMemoryGraphDB)) for method in proto_methods: if not method.startswith("_"): assert method in impl_methods, f"InMemoryGraphDB missing {method}"