"""Tests for juror personas and subgraph queries.""" from __future__ import annotations import pytest from aucourt_ingest.models import CaseMeta, Chunk, JurorPersona, JurorContext, Verdict, MatterType from aucourt_ingest.jury.personas import PERSONAS, get_persona, all_persona_names, DEFAULT_TOKEN_BUDGET from aucourt_ingest.jury.subgraph_query import SubgraphQuery, _parse_anchor_spec from aucourt_ingest.storage.in_memory_graph_db import InMemoryGraphDB def _make_meta(mnc="[2019] NSWSC 1234", court="NSWSC", judges=None, charges=None, inadmissible=None, is_appeal=False, appeal_of="") -> CaseMeta: return CaseMeta( case_name=f"Test v State ({mnc})", mnc=mnc, court=court, judge=judges if judges is not None else ["Judge Smith"], charges=charges if charges is not None else ["murder"], inadmissible_evidence=inadmissible or [], is_appeal=is_appeal, appeal_of=appeal_of, verdict=Verdict.GUILTY, exoneration_flag=False, matter_type=MatterType.CRIMINAL, date_delivered="2019-06-15", jurisdiction="NSW", ) # ── Persona definitions ── class TestPersonaDefinitions: def test_six_personas_defined(self): assert len(PERSONAS) == 6 def test_all_expected_names(self): names = set(PERSONAS.keys()) assert names == {"nurse", "accountant", "skeptic", "ex_cop", "empath", "foreman"} def test_all_exclude_inadmissible(self): for name, persona in PERSONAS.items(): assert "RULED_INADMISSIBLE" in persona.exclude_edges, f"{name} missing RULED_INADMISSIBLE" def test_all_have_anchor_nodes(self): for name, persona in PERSONAS.items(): assert len(persona.anchor_nodes) > 0, f"{name} has no anchor nodes" def test_all_have_edge_types(self): for name, persona in PERSONAS.items(): assert len(persona.edge_types) > 0, f"{name} has no edge types" def test_all_have_chunk_types(self): for name, persona in PERSONAS.items(): assert len(persona.chunk_types) > 0, f"{name} has no chunk types" def test_get_persona(self): nurse = get_persona("nurse") assert nurse.name == "nurse" assert "testimony" in nurse.chunk_types def test_get_persona_raises(self): with pytest.raises(KeyError): get_persona("nonexistent") def test_all_persona_names(self): names = all_persona_names() assert len(names) == 6 assert set(names) == set(PERSONAS.keys()) def test_nurse_persona(self): p = PERSONAS["nurse"] assert "testimony" in p.chunk_types assert "exhibit" in p.chunk_types assert "GAVE_TESTIMONY" in p.edge_types assert "CORROBORATES" in p.edge_types def test_foreman_persona(self): p = PERSONAS["foreman"] assert "opening" in p.chunk_types assert "judgment" in p.chunk_types assert "sentence" in p.chunk_types assert "Charge" in "".join(p.anchor_nodes) def test_skeptic_persona(self): p = PERSONAS["skeptic"] assert "CONTRADICTS" in p.edge_types assert "testimony" in p.chunk_types assert "ruling" in p.chunk_types def test_empath_persona(self): p = PERSONAS["empath"] assert "closing" in p.chunk_types assert "testimony" in p.chunk_types assert "CORROBORATES" in p.edge_types assert "CONTRADICTS" in p.edge_types # ── Anchor spec parsing ── class TestParseAnchorSpec: def test_plain_label(self): label, props = _parse_anchor_spec("Timeline") assert label == "Timeline" assert props == {} def test_label_with_single_prop(self): label, props = _parse_anchor_spec("Witness[role=expert]") assert label == "Witness" assert props == {"role": "expert"} def test_label_with_multiple_props(self): label, props = _parse_anchor_spec("Exhibit[category=medical,type=report]") assert label == "Exhibit" assert props == {"category": "medical", "type": "report"} def test_whitespace_in_props(self): label, props = _parse_anchor_spec("Witness[role = expert]") assert label == "Witness" assert props == {"role": "expert"} def test_spaces_around_bracket(self): label, props = _parse_anchor_spec("Witness [role=expert]") assert label == "Witness" assert props == {"role": "expert"} # ── SubgraphQuery ── class TestSubgraphQuery: @pytest.mark.asyncio async def test_missing_case_returns_empty(self): db = InMemoryGraphDB() sq = SubgraphQuery(db) persona = get_persona("nurse") ctx = await sq.get_juror_context("[2099] FAKE 9999", persona) assert ctx.persona == "nurse" assert ctx.case_mnc == "[2099] FAKE 9999" assert ctx.context_text == "" assert ctx.source_chunk_ids == [] assert ctx.total_tokens == 0 @pytest.mark.asyncio async def test_no_matching_chunks_returns_empty(self): db = InMemoryGraphDB() from aucourt_ingest.processing.graph_builder import GraphBuilder builder = GraphBuilder(db) meta = _make_meta() await builder.build_case(meta) sq = SubgraphQuery(db) persona = get_persona("nurse") ctx = await sq.get_juror_context("[2019] NSWSC 1234", persona) assert ctx.context_text == "" assert ctx.source_chunk_ids == [] @pytest.mark.asyncio async def test_inadmissible_wall_blocks_evidence(self): """RULED_INADMISSIBLE edges must be excluded from juror traversal.""" db = InMemoryGraphDB() from aucourt_ingest.processing.graph_builder import GraphBuilder builder = GraphBuilder(db) meta = _make_meta(inadmissible=["hearsay statement"]) case_id = await builder.build_case(meta) # Create a witness connected via RULED_INADMISSIBLE (excluded edge) inad_witness_id = await db.create_node("Witness", {"role": "expert", "id": "w_inad"}) await db.create_relationship(case_id, inad_witness_id, "RULED_INADMISSIBLE", {"evidence": "hearsay statement"}) # Create a chunk connected to the inadmissible witness inad_chunk_id = await db.create_node("Chunk", { "chunk_id": "inc0", "type": "testimony", "sequence": 0, "text_preview": "Inadmissible testimony that should be excluded", "token_count": 20, }) await db.create_relationship(inad_witness_id, inad_chunk_id, "GAVE_TESTIMONY") # Create an admissible witness with testimony good_witness_id = await db.create_node("Witness", {"role": "expert", "id": "w_good"}) await db.create_relationship(case_id, good_witness_id, "GAVE_TESTIMONY") good_chunk_id = await db.create_node("Chunk", { "chunk_id": "good0", "type": "testimony", "sequence": 0, "text_preview": "Admissible expert testimony", "token_count": 20, }) await db.create_relationship(good_witness_id, good_chunk_id, "GAVE_TESTIMONY") sq = SubgraphQuery(db) persona = get_persona("nurse") ctx = await sq.get_juror_context("[2019] NSWSC 1234", persona) # Should only contain admissible content assert "Inadmissible" not in ctx.context_text assert "Admissible" in ctx.context_text assert "inc0" not in ctx.source_chunk_ids assert "good0" in ctx.source_chunk_ids @pytest.mark.asyncio async def test_token_budget_respected(self): """Chunks should be truncated when exceeding max_tokens.""" db = InMemoryGraphDB() from aucourt_ingest.processing.graph_builder import GraphBuilder builder = GraphBuilder(db) meta = _make_meta() case_id = await builder.build_case(meta) witness_id = await db.create_node("Witness", {"role": "expert"}) await db.create_relationship(case_id, witness_id, "GAVE_TESTIMONY") for i in range(5): chunk_node_id = await db.create_node("Chunk", { "chunk_id": f"c{i}", "type": "testimony", "sequence": i, "text_preview": f"Chunk {i} text " * 10, "token_count": 50, }) await db.create_relationship(witness_id, chunk_node_id, "GAVE_TESTIMONY") sq = SubgraphQuery(db) persona = get_persona("nurse") # Budget of 100 tokens = 2 chunks (50 each) ctx = await sq.get_juror_context("[2019] NSWSC 1234", persona, max_tokens=100) assert ctx.total_tokens <= 150 # Some slack for partial chunk assert len(ctx.source_chunk_ids) <= 3 # 2 full + possibly 1 partial @pytest.mark.asyncio async def test_chunk_type_filtering(self): """Only chunks matching persona's chunk_types should be included.""" db = InMemoryGraphDB() from aucourt_ingest.processing.graph_builder import GraphBuilder builder = GraphBuilder(db) meta = _make_meta() case_id = await builder.build_case(meta) # Expert witness connected to case witness_id = await db.create_node("Witness", {"role": "expert"}) await db.create_relationship(case_id, witness_id, "GAVE_TESTIMONY") # Testimony chunk (nurse cares about this) test_chunk_id = await db.create_node("Chunk", { "chunk_id": "test0", "type": "testimony", "sequence": 0, "text_preview": "Expert medical testimony", "token_count": 20, }) await db.create_relationship(witness_id, test_chunk_id, "GAVE_TESTIMONY") # Judgment chunk (nurse does NOT care about this) judg_chunk_id = await db.create_node("Chunk", { "chunk_id": "judg0", "type": "judgment", "sequence": 0, "text_preview": "The court finds...", "token_count": 20, }) await db.create_relationship(case_id, judg_chunk_id, "HAS_RULING") sq = SubgraphQuery(db) nurse_ctx = await sq.get_juror_context("[2019] NSWSC 1234", get_persona("nurse")) # Nurse only sees testimony, not judgment assert "medical" in nurse_ctx.context_text assert "court finds" not in nurse_ctx.context_text # But foreman would see judgment foreman_ctx = await sq.get_juror_context("[2019] NSWSC 1234", get_persona("foreman")) assert "court finds" in foreman_ctx.context_text @pytest.mark.asyncio async def test_chunks_sorted_by_sequence(self): """Chunks should be assembled in sequence order.""" db = InMemoryGraphDB() from aucourt_ingest.processing.graph_builder import GraphBuilder builder = GraphBuilder(db) meta = _make_meta() case_id = await builder.build_case(meta) witness_id = await db.create_node("Witness", {"role": "prosecution"}) await db.create_relationship(case_id, witness_id, "GAVE_TESTIMONY") # Insert chunks out of order for seq, text in [(2, "Third"), (0, "First"), (1, "Second")]: chunk_node_id = await db.create_node("Chunk", { "chunk_id": f"c{seq}", "type": "testimony", "sequence": seq, "text_preview": text, "token_count": 10, }) await db.create_relationship(witness_id, chunk_node_id, "GAVE_TESTIMONY") sq = SubgraphQuery(db) persona = get_persona("empath") ctx = await sq.get_juror_context("[2019] NSWSC 1234", persona) assert ctx.context_text.startswith("First") assert ctx.source_chunk_ids == ["c0", "c1", "c2"] @pytest.mark.asyncio async def test_all_personas_return_valid_context(self): """Every persona should return a valid JurorContext object.""" db = InMemoryGraphDB() from aucourt_ingest.processing.graph_builder import GraphBuilder builder = GraphBuilder(db) meta = _make_meta() await builder.build_case(meta) sq = SubgraphQuery(db) for name in all_persona_names(): persona = get_persona(name) ctx = await sq.get_juror_context("[2019] NSWSC 1234", persona) assert isinstance(ctx, JurorContext) assert ctx.persona == name assert ctx.case_mnc == "[2019] NSWSC 1234" assert ctx.total_tokens >= 0 assert isinstance(ctx.source_chunk_ids, list) @pytest.mark.asyncio async def test_foreman_sees_opening_and_judgment(self): """Foreman persona should access opening, judgment, sentence chunks.""" db = InMemoryGraphDB() from aucourt_ingest.processing.graph_builder import GraphBuilder builder = GraphBuilder(db) meta = _make_meta() case_id = await builder.build_case(meta) # Opening chunk connected to case opening_id = await db.create_node("Chunk", { "chunk_id": "open0", "type": "opening", "sequence": 0, "text_preview": "The prosecution alleges...", "token_count": 30, }) await db.create_relationship(case_id, opening_id, "CHARGED_WITH") # Sentence chunk connected to case sentence_id = await db.create_node("Chunk", { "chunk_id": "sent0", "type": "sentence", "sequence": 0, "text_preview": "The defendant is sentenced to...", "token_count": 30, }) await db.create_relationship(case_id, sentence_id, "HEARD_BY") sq = SubgraphQuery(db) ctx = await sq.get_juror_context("[2019] NSWSC 1234", get_persona("foreman")) assert "prosecution alleges" in ctx.context_text assert "sentenced" in ctx.context_text assert len(ctx.source_chunk_ids) == 2 @pytest.mark.asyncio async def test_excludes_inadmissible_globally(self): """Even without explicit anchor match, RULED_INADMISSIBLE edges block.""" db = InMemoryGraphDB() from aucourt_ingest.processing.graph_builder import GraphBuilder builder = GraphBuilder(db) meta = _make_meta() case_id = await builder.build_case(meta) # Forensic exhibit with RULED_INADMISSIBLE edge exhibit_id = await db.create_node("Exhibit", {"category": "forensic", "id": "ex1"}) await db.create_relationship(case_id, exhibit_id, "RULED_INADMISSIBLE") chunk_id = await db.create_node("Chunk", { "chunk_id": "fc0", "type": "exhibit", "sequence": 0, "text_preview": "Excluded forensic report", "token_count": 20, }) await db.create_relationship(exhibit_id, chunk_id, "DESCRIBED_IN") sq = SubgraphQuery(db) # ex_cop cares about forensic exhibits ctx = await sq.get_juror_context("[2019] NSWSC 1234", get_persona("ex_cop")) assert "Excluded" not in ctx.context_text assert ctx.source_chunk_ids == []