# aucourt-ingest/tests/test_jury.py

"""Tests for juror personas and subgraph queries."""
from __future__ import annotations
import pytest
from aucourt_ingest.models import CaseMeta, Chunk, JurorPersona, JurorContext, Verdict, MatterType
from aucourt_ingest.jury.personas import PERSONAS, get_persona, all_persona_names, DEFAULT_TOKEN_BUDGET
from aucourt_ingest.jury.subgraph_query import SubgraphQuery, _parse_anchor_spec
from aucourt_ingest.storage.in_memory_graph_db import InMemoryGraphDB
def _make_meta(
    mnc="[2019] NSWSC 1234",
    court="NSWSC",
    judges=None,
    charges=None,
    inadmissible=None,
    is_appeal=False,
    appeal_of="",
) -> CaseMeta:
    """Build a ``CaseMeta`` fixture with fixed verdict, matter type and date.

    Args:
        mnc: Citation stored on the case and embedded in ``case_name``.
        court: Court code stored on the case.
        judges: Value for ``judge``; ``None`` selects ``["Judge Smith"]``.
        charges: Value for ``charges``; ``None`` selects ``["murder"]``.
        inadmissible: Value for ``inadmissible_evidence``; any falsy value
            (including ``None``) is replaced by a new empty list.
        is_appeal: Passed through unchanged.
        appeal_of: Passed through unchanged.

    Returns:
        A ``CaseMeta`` with ``Verdict.GUILTY``, ``MatterType.CRIMINAL``,
        ``exoneration_flag=False``, delivery date ``"2019-06-15"`` and
        jurisdiction ``"NSW"``.
    """
    # Resolve defaults up front so the constructor call below stays flat.
    if judges is None:
        judges = ["Judge Smith"]
    if charges is None:
        charges = ["murder"]
    if not inadmissible:
        inadmissible = []

    return CaseMeta(
        case_name=f"Test v State ({mnc})",
        mnc=mnc,
        court=court,
        judge=judges,
        charges=charges,
        inadmissible_evidence=inadmissible,
        is_appeal=is_appeal,
        appeal_of=appeal_of,
        verdict=Verdict.GUILTY,
        exoneration_flag=False,
        matter_type=MatterType.CRIMINAL,
        date_delivered="2019-06-15",
        jurisdiction="NSW",
    )
# ── Persona definitions ──
class TestPersonaDefinitions:
    """Checks on the ``PERSONAS`` registry and its lookup helpers."""

    def test_six_personas_defined(self):
        """The registry contains exactly six personas."""
        assert len(PERSONAS) == 6

    def test_all_expected_names(self):
        """The registry keys are exactly the six expected persona names."""
        names = set(PERSONAS.keys())
        assert names == {"nurse", "accountant", "skeptic", "ex_cop", "empath", "foreman"}

    def test_all_exclude_inadmissible(self):
        """Every persona lists ``RULED_INADMISSIBLE`` among its excluded edges."""
        for name, persona in PERSONAS.items():
            assert "RULED_INADMISSIBLE" in persona.exclude_edges, f"{name} missing RULED_INADMISSIBLE"

    def test_all_have_anchor_nodes(self):
        """Every persona declares at least one anchor node."""
        for name, persona in PERSONAS.items():
            assert len(persona.anchor_nodes) > 0, f"{name} has no anchor nodes"

    def test_all_have_edge_types(self):
        """Every persona declares at least one edge type."""
        for name, persona in PERSONAS.items():
            assert len(persona.edge_types) > 0, f"{name} has no edge types"

    def test_all_have_chunk_types(self):
        """Every persona declares at least one chunk type."""
        for name, persona in PERSONAS.items():
            assert len(persona.chunk_types) > 0, f"{name} has no chunk types"

    def test_get_persona(self):
        """``get_persona`` returns the persona registered under the given name."""
        nurse = get_persona("nurse")
        assert nurse.name == "nurse"
        assert "testimony" in nurse.chunk_types

    def test_get_persona_raises(self):
        """``get_persona`` raises ``KeyError`` for an unknown name."""
        with pytest.raises(KeyError):
            get_persona("nonexistent")

    def test_all_persona_names(self):
        """``all_persona_names`` returns six names matching the registry keys."""
        names = all_persona_names()
        assert len(names) == 6
        assert set(names) == set(PERSONAS.keys())

    def test_nurse_persona(self):
        """The nurse reads testimony/exhibit chunks over testimony/corroboration edges."""
        p = PERSONAS["nurse"]
        assert "testimony" in p.chunk_types
        assert "exhibit" in p.chunk_types
        assert "GAVE_TESTIMONY" in p.edge_types
        assert "CORROBORATES" in p.edge_types

    def test_foreman_persona(self):
        """The foreman reads opening/judgment/sentence chunks and anchors on a charge."""
        p = PERSONAS["foreman"]
        assert "opening" in p.chunk_types
        assert "judgment" in p.chunk_types
        assert "sentence" in p.chunk_types
        # Inspect each anchor separately: concatenating the anchors first
        # would let "Charge" match across the seam between two entries.
        assert any("Charge" in anchor for anchor in p.anchor_nodes)

    def test_skeptic_persona(self):
        """The skeptic follows contradiction edges over testimony and ruling chunks."""
        p = PERSONAS["skeptic"]
        assert "CONTRADICTS" in p.edge_types
        assert "testimony" in p.chunk_types
        assert "ruling" in p.chunk_types

    def test_empath_persona(self):
        """The empath reads closing/testimony chunks over both corroboration and contradiction edges."""
        p = PERSONAS["empath"]
        assert "closing" in p.chunk_types
        assert "testimony" in p.chunk_types
        assert "CORROBORATES" in p.edge_types
        assert "CONTRADICTS" in p.edge_types
# ── Anchor spec parsing ──
class TestParseAnchorSpec:
    """Checks that ``_parse_anchor_spec`` splits a spec into a label and a property dict."""

    def test_plain_label(self):
        """A spec without brackets yields the label and no properties."""
        node_label, node_props = _parse_anchor_spec("Timeline")
        assert (node_label, node_props) == ("Timeline", {})

    def test_label_with_single_prop(self):
        """A single ``key=value`` pair inside brackets becomes one dict entry."""
        node_label, node_props = _parse_anchor_spec("Witness[role=expert]")
        assert (node_label, node_props) == ("Witness", {"role": "expert"})

    def test_label_with_multiple_props(self):
        """Comma-separated pairs inside brackets each become a dict entry."""
        node_label, node_props = _parse_anchor_spec("Exhibit[category=medical,type=report]")
        assert (node_label, node_props) == (
            "Exhibit",
            {"category": "medical", "type": "report"},
        )

    def test_whitespace_in_props(self):
        """Spaces around ``=`` do not end up in the parsed key or value."""
        node_label, node_props = _parse_anchor_spec("Witness[role = expert]")
        assert (node_label, node_props) == ("Witness", {"role": "expert"})

    def test_spaces_around_bracket(self):
        """A space between the label and the opening bracket is not part of the label."""
        node_label, node_props = _parse_anchor_spec("Witness [role=expert]")
        assert (node_label, node_props) == ("Witness", {"role": "expert"})
# ── SubgraphQuery ──
class TestSubgraphQuery:
    """Exercise ``SubgraphQuery.get_juror_context`` against an ``InMemoryGraphDB``."""

    # Citation of the case every seeded test queries; the tests that use it
    # build their case from ``_make_meta`` without overriding ``mnc``.
    CASE_MNC = "[2019] NSWSC 1234"

    @staticmethod
    async def _seed_case(**meta_overrides):
        """Return ``(db, case_id)`` for a fresh in-memory graph holding one built case.

        ``meta_overrides`` are forwarded to ``_make_meta``.
        """
        from aucourt_ingest.processing.graph_builder import GraphBuilder

        db = InMemoryGraphDB()
        case_id = await GraphBuilder(db).build_case(_make_meta(**meta_overrides))
        return db, case_id

    @pytest.mark.asyncio
    async def test_missing_case_returns_empty(self):
        """Querying a citation absent from the graph yields an empty context."""
        unknown_mnc = "[2099] FAKE 9999"
        query = SubgraphQuery(InMemoryGraphDB())
        juror = get_persona("nurse")

        ctx = await query.get_juror_context(unknown_mnc, juror)

        assert ctx.persona == "nurse"
        assert ctx.case_mnc == unknown_mnc
        assert ctx.context_text == ""
        assert ctx.source_chunk_ids == []
        assert ctx.total_tokens == 0

    @pytest.mark.asyncio
    async def test_no_matching_chunks_returns_empty(self):
        """A built case with no chunk nodes added yields no text and no chunk ids."""
        db, _ = await self._seed_case()
        query = SubgraphQuery(db)
        juror = get_persona("nurse")

        ctx = await query.get_juror_context(self.CASE_MNC, juror)

        assert ctx.context_text == ""
        assert ctx.source_chunk_ids == []

    @pytest.mark.asyncio
    async def test_inadmissible_wall_blocks_evidence(self):
        """Content reachable only through a RULED_INADMISSIBLE edge is left out."""
        db, case_id = await self._seed_case(inadmissible=["hearsay statement"])

        # Witness hanging off the case by the excluded edge type.
        blocked_witness = await db.create_node("Witness", {"role": "expert", "id": "w_inad"})
        await db.create_relationship(
            case_id, blocked_witness, "RULED_INADMISSIBLE", {"evidence": "hearsay statement"}
        )

        # Testimony that is only reachable via that blocked witness.
        blocked_chunk = await db.create_node(
            "Chunk",
            {
                "chunk_id": "inc0",
                "type": "testimony",
                "sequence": 0,
                "text_preview": "Inadmissible testimony that should be excluded",
                "token_count": 20,
            },
        )
        await db.create_relationship(blocked_witness, blocked_chunk, "GAVE_TESTIMONY")

        # Second witness, linked by an ordinary edge, with its own testimony.
        allowed_witness = await db.create_node("Witness", {"role": "expert", "id": "w_good"})
        await db.create_relationship(case_id, allowed_witness, "GAVE_TESTIMONY")
        allowed_chunk = await db.create_node(
            "Chunk",
            {
                "chunk_id": "good0",
                "type": "testimony",
                "sequence": 0,
                "text_preview": "Admissible expert testimony",
                "token_count": 20,
            },
        )
        await db.create_relationship(allowed_witness, allowed_chunk, "GAVE_TESTIMONY")

        query = SubgraphQuery(db)
        juror = get_persona("nurse")
        ctx = await query.get_juror_context(self.CASE_MNC, juror)

        # Only the admissible branch may surface.
        assert "Inadmissible" not in ctx.context_text
        assert "Admissible" in ctx.context_text
        assert "inc0" not in ctx.source_chunk_ids
        assert "good0" in ctx.source_chunk_ids

    @pytest.mark.asyncio
    async def test_token_budget_respected(self):
        """The assembled context stays near the requested ``max_tokens`` budget."""
        db, case_id = await self._seed_case()
        expert = await db.create_node("Witness", {"role": "expert"})
        await db.create_relationship(case_id, expert, "GAVE_TESTIMONY")

        # Five 50-token chunks: far more than the budget used below.
        for i in range(5):
            node = await db.create_node(
                "Chunk",
                {
                    "chunk_id": f"c{i}",
                    "type": "testimony",
                    "sequence": i,
                    "text_preview": f"Chunk {i} text " * 10,
                    "token_count": 50,
                },
            )
            await db.create_relationship(expert, node, "GAVE_TESTIMONY")

        query = SubgraphQuery(db)
        juror = get_persona("nurse")

        # A 100-token budget fits two 50-token chunks.
        ctx = await query.get_juror_context(self.CASE_MNC, juror, max_tokens=100)

        assert ctx.total_tokens <= 150  # slack for one partial chunk
        assert len(ctx.source_chunk_ids) <= 3  # two full chunks plus possibly one partial

    @pytest.mark.asyncio
    async def test_chunk_type_filtering(self):
        """A persona only receives chunks whose type is in its ``chunk_types``."""
        db, case_id = await self._seed_case()

        # Expert witness attached to the case.
        expert = await db.create_node("Witness", {"role": "expert"})
        await db.create_relationship(case_id, expert, "GAVE_TESTIMONY")

        # Testimony chunk: a type the nurse reads.
        testimony_node = await db.create_node(
            "Chunk",
            {
                "chunk_id": "test0",
                "type": "testimony",
                "sequence": 0,
                "text_preview": "Expert medical testimony",
                "token_count": 20,
            },
        )
        await db.create_relationship(expert, testimony_node, "GAVE_TESTIMONY")

        # Judgment chunk: a type the nurse is expected to skip.
        judgment_node = await db.create_node(
            "Chunk",
            {
                "chunk_id": "judg0",
                "type": "judgment",
                "sequence": 0,
                "text_preview": "The court finds...",
                "token_count": 20,
            },
        )
        await db.create_relationship(case_id, judgment_node, "HAS_RULING")

        query = SubgraphQuery(db)

        # The nurse gets the testimony but not the judgment.
        nurse_ctx = await query.get_juror_context(self.CASE_MNC, get_persona("nurse"))
        assert "medical" in nurse_ctx.context_text
        assert "court finds" not in nurse_ctx.context_text

        # The foreman, by contrast, does get the judgment.
        foreman_ctx = await query.get_juror_context(self.CASE_MNC, get_persona("foreman"))
        assert "court finds" in foreman_ctx.context_text

    @pytest.mark.asyncio
    async def test_chunks_sorted_by_sequence(self):
        """Chunks come back ordered by ``sequence`` regardless of insertion order."""
        db, case_id = await self._seed_case()
        witness = await db.create_node("Witness", {"role": "prosecution"})
        await db.create_relationship(case_id, witness, "GAVE_TESTIMONY")

        # Deliberately inserted out of sequence order.
        shuffled = ((2, "Third"), (0, "First"), (1, "Second"))
        for seq, text in shuffled:
            node = await db.create_node(
                "Chunk",
                {
                    "chunk_id": f"c{seq}",
                    "type": "testimony",
                    "sequence": seq,
                    "text_preview": text,
                    "token_count": 10,
                },
            )
            await db.create_relationship(witness, node, "GAVE_TESTIMONY")

        query = SubgraphQuery(db)
        juror = get_persona("empath")
        ctx = await query.get_juror_context(self.CASE_MNC, juror)

        assert ctx.context_text.startswith("First")
        assert ctx.source_chunk_ids == ["c0", "c1", "c2"]

    @pytest.mark.asyncio
    async def test_all_personas_return_valid_context(self):
        """Each registered persona gets back a well-formed ``JurorContext``."""
        db, _ = await self._seed_case()
        query = SubgraphQuery(db)

        for name in all_persona_names():
            juror = get_persona(name)
            ctx = await query.get_juror_context(self.CASE_MNC, juror)
            assert isinstance(ctx, JurorContext)
            assert ctx.persona == name
            assert ctx.case_mnc == self.CASE_MNC
            assert ctx.total_tokens >= 0
            assert isinstance(ctx.source_chunk_ids, list)

    @pytest.mark.asyncio
    async def test_foreman_sees_opening_and_judgment(self):
        """The foreman receives the opening and sentence chunks linked to the case."""
        db, case_id = await self._seed_case()

        # Opening chunk hung directly off the case.
        opening_node = await db.create_node(
            "Chunk",
            {
                "chunk_id": "open0",
                "type": "opening",
                "sequence": 0,
                "text_preview": "The prosecution alleges...",
                "token_count": 30,
            },
        )
        await db.create_relationship(case_id, opening_node, "CHARGED_WITH")

        # Sentence chunk hung directly off the case.
        sentence_node = await db.create_node(
            "Chunk",
            {
                "chunk_id": "sent0",
                "type": "sentence",
                "sequence": 0,
                "text_preview": "The defendant is sentenced to...",
                "token_count": 30,
            },
        )
        await db.create_relationship(case_id, sentence_node, "HEARD_BY")

        query = SubgraphQuery(db)
        ctx = await query.get_juror_context(self.CASE_MNC, get_persona("foreman"))

        assert "prosecution alleges" in ctx.context_text
        assert "sentenced" in ctx.context_text
        assert len(ctx.source_chunk_ids) == 2

    @pytest.mark.asyncio
    async def test_excludes_inadmissible_globally(self):
        """An exhibit linked only by RULED_INADMISSIBLE contributes nothing."""
        db, case_id = await self._seed_case()

        # Forensic exhibit reachable from the case only through the excluded edge.
        exhibit = await db.create_node("Exhibit", {"category": "forensic", "id": "ex1"})
        await db.create_relationship(case_id, exhibit, "RULED_INADMISSIBLE")
        report_node = await db.create_node(
            "Chunk",
            {
                "chunk_id": "fc0",
                "type": "exhibit",
                "sequence": 0,
                "text_preview": "Excluded forensic report",
                "token_count": 20,
            },
        )
        await db.create_relationship(exhibit, report_node, "DESCRIBED_IN")

        query = SubgraphQuery(db)

        # Queried as ex_cop, the persona chosen here for forensic exhibits.
        ctx = await query.get_juror_context(self.CASE_MNC, get_persona("ex_cop"))

        assert "Excluded" not in ctx.context_text
        assert ctx.source_chunk_ids == []