# Commit summary: source layer (5 court sources), processing pipeline
# (parse/extract/chunk/embed/graph), property graph with 8 node types,
# juror subgraph queries with 6 personas, orchestrator with
# bootstrap/watch/backfill/audit/process modes, 170 tests.
# Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
# File info: 376 lines, 15 KiB, Python
"""Tests for juror personas and subgraph queries."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import pytest
|
|
|
|
from aucourt_ingest.models import CaseMeta, Chunk, JurorPersona, JurorContext, Verdict, MatterType
|
|
from aucourt_ingest.jury.personas import PERSONAS, get_persona, all_persona_names, DEFAULT_TOKEN_BUDGET
|
|
from aucourt_ingest.jury.subgraph_query import SubgraphQuery, _parse_anchor_spec
|
|
from aucourt_ingest.storage.in_memory_graph_db import InMemoryGraphDB
|
|
|
|
|
|
def _make_meta(mnc="[2019] NSWSC 1234", court="NSWSC",
               judges=None, charges=None, inadmissible=None,
               is_appeal=False, appeal_of="") -> CaseMeta:
    """Build a ``CaseMeta`` fixture with fixed, test-friendly defaults.

    Args:
        mnc: Citation string; also embedded in the generated case name.
        court: Court code stored on the metadata.
        judges: Judge names; ``None`` selects ``["Judge Smith"]``.
        charges: Charge names; ``None`` selects ``["murder"]``.
        inadmissible: Inadmissible evidence entries; any falsy value
            (``None`` or an empty list) becomes a fresh empty list.
        is_appeal: Passed through to ``CaseMeta.is_appeal``.
        appeal_of: Passed through to ``CaseMeta.appeal_of``.

    Returns:
        A ``CaseMeta`` with a guilty verdict, criminal matter type, NSW
        jurisdiction, delivery date 2019-06-15 and no exoneration flag.
    """
    # An explicitly passed empty list is respected for judges/charges;
    # only ``None`` triggers the default.
    if judges is None:
        judges = ["Judge Smith"]
    if charges is None:
        charges = ["murder"]

    fields = {
        "case_name": f"Test v State ({mnc})",
        "mnc": mnc,
        "court": court,
        "judge": judges,
        "charges": charges,
        "inadmissible_evidence": inadmissible if inadmissible else [],
        "is_appeal": is_appeal,
        "appeal_of": appeal_of,
        "verdict": Verdict.GUILTY,
        "exoneration_flag": False,
        "matter_type": MatterType.CRIMINAL,
        "date_delivered": "2019-06-15",
        "jurisdiction": "NSW",
    }
    return CaseMeta(**fields)
|
|
|
|
|
|
# ── Persona definitions ──
|
|
|
|
class TestPersonaDefinitions:
    """Checks on the static persona registry exposed as ``PERSONAS``."""

    def test_six_personas_defined(self):
        """The registry holds exactly six personas."""
        assert len(PERSONAS) == 6

    def test_all_expected_names(self):
        """The registry keys are exactly the six expected persona names."""
        names = set(PERSONAS)
        assert names == {"nurse", "accountant", "skeptic", "ex_cop", "empath", "foreman"}

    def test_all_exclude_inadmissible(self):
        """Every persona lists RULED_INADMISSIBLE among its excluded edges."""
        for name, persona in PERSONAS.items():
            assert "RULED_INADMISSIBLE" in persona.exclude_edges, f"{name} missing RULED_INADMISSIBLE"

    def test_all_have_anchor_nodes(self):
        """Every persona declares at least one anchor node spec."""
        for name, persona in PERSONAS.items():
            assert len(persona.anchor_nodes) > 0, f"{name} has no anchor nodes"

    def test_all_have_edge_types(self):
        """Every persona declares at least one edge type."""
        for name, persona in PERSONAS.items():
            assert len(persona.edge_types) > 0, f"{name} has no edge types"

    def test_all_have_chunk_types(self):
        """Every persona declares at least one chunk type."""
        for name, persona in PERSONAS.items():
            assert len(persona.chunk_types) > 0, f"{name} has no chunk types"

    def test_get_persona(self):
        """``get_persona`` returns the persona registered under the name."""
        nurse = get_persona("nurse")
        assert nurse.name == "nurse"
        assert "testimony" in nurse.chunk_types

    def test_get_persona_raises(self):
        """An unknown persona name raises ``KeyError``."""
        with pytest.raises(KeyError):
            get_persona("nonexistent")

    def test_all_persona_names(self):
        """``all_persona_names`` yields the same six names as the registry."""
        names = all_persona_names()
        assert len(names) == 6
        assert set(names) == set(PERSONAS)

    def test_nurse_persona(self):
        """Nurse reads testimony/exhibit chunks over testimony/corroboration edges."""
        p = PERSONAS["nurse"]
        assert "testimony" in p.chunk_types
        assert "exhibit" in p.chunk_types
        assert "GAVE_TESTIMONY" in p.edge_types
        assert "CORROBORATES" in p.edge_types

    def test_foreman_persona(self):
        """Foreman reads opening/judgment/sentence chunks and anchors on a Charge spec."""
        p = PERSONAS["foreman"]
        assert "opening" in p.chunk_types
        assert "judgment" in p.chunk_types
        assert "sentence" in p.chunk_types
        # Check each anchor spec on its own: concatenating the specs with no
        # separator could match "Charge" across the boundary of two
        # neighbouring specs and pass without any Charge anchor existing.
        assert any("Charge" in spec for spec in p.anchor_nodes)

    def test_skeptic_persona(self):
        """Skeptic follows CONTRADICTS edges and reads testimony/ruling chunks."""
        p = PERSONAS["skeptic"]
        assert "CONTRADICTS" in p.edge_types
        assert "testimony" in p.chunk_types
        assert "ruling" in p.chunk_types

    def test_empath_persona(self):
        """Empath reads closing/testimony chunks over both agreement edge kinds."""
        p = PERSONAS["empath"]
        assert "closing" in p.chunk_types
        assert "testimony" in p.chunk_types
        assert "CORROBORATES" in p.edge_types
        assert "CONTRADICTS" in p.edge_types
|
|
|
|
|
|
# ── Anchor spec parsing ──
|
|
|
|
class TestParseAnchorSpec:
    """Checks that ``_parse_anchor_spec`` splits a spec into label and properties."""

    @staticmethod
    def _expect(spec, want_label, want_props):
        """Parse *spec* and compare the resulting label and property dict."""
        got_label, got_props = _parse_anchor_spec(spec)
        assert got_label == want_label
        assert got_props == want_props

    def test_plain_label(self):
        """A bare label parses to that label with no properties."""
        self._expect("Timeline", "Timeline", {})

    def test_label_with_single_prop(self):
        """One bracketed ``key=value`` pair becomes a one-entry dict."""
        self._expect("Witness[role=expert]", "Witness", {"role": "expert"})

    def test_label_with_multiple_props(self):
        """Comma-separated pairs each become a dict entry."""
        self._expect(
            "Exhibit[category=medical,type=report]",
            "Exhibit",
            {"category": "medical", "type": "report"},
        )

    def test_whitespace_in_props(self):
        """Spaces around ``=`` do not end up in the key or the value."""
        self._expect("Witness[role = expert]", "Witness", {"role": "expert"})

    def test_spaces_around_bracket(self):
        """A space between the label and ``[`` is not part of the label."""
        self._expect("Witness [role=expert]", "Witness", {"role": "expert"})
|
|
|
|
|
|
# ── SubgraphQuery ──
|
|
|
|
class TestSubgraphQuery:
    """Exercises ``SubgraphQuery.get_juror_context`` against an in-memory graph."""

    # Citation that ``_make_meta()`` uses by default; most tests query by it.
    DEFAULT_MNC = "[2019] NSWSC 1234"

    @staticmethod
    async def _seed_case(**meta_overrides):
        """Create a fresh in-memory graph holding one built case.

        Keyword arguments are forwarded to ``_make_meta``. Returns the
        ``(graph, case_node_id)`` pair.
        """
        from aucourt_ingest.processing.graph_builder import GraphBuilder

        graph = InMemoryGraphDB()
        case_node = await GraphBuilder(graph).build_case(_make_meta(**meta_overrides))
        return graph, case_node

    @staticmethod
    async def _attach_chunk(graph, parent, edge, *, chunk_id, kind, sequence, text, tokens):
        """Create a Chunk node and link ``parent -[edge]-> chunk``; return its id."""
        node = await graph.create_node("Chunk", {
            "chunk_id": chunk_id, "type": kind, "sequence": sequence,
            "text_preview": text, "token_count": tokens,
        })
        await graph.create_relationship(parent, node, edge)
        return node

    @pytest.mark.asyncio
    async def test_missing_case_returns_empty(self):
        """Querying a citation absent from the graph yields an empty context."""
        query = SubgraphQuery(InMemoryGraphDB())
        ctx = await query.get_juror_context("[2099] FAKE 9999", get_persona("nurse"))

        assert ctx.persona == "nurse"
        assert ctx.case_mnc == "[2099] FAKE 9999"
        assert ctx.context_text == ""
        assert ctx.source_chunk_ids == []
        assert ctx.total_tokens == 0

    @pytest.mark.asyncio
    async def test_no_matching_chunks_returns_empty(self):
        """A case with no chunk nodes added by the test yields no text and no chunk ids."""
        graph, _ = await self._seed_case()

        ctx = await SubgraphQuery(graph).get_juror_context(
            self.DEFAULT_MNC, get_persona("nurse"))

        assert ctx.context_text == ""
        assert ctx.source_chunk_ids == []

    @pytest.mark.asyncio
    async def test_inadmissible_wall_blocks_evidence(self):
        """RULED_INADMISSIBLE edges must be excluded from juror traversal."""
        graph, case_node = await self._seed_case(inadmissible=["hearsay statement"])

        # Witness reachable from the case only through the excluded edge.
        tainted_witness = await graph.create_node("Witness", {"role": "expert", "id": "w_inad"})
        await graph.create_relationship(
            case_node, tainted_witness, "RULED_INADMISSIBLE",
            {"evidence": "hearsay statement"})

        # Testimony hanging off the inadmissible witness.
        await self._attach_chunk(
            graph, tainted_witness, "GAVE_TESTIMONY",
            chunk_id="inc0", kind="testimony", sequence=0,
            text="Inadmissible testimony that should be excluded", tokens=20)

        # Witness reachable through an ordinary testimony edge, with testimony.
        clean_witness = await graph.create_node("Witness", {"role": "expert", "id": "w_good"})
        await graph.create_relationship(case_node, clean_witness, "GAVE_TESTIMONY")
        await self._attach_chunk(
            graph, clean_witness, "GAVE_TESTIMONY",
            chunk_id="good0", kind="testimony", sequence=0,
            text="Admissible expert testimony", tokens=20)

        ctx = await SubgraphQuery(graph).get_juror_context(
            self.DEFAULT_MNC, get_persona("nurse"))

        # Only the admissible chunk may surface, in text and in ids.
        assert "Inadmissible" not in ctx.context_text
        assert "Admissible" in ctx.context_text
        assert "inc0" not in ctx.source_chunk_ids
        assert "good0" in ctx.source_chunk_ids

    @pytest.mark.asyncio
    async def test_token_budget_respected(self):
        """Chunks should be truncated when exceeding max_tokens."""
        graph, case_node = await self._seed_case()

        expert = await graph.create_node("Witness", {"role": "expert"})
        await graph.create_relationship(case_node, expert, "GAVE_TESTIMONY")

        # Five 50-token chunks: 250 tokens in total, well over the budget below.
        for i in range(5):
            await self._attach_chunk(
                graph, expert, "GAVE_TESTIMONY",
                chunk_id=f"c{i}", kind="testimony", sequence=i,
                text=f"Chunk {i} text " * 10, tokens=50)

        # Budget of 100 tokens = 2 chunks (50 each)
        ctx = await SubgraphQuery(graph).get_juror_context(
            self.DEFAULT_MNC, get_persona("nurse"), max_tokens=100)

        # NOTE(review): both bounds are upper bounds only, so an empty context
        # would also pass here; confirm whether a lower bound is wanted.
        assert ctx.total_tokens <= 150  # Some slack for partial chunk
        assert len(ctx.source_chunk_ids) <= 3  # 2 full + possibly 1 partial

    @pytest.mark.asyncio
    async def test_chunk_type_filtering(self):
        """Only chunks matching persona's chunk_types should be included."""
        graph, case_node = await self._seed_case()

        # Expert witness connected to the case.
        expert = await graph.create_node("Witness", {"role": "expert"})
        await graph.create_relationship(case_node, expert, "GAVE_TESTIMONY")

        # Testimony chunk: expected in the nurse's context.
        await self._attach_chunk(
            graph, expert, "GAVE_TESTIMONY",
            chunk_id="test0", kind="testimony", sequence=0,
            text="Expert medical testimony", tokens=20)

        # Judgment chunk: expected to be filtered out for the nurse.
        await self._attach_chunk(
            graph, case_node, "HAS_RULING",
            chunk_id="judg0", kind="judgment", sequence=0,
            text="The court finds...", tokens=20)

        query = SubgraphQuery(graph)

        # Nurse only sees testimony, not judgment.
        nurse_ctx = await query.get_juror_context(self.DEFAULT_MNC, get_persona("nurse"))
        assert "medical" in nurse_ctx.context_text
        assert "court finds" not in nurse_ctx.context_text

        # The foreman, by contrast, does see the judgment.
        foreman_ctx = await query.get_juror_context(self.DEFAULT_MNC, get_persona("foreman"))
        assert "court finds" in foreman_ctx.context_text

    @pytest.mark.asyncio
    async def test_chunks_sorted_by_sequence(self):
        """Chunks should be assembled in sequence order."""
        graph, case_node = await self._seed_case()

        prosecution_witness = await graph.create_node("Witness", {"role": "prosecution"})
        await graph.create_relationship(case_node, prosecution_witness, "GAVE_TESTIMONY")

        # Deliberately inserted out of order.
        shuffled = [(2, "Third"), (0, "First"), (1, "Second")]
        for seq, text in shuffled:
            await self._attach_chunk(
                graph, prosecution_witness, "GAVE_TESTIMONY",
                chunk_id=f"c{seq}", kind="testimony", sequence=seq,
                text=text, tokens=10)

        ctx = await SubgraphQuery(graph).get_juror_context(
            self.DEFAULT_MNC, get_persona("empath"))

        assert ctx.context_text.startswith("First")
        assert ctx.source_chunk_ids == ["c0", "c1", "c2"]

    @pytest.mark.asyncio
    async def test_all_personas_return_valid_context(self):
        """Every persona should return a valid JurorContext object."""
        graph, _ = await self._seed_case()
        query = SubgraphQuery(graph)

        for persona_name in all_persona_names():
            ctx = await query.get_juror_context(
                self.DEFAULT_MNC, get_persona(persona_name))

            assert isinstance(ctx, JurorContext)
            assert ctx.persona == persona_name
            assert ctx.case_mnc == "[2019] NSWSC 1234"
            assert ctx.total_tokens >= 0
            assert isinstance(ctx.source_chunk_ids, list)

    @pytest.mark.asyncio
    async def test_foreman_sees_opening_and_judgment(self):
        """Foreman persona should access opening, judgment, sentence chunks."""
        # NOTE(review): only opening and sentence chunks are created below;
        # no judgment chunk is exercised despite the test name - confirm intent.
        graph, case_node = await self._seed_case()

        # Opening chunk linked straight to the case.
        await self._attach_chunk(
            graph, case_node, "CHARGED_WITH",
            chunk_id="open0", kind="opening", sequence=0,
            text="The prosecution alleges...", tokens=30)

        # Sentence chunk linked straight to the case.
        await self._attach_chunk(
            graph, case_node, "HEARD_BY",
            chunk_id="sent0", kind="sentence", sequence=0,
            text="The defendant is sentenced to...", tokens=30)

        ctx = await SubgraphQuery(graph).get_juror_context(
            self.DEFAULT_MNC, get_persona("foreman"))

        assert "prosecution alleges" in ctx.context_text
        assert "sentenced" in ctx.context_text
        assert len(ctx.source_chunk_ids) == 2

    @pytest.mark.asyncio
    async def test_excludes_inadmissible_globally(self):
        """Even without explicit anchor match, RULED_INADMISSIBLE edges block."""
        graph, case_node = await self._seed_case()

        # Forensic exhibit attached to the case through the excluded edge only.
        exhibit = await graph.create_node("Exhibit", {"category": "forensic", "id": "ex1"})
        await graph.create_relationship(case_node, exhibit, "RULED_INADMISSIBLE")

        await self._attach_chunk(
            graph, exhibit, "DESCRIBED_IN",
            chunk_id="fc0", kind="exhibit", sequence=0,
            text="Excluded forensic report", tokens=20)

        # ex_cop cares about forensic exhibits
        ctx = await SubgraphQuery(graph).get_juror_context(
            self.DEFAULT_MNC, get_persona("ex_cop"))

        assert "Excluded" not in ctx.context_text
        assert ctx.source_chunk_ids == []
|