# aucourt-ingest/tests/test_jury.py

"""Tests for juror personas and subgraph queries."""

from __future__ import annotations

import pytest

from aucourt_ingest.models import CaseMeta, Chunk, JurorPersona, JurorContext, Verdict, MatterType
from aucourt_ingest.jury.personas import PERSONAS, get_persona, all_persona_names, DEFAULT_TOKEN_BUDGET
from aucourt_ingest.jury.subgraph_query import SubgraphQuery, _parse_anchor_spec
from aucourt_ingest.storage.in_memory_graph_db import InMemoryGraphDB


def _make_meta(mnc="[2019] NSWSC 1234", court="NSWSC",
               judges=None, charges=None, inadmissible=None,
               is_appeal=False, appeal_of="") -> CaseMeta:
    """Build a ``CaseMeta`` fixture for a guilty NSW criminal matter.

    Args:
        mnc: Citation string; also embedded in the generated case name.
        court: Court code stored on the record.
        judges: Judge names; ``None`` selects ``["Judge Smith"]``.
        charges: Charge list; ``None`` selects ``["murder"]``.
        inadmissible: Inadmissible evidence items; any falsy value
            becomes an empty list.
        is_appeal: Whether the case is flagged as an appeal.
        appeal_of: Value stored in the ``appeal_of`` field.

    Returns:
        A ``CaseMeta`` with the remaining fields fixed to constant
        test values (verdict, matter type, date, jurisdiction).
    """
    # Only ``None`` triggers the defaults, so an explicit empty list is kept.
    if judges is None:
        judges = ["Judge Smith"]
    if charges is None:
        charges = ["murder"]

    fields = {
        "case_name": f"Test v State ({mnc})",
        "mnc": mnc,
        "court": court,
        "judge": judges,
        "charges": charges,
        "inadmissible_evidence": inadmissible if inadmissible else [],
        "is_appeal": is_appeal,
        "appeal_of": appeal_of,
        "verdict": Verdict.GUILTY,
        "exoneration_flag": False,
        "matter_type": MatterType.CRIMINAL,
        "date_delivered": "2019-06-15",
        "jurisdiction": "NSW",
    }
    return CaseMeta(**fields)


# ── Persona definitions ──

class TestPersonaDefinitions:
    """Checks on the static ``PERSONAS`` registry and its accessor helpers."""

    def test_six_personas_defined(self):
        """The registry holds exactly six personas."""
        persona_count = len(PERSONAS)
        assert persona_count == 6

    def test_all_expected_names(self):
        """Registry keys are exactly the six expected persona names."""
        expected = {"nurse", "accountant", "skeptic", "ex_cop", "empath", "foreman"}
        assert set(PERSONAS.keys()) == expected

    def test_all_exclude_inadmissible(self):
        """Every persona lists RULED_INADMISSIBLE among its excluded edges."""
        for key, spec in PERSONAS.items():
            blocked = spec.exclude_edges
            assert "RULED_INADMISSIBLE" in blocked, f"{key} missing RULED_INADMISSIBLE"

    def test_all_have_anchor_nodes(self):
        """Every persona declares at least one anchor node."""
        for key, spec in PERSONAS.items():
            anchor_count = len(spec.anchor_nodes)
            assert anchor_count > 0, f"{key} has no anchor nodes"

    def test_all_have_edge_types(self):
        """Every persona declares at least one edge type."""
        for key, spec in PERSONAS.items():
            edge_count = len(spec.edge_types)
            assert edge_count > 0, f"{key} has no edge types"

    def test_all_have_chunk_types(self):
        """Every persona declares at least one chunk type."""
        for key, spec in PERSONAS.items():
            chunk_type_count = len(spec.chunk_types)
            assert chunk_type_count > 0, f"{key} has no chunk types"

    def test_get_persona(self):
        """``get_persona`` returns the persona registered under the name."""
        fetched = get_persona("nurse")
        assert fetched.name == "nurse"
        assert "testimony" in fetched.chunk_types

    def test_get_persona_raises(self):
        """Unknown persona names raise ``KeyError``."""
        with pytest.raises(KeyError):
            get_persona("nonexistent")

    def test_all_persona_names(self):
        """``all_persona_names`` lists the same six names as the registry."""
        listed = all_persona_names()
        assert len(listed) == 6
        assert set(listed) == set(PERSONAS.keys())

    def test_nurse_persona(self):
        """Nurse covers testimony/exhibit chunks and corroboration edges."""
        nurse = PERSONAS["nurse"]
        for chunk_type in ("testimony", "exhibit"):
            assert chunk_type in nurse.chunk_types
        for edge in ("GAVE_TESTIMONY", "CORROBORATES"):
            assert edge in nurse.edge_types

    def test_foreman_persona(self):
        """Foreman covers opening/judgment/sentence and anchors on a Charge."""
        foreman = PERSONAS["foreman"]
        for chunk_type in ("opening", "judgment", "sentence"):
            assert chunk_type in foreman.chunk_types
        # Substring search over the concatenated anchor specs.
        anchors_joined = "".join(foreman.anchor_nodes)
        assert "Charge" in anchors_joined

    def test_skeptic_persona(self):
        """Skeptic follows CONTRADICTS edges over testimony and rulings."""
        skeptic = PERSONAS["skeptic"]
        assert "CONTRADICTS" in skeptic.edge_types
        for chunk_type in ("testimony", "ruling"):
            assert chunk_type in skeptic.chunk_types

    def test_empath_persona(self):
        """Empath reads closing/testimony chunks and both agreement edges."""
        empath = PERSONAS["empath"]
        for chunk_type in ("closing", "testimony"):
            assert chunk_type in empath.chunk_types
        for edge in ("CORROBORATES", "CONTRADICTS"):
            assert edge in empath.edge_types


# ── Anchor spec parsing ──

class TestParseAnchorSpec:
    """Checks that ``_parse_anchor_spec`` splits a spec into label and props."""

    def test_plain_label(self):
        """A bare label yields an empty property dict."""
        node_label, filters = _parse_anchor_spec("Timeline")
        assert (node_label, filters) == ("Timeline", {})

    def test_label_with_single_prop(self):
        """One bracketed ``key=value`` pair becomes a one-entry dict."""
        node_label, filters = _parse_anchor_spec("Witness[role=expert]")
        assert (node_label, filters) == ("Witness", {"role": "expert"})

    def test_label_with_multiple_props(self):
        """Comma-separated pairs are all captured."""
        node_label, filters = _parse_anchor_spec("Exhibit[category=medical,type=report]")
        assert (node_label, filters) == (
            "Exhibit",
            {"category": "medical", "type": "report"},
        )

    def test_whitespace_in_props(self):
        """Spaces around ``=`` are not kept in the key or the value."""
        node_label, filters = _parse_anchor_spec("Witness[role = expert]")
        assert (node_label, filters) == ("Witness", {"role": "expert"})

    def test_spaces_around_bracket(self):
        """A space between label and ``[`` is not kept in the label."""
        node_label, filters = _parse_anchor_spec("Witness [role=expert]")
        assert (node_label, filters) == ("Witness", {"role": "expert"})


# ── SubgraphQuery ──

class TestSubgraphQuery:
    """Tests for ``SubgraphQuery.get_juror_context`` against an in-memory graph."""

    # Citation that ``_make_meta()`` assigns by default; used by every seeded case.
    _CASE_MNC = "[2019] NSWSC 1234"

    @staticmethod
    async def _seed_case(**meta_overrides):
        """Create a fresh graph containing one built case.

        Returns ``(graph, case_id)`` where ``case_id`` is whatever
        ``GraphBuilder.build_case`` returned.
        """
        from aucourt_ingest.processing.graph_builder import GraphBuilder

        graph = InMemoryGraphDB()
        case_id = await GraphBuilder(graph).build_case(_make_meta(**meta_overrides))
        return graph, case_id

    @staticmethod
    async def _attach_witness(graph, case_id, props):
        """Create a Witness node and link it from the case via GAVE_TESTIMONY."""
        witness_id = await graph.create_node("Witness", props)
        await graph.create_relationship(case_id, witness_id, "GAVE_TESTIMONY")
        return witness_id

    @staticmethod
    async def _attach_chunk(graph, source_id, edge, *, chunk_id, kind,
                            sequence, text, tokens):
        """Create a Chunk node and link it from ``source_id`` via ``edge``."""
        node_id = await graph.create_node("Chunk", {
            "chunk_id": chunk_id, "type": kind, "sequence": sequence,
            "text_preview": text, "token_count": tokens,
        })
        await graph.create_relationship(source_id, node_id, edge)
        return node_id

    @pytest.mark.asyncio
    async def test_missing_case_returns_empty(self):
        """Querying a citation absent from the graph yields an empty context."""
        query = SubgraphQuery(InMemoryGraphDB())
        result = await query.get_juror_context("[2099] FAKE 9999", get_persona("nurse"))

        assert result.persona == "nurse"
        assert result.case_mnc == "[2099] FAKE 9999"
        assert result.context_text == ""
        assert result.source_chunk_ids == []
        assert result.total_tokens == 0

    @pytest.mark.asyncio
    async def test_no_matching_chunks_returns_empty(self):
        """A case with no chunk nodes attached yields empty text and ids."""
        graph, _ = await self._seed_case()

        query = SubgraphQuery(graph)
        result = await query.get_juror_context(self._CASE_MNC, get_persona("nurse"))

        assert result.context_text == ""
        assert result.source_chunk_ids == []

    @pytest.mark.asyncio
    async def test_inadmissible_wall_blocks_evidence(self):
        """RULED_INADMISSIBLE edges must be excluded from juror traversal."""
        graph, case_id = await self._seed_case(inadmissible=["hearsay statement"])

        # Witness reachable from the case only through the excluded edge.
        barred_witness = await graph.create_node("Witness", {"role": "expert", "id": "w_inad"})
        await graph.create_relationship(case_id, barred_witness, "RULED_INADMISSIBLE",
                                        {"evidence": "hearsay statement"})
        await self._attach_chunk(
            graph, barred_witness, "GAVE_TESTIMONY",
            chunk_id="inc0", kind="testimony", sequence=0,
            text="Inadmissible testimony that should be excluded",
            tokens=20,
        )

        # Witness reachable through a normal testimony edge.
        allowed_witness = await self._attach_witness(
            graph, case_id, {"role": "expert", "id": "w_good"},
        )
        await self._attach_chunk(
            graph, allowed_witness, "GAVE_TESTIMONY",
            chunk_id="good0", kind="testimony", sequence=0,
            text="Admissible expert testimony",
            tokens=20,
        )

        query = SubgraphQuery(graph)
        result = await query.get_juror_context(self._CASE_MNC, get_persona("nurse"))

        # Only the admissible chunk may surface; the check is case-sensitive.
        assert "Inadmissible" not in result.context_text
        assert "Admissible" in result.context_text
        assert "inc0" not in result.source_chunk_ids
        assert "good0" in result.source_chunk_ids

    @pytest.mark.asyncio
    async def test_token_budget_respected(self):
        """Context size should stay near the requested ``max_tokens``."""
        graph, case_id = await self._seed_case()
        witness = await self._attach_witness(graph, case_id, {"role": "expert"})

        # Five chunks of 50 tokens each: 250 tokens in total.
        for idx in range(5):
            await self._attach_chunk(
                graph, witness, "GAVE_TESTIMONY",
                chunk_id=f"c{idx}", kind="testimony", sequence=idx,
                text=f"Chunk {idx} text " * 10, tokens=50,
            )

        query = SubgraphQuery(graph)
        # A 100-token budget corresponds to two whole 50-token chunks.
        result = await query.get_juror_context(
            self._CASE_MNC, get_persona("nurse"), max_tokens=100,
        )

        # NOTE(review): both bounds allow one extra (partial) chunk and would
        # also pass on an empty context -- confirm whether that is intended.
        assert result.total_tokens <= 150
        assert len(result.source_chunk_ids) <= 3

    @pytest.mark.asyncio
    async def test_chunk_type_filtering(self):
        """Only chunks matching persona's chunk_types should be included."""
        graph, case_id = await self._seed_case()
        witness = await self._attach_witness(graph, case_id, {"role": "expert"})

        # Testimony chunk hanging off the witness.
        await self._attach_chunk(
            graph, witness, "GAVE_TESTIMONY",
            chunk_id="test0", kind="testimony", sequence=0,
            text="Expert medical testimony", tokens=20,
        )
        # Judgment chunk hanging directly off the case.
        await self._attach_chunk(
            graph, case_id, "HAS_RULING",
            chunk_id="judg0", kind="judgment", sequence=0,
            text="The court finds...", tokens=20,
        )

        query = SubgraphQuery(graph)

        nurse_view = await query.get_juror_context(self._CASE_MNC, get_persona("nurse"))
        assert "medical" in nurse_view.context_text
        assert "court finds" not in nurse_view.context_text

        foreman_view = await query.get_juror_context(self._CASE_MNC, get_persona("foreman"))
        assert "court finds" in foreman_view.context_text

    @pytest.mark.asyncio
    async def test_chunks_sorted_by_sequence(self):
        """Chunks should be assembled in sequence order."""
        graph, case_id = await self._seed_case()
        witness = await self._attach_witness(graph, case_id, {"role": "prosecution"})

        # Deliberately inserted out of order.
        shuffled = [(2, "Third"), (0, "First"), (1, "Second")]
        for position, body in shuffled:
            await self._attach_chunk(
                graph, witness, "GAVE_TESTIMONY",
                chunk_id=f"c{position}", kind="testimony", sequence=position,
                text=body, tokens=10,
            )

        query = SubgraphQuery(graph)
        result = await query.get_juror_context(self._CASE_MNC, get_persona("empath"))

        assert result.context_text.startswith("First")
        assert result.source_chunk_ids == ["c0", "c1", "c2"]

    @pytest.mark.asyncio
    async def test_all_personas_return_valid_context(self):
        """Every persona should return a valid JurorContext object."""
        graph, _ = await self._seed_case()

        query = SubgraphQuery(graph)
        for persona_name in all_persona_names():
            result = await query.get_juror_context(
                self._CASE_MNC, get_persona(persona_name),
            )
            assert isinstance(result, JurorContext)
            assert result.persona == persona_name
            assert result.case_mnc == "[2019] NSWSC 1234"
            assert result.total_tokens >= 0
            assert isinstance(result.source_chunk_ids, list)

    @pytest.mark.asyncio
    async def test_foreman_sees_opening_and_judgment(self):
        """Foreman context should include the opening and sentence chunks.

        Only an opening chunk and a sentence chunk are seeded here; no
        judgment chunk is created in this test.
        """
        graph, case_id = await self._seed_case()

        # Opening chunk linked from the case.
        await self._attach_chunk(
            graph, case_id, "CHARGED_WITH",
            chunk_id="open0", kind="opening", sequence=0,
            text="The prosecution alleges...", tokens=30,
        )
        # Sentence chunk linked from the case.
        await self._attach_chunk(
            graph, case_id, "HEARD_BY",
            chunk_id="sent0", kind="sentence", sequence=0,
            text="The defendant is sentenced to...", tokens=30,
        )

        query = SubgraphQuery(graph)
        result = await query.get_juror_context(self._CASE_MNC, get_persona("foreman"))

        assert "prosecution alleges" in result.context_text
        assert "sentenced" in result.context_text
        assert len(result.source_chunk_ids) == 2

    @pytest.mark.asyncio
    async def test_excludes_inadmissible_globally(self):
        """Even without explicit anchor match, RULED_INADMISSIBLE edges block."""
        graph, case_id = await self._seed_case()

        # Forensic exhibit reachable from the case only via the excluded edge.
        exhibit = await graph.create_node("Exhibit", {"category": "forensic", "id": "ex1"})
        await graph.create_relationship(case_id, exhibit, "RULED_INADMISSIBLE")
        await self._attach_chunk(
            graph, exhibit, "DESCRIBED_IN",
            chunk_id="fc0", kind="exhibit", sequence=0,
            text="Excluded forensic report", tokens=20,
        )

        query = SubgraphQuery(graph)
        result = await query.get_juror_context(self._CASE_MNC, get_persona("ex_cop"))

        assert "Excluded" not in result.context_text
        assert result.source_chunk_ids == []