aucourt-ingest/tests/test_meta_extractor.py

135 lines
4.5 KiB
Python
Raw Permalink Normal View History

"""Tests for MetaExtractor (mocked LLM)."""
import json
import pytest
from aucourt_ingest.models import CaseMeta
from aucourt_ingest.processing.meta_extractor import MetaExtractor
class MockLLM:
"""Synchronous mock for testing — returns canned JSON."""
def __init__(self, response_json: dict | None = None, response_text: str | None = None,
second_response_json: dict | None = None):
self._response_json = response_json
self._response_text = response_text
self._second_response_json = second_response_json
self.call_count = 0
async def create_message(self, prompt: str, system: str) -> str:
self.call_count += 1
if self.call_count == 2 and self._second_response_json is not None:
return json.dumps(self._second_response_json)
if self._response_json is not None:
return json.dumps(self._response_json)
return self._response_text or "{}"
# Canned metadata payload with every field populated; the tests in this module
# feed it to MockLLM as the reply to be parsed.
FULL_META = {
    # Identification
    "case_name": "R v Smith",
    "mnc": "[2019] NSWSC 1234",
    "court": "NSWSC",
    "judge": ["Brereton J"],
    "date_delivered": "2019-06-15",
    "jurisdiction": "NSW",
    "matter_type": "criminal",
    # Charges and outcome
    "charges": ["murder s18 Crimes Act 1900 (NSW)"],
    "charge_categories": ["homicide"],
    "verdict": "guilty",
    "sentence": "18 years NMP 13",
    "outcome_notes": "Found guilty of murder, sentenced to 18 years.",
    # Appeal information and flags
    "is_appeal": False,
    "appeal_of": None,
    "exoneration_flag": False,
    "inadmissible_evidence": [],
    "suppression_order": False,
}
@pytest.mark.asyncio
async def test_full_extraction():
    """A complete, well-formed JSON reply is mapped onto the result in one LLM call."""
    llm = MockLLM(response_json=FULL_META)
    result = await MetaExtractor(llm=llm).extract("some judgment text")

    # String fields.
    assert result.case_name == "R v Smith"
    assert result.mnc == "[2019] NSWSC 1234"
    assert result.court == "NSWSC"
    assert result.verdict == "guilty"
    assert result.sentence == "18 years NMP 13"

    # List and category fields.
    assert result.judge == ["Brereton J"]
    assert result.matter_type == "criminal"
    assert result.charges == ["murder s18 Crimes Act 1900 (NSW)"]

    # Boolean flags must be the False singleton, not merely falsy.
    assert result.is_appeal is False
    assert result.exoneration_flag is False
    assert result.suppression_order is False

    # The first reply parsed cleanly, so no retry was issued.
    assert llm.call_count == 1
@pytest.mark.asyncio
async def test_null_handling():
    """Fields the LLM omits should come back as None or an empty list."""
    partial_reply = {"case_name": "R v Jones", "mnc": "[2020] HCA 5"}
    llm = MockLLM(response_json=partial_reply)
    result = await MetaExtractor(llm=llm).extract("text")

    # The two supplied fields are passed through.
    assert result.case_name == "R v Jones"
    assert result.mnc == "[2020] HCA 5"

    # Everything that was absent from the reply takes a safe default.
    assert result.court is None
    assert result.judge == []
    assert result.charges == []
    assert result.verdict is None
    assert result.sentence is None
@pytest.mark.asyncio
async def test_markdown_wrapped_json():
    """JSON wrapped in a markdown code fence is parsed on the first attempt."""
    fenced_reply = '```json\n' + json.dumps(FULL_META) + '\n```'
    llm = MockLLM(response_text=fenced_reply)
    result = await MetaExtractor(llm=llm).extract("text")

    assert result.case_name == "R v Smith"
    # A single call shows the fenced reply was accepted without a retry.
    assert llm.call_count == 1
@pytest.mark.asyncio
async def test_broken_json_retries():
    """An unparseable first reply should trigger one retry whose reply is used."""
    # First reply: chatty prose in front of the JSON. Second reply: clean JSON.
    chatty_reply = "Sure, here is the metadata:\n" + json.dumps(FULL_META)
    llm = MockLLM(response_text=chatty_reply, second_response_json=FULL_META)
    result = await MetaExtractor(llm=llm).extract("text")

    assert result.case_name == "R v Smith"
    assert llm.call_count == 2  # first failed parse, second succeeded
@pytest.mark.asyncio
async def test_completely_broken_json_fallback():
    """If both attempts fail, return empty CaseMeta."""
    # The mock has no second_response_json, so the retry gets the same
    # unparseable text as the first call.
    mock = MockLLM(response_text="I cannot process this request.")
    extractor = MetaExtractor(llm=mock)
    meta = await extractor.extract("text")

    # The fallback must still be a CaseMeta instance, as the docstring
    # promises, so callers can rely on the usual attributes being present.
    assert isinstance(meta, CaseMeta)
    assert meta.case_name == ""
    assert meta.mnc == ""
    # Exactly two calls: the initial attempt plus one retry.
    assert mock.call_count == 2
@pytest.mark.asyncio
async def test_bool_coercion():
    """JSON true values should surface as Python ``True`` on the result."""
    reply_with_flags = {
        "case_name": "R v Test",
        "mnc": "[2020] FCA 1",
        "is_appeal": True,
        "exoneration_flag": True,
        "suppression_order": True,
        "inadmissible_evidence": ["video recording"],
    }
    llm = MockLLM(response_json=reply_with_flags)
    result = await MetaExtractor(llm=llm).extract("text")

    # Identity checks: the flags must be real bools, not truthy stand-ins.
    assert result.is_appeal is True
    assert result.exoneration_flag is True
    assert result.suppression_order is True
    assert result.inadmissible_evidence == ["video recording"]