"""Tests for MetaExtractor (mocked LLM).""" import json import pytest from aucourt_ingest.models import CaseMeta from aucourt_ingest.processing.meta_extractor import MetaExtractor class MockLLM: """Synchronous mock for testing — returns canned JSON.""" def __init__(self, response_json: dict | None = None, response_text: str | None = None, second_response_json: dict | None = None): self._response_json = response_json self._response_text = response_text self._second_response_json = second_response_json self.call_count = 0 async def create_message(self, prompt: str, system: str) -> str: self.call_count += 1 if self.call_count == 2 and self._second_response_json is not None: return json.dumps(self._second_response_json) if self._response_json is not None: return json.dumps(self._response_json) return self._response_text or "{}" FULL_META = { "case_name": "R v Smith", "mnc": "[2019] NSWSC 1234", "court": "NSWSC", "judge": ["Brereton J"], "date_delivered": "2019-06-15", "jurisdiction": "NSW", "matter_type": "criminal", "charges": ["murder s18 Crimes Act 1900 (NSW)"], "charge_categories": ["homicide"], "verdict": "guilty", "sentence": "18 years NMP 13", "outcome_notes": "Found guilty of murder, sentenced to 18 years.", "is_appeal": False, "appeal_of": None, "exoneration_flag": False, "inadmissible_evidence": [], "suppression_order": False, } @pytest.mark.asyncio async def test_full_extraction(): mock = MockLLM(response_json=FULL_META) extractor = MetaExtractor(llm=mock) meta = await extractor.extract("some judgment text") assert meta.case_name == "R v Smith" assert meta.mnc == "[2019] NSWSC 1234" assert meta.court == "NSWSC" assert meta.verdict == "guilty" assert meta.sentence == "18 years NMP 13" assert meta.judge == ["Brereton J"] assert meta.matter_type == "criminal" assert meta.charges == ["murder s18 Crimes Act 1900 (NSW)"] assert meta.is_appeal is False assert meta.exoneration_flag is False assert meta.suppression_order is False assert mock.call_count == 1 
@pytest.mark.asyncio
async def test_null_handling():
    """Fields missing from the LLM reply come back as None or empty lists."""
    partial_reply = {"case_name": "R v Jones", "mnc": "[2020] HCA 5"}
    llm = MockLLM(response_json=partial_reply)
    result = await MetaExtractor(llm=llm).extract("text")

    # Supplied fields pass through unchanged.
    assert result.case_name == "R v Jones"
    assert result.mnc == "[2020] HCA 5"

    # Everything the reply omitted takes a safe default.
    assert result.court is None
    assert result.judge == []
    assert result.charges == []
    assert result.verdict is None
    assert result.sentence is None


@pytest.mark.asyncio
async def test_markdown_wrapped_json():
    """A reply wrapped in a markdown ``json`` code fence parses on the first call."""
    fenced_reply = f"```json\n{json.dumps(FULL_META)}\n```"
    llm = MockLLM(response_text=fenced_reply)
    result = await MetaExtractor(llm=llm).extract("text")

    assert result.case_name == "R v Smith"
    assert llm.call_count == 1


@pytest.mark.asyncio
async def test_broken_json_retries():
    """A reply with prose before the JSON leads to a second LLM call."""
    chatty_reply = "Sure, here is the metadata:\n" + json.dumps(FULL_META)
    llm = MockLLM(response_text=chatty_reply, second_response_json=FULL_META)
    result = await MetaExtractor(llm=llm).extract("text")

    assert result.case_name == "R v Smith"
    # Exactly two calls: the initial attempt plus one retry.
    assert llm.call_count == 2


@pytest.mark.asyncio
async def test_completely_broken_json_fallback():
    """Two unusable replies yield a result with blank case_name and mnc."""
    llm = MockLLM(response_text="I cannot process this request.")
    result = await MetaExtractor(llm=llm).extract("text")

    assert result.case_name == ""
    assert result.mnc == ""
    assert llm.call_count == 2


@pytest.mark.asyncio
async def test_bool_coercion():
    """JSON ``true`` values surface as the Python ``True`` singleton."""
    reply = {
        "case_name": "R v Test",
        "mnc": "[2020] FCA 1",
        "is_appeal": True,
        "exoneration_flag": True,
        "suppression_order": True,
        "inadmissible_evidence": ["video recording"],
    }
    llm = MockLLM(response_json=reply)
    result = await MetaExtractor(llm=llm).extract("text")

    for flag_name in ("is_appeal", "exoneration_flag", "suppression_order"):
        assert getattr(result, flag_name) is True
    assert result.inadmissible_evidence == ["video recording"]