aucourt-ingest/config.toml
slothitude d77fe12cfc AuCourtIngest: complete 8-stage Australian legal case ingestion pipeline
Source layer (5 court sources), processing pipeline (parse/extract/chunk/embed/graph),
property graph with 8 node types, juror subgraph queries with 6 personas,
orchestrator with bootstrap/watch/backfill/audit/process modes, 170 tests.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-05-30 11:56:23 +10:00

78 lines
1.8 KiB
TOML

# AuCourtIngest configuration
# Copy to config.local.toml and fill in API keys — do not commit secrets.
# The contact address in the user agent is still a TODO placeholder; replace
# it with a real, monitored address before using this user agent.
user_agent = "AuCourtIngest/0.1 (legal research; contact: TODO@TODO.com)"
# Source at judgments.fedcourt.gov.au, using the RSS-poll-then-DOCX strategy.
[sources.fedcourt]
base_url = "https://www.judgments.fedcourt.gov.au"
fetch_strategy = "rss_poll_then_docx_download"
rss_feed = "https://www.judgments.fedcourt.gov.au/rss/fca-judgments"
doc_formats = [
    "html",
    "docx",
    "pdf",
]
# NOTE(review): presumably the earliest year to ingest — confirm.
coverage_from = 1977
# Throttling for this source. NOTE(review): rps is presumably requests per
# second and retry_after presumably seconds — confirm against the fetcher.
rate_limit.rps = 1.0
rate_limit.concurrent = 2
rate_limit.retry_after = 60
# Source at hcourt.gov.au, using the index_crawl strategy.
[sources.highcourt]
base_url = "https://www.hcourt.gov.au"
fetch_strategy = "index_crawl"
doc_formats = [
    "html",
    "pdf",
]
# NOTE(review): presumably the earliest year to ingest — confirm.
coverage_from = 1994
# Throttling for this source. NOTE(review): rps is presumably requests per
# second and retry_after presumably seconds — confirm against the fetcher.
rate_limit.rps = 0.5
rate_limit.concurrent = 1
rate_limit.retry_after = 120
# Source at caselaw.nsw.gov.au, using the browse_pagination strategy
# (browse_url is the listing endpoint configured for it).
[sources.nsw_caselaw]
base_url = "https://www.caselaw.nsw.gov.au"
browse_url = "https://www.caselaw.nsw.gov.au/browse"
fetch_strategy = "browse_pagination"
doc_formats = [
    "html",
    "pdf",
]
# NOTE(review): presumably the earliest year to ingest — confirm.
coverage_from = 1988
# Throttling for this source. NOTE(review): rps is presumably requests per
# second and retry_after presumably seconds — confirm against the fetcher.
rate_limit.rps = 1.0
rate_limit.concurrent = 2
rate_limit.retry_after = 60
# Source at queenslandjudgments.com.au, using the search_pagination strategy.
# NOTE(review): no coverage_from is set here, unlike fedcourt, highcourt and
# nsw_caselaw — confirm the loader treats that key as optional.
[sources.qld_judgments]
base_url = "https://www.queenslandjudgments.com.au"
fetch_strategy = "search_pagination"
doc_formats = [
    "html",
    "pdf",
]
# Throttling for this source. NOTE(review): rps is presumably requests per
# second and retry_after presumably seconds — confirm against the fetcher.
rate_limit.rps = 0.5
rate_limit.concurrent = 1
rate_limit.retry_after = 120
# Source reached through the mcp://auslaw endpoint, using the mcp_server strategy.
# NOTE(review): no coverage_from is set here, unlike fedcourt, highcourt and
# nsw_caselaw — confirm the loader treats that key as optional.
[sources.auslaw_mcp]
base_url = "mcp://auslaw"
fetch_strategy = "mcp_server"
doc_formats = [
    "html",
    "pdf",
]
# Throttling for this source. NOTE(review): rps is presumably requests per
# second and retry_after presumably seconds — confirm against the fetcher.
rate_limit.rps = 0.3
rate_limit.concurrent = 1
rate_limit.retry_after = 180
# Local data directory and Neo4j connection settings.
[storage]
# Relative path. NOTE(review): presumably resolved against the process
# working directory — confirm.
data_dir = "data"
neo4j_uri = "bolt://localhost:7687"
neo4j_user = "neo4j"
# NOTE(review): a literal credential is checked in here even though this
# file's own guidance is not to commit secrets. Treat it as a local-only
# placeholder and override it in config.local.toml for any real deployment.
neo4j_password = "password"
neo4j_database = "au_legal"
# LLM provider credentials and model selection.
# Both API keys are intentionally blank in the committed file.
[llm]
anthropic_api_key = "" # set via ANTHROPIC_API_KEY env or here
openai_api_key = "" # set via OPENAI_API_KEY env or here
# NOTE(review): presumably the model used by the extraction stage — confirm.
extraction_model = "claude-haiku-4-5-20251001"
# NOTE(review): presumably the model used by the embedding stage — confirm.
embedding_model = "text-embedding-3-small"
# NOTE(review): presumably the number of items sent per embedding request — confirm.
embedding_batch_size = 100
# Telegram settings. Disabled in the committed file, with bot_token and
# chat_id left blank; NOTE(review): presumably both must be filled in
# before setting enabled = true — confirm.
[telegram]
bot_token = ""
chat_id = ""
enabled = false