# aucourt-ingest/config.toml

# 85 lines
# 1.9 KiB
# TOML
# Raw Permalink Normal View History

# AuCourtIngest configuration
# Copy to config.local.toml and fill in API keys — do not commit secrets.
# Root-level key (defined before any table header), so it is not tied to a single source.
# Presumably sent as the HTTP User-Agent on outbound requests — verify against the fetch code.
# NOTE(review): the contact address is still the placeholder "TODO@TODO.com";
# replace it with a real, monitored address before crawling any site.
user_agent = "AuCourtIngest/0.1 (legal research; contact: TODO@TODO.com)"
# Source definition for the Federal Court judgments site.
# This is the only source in the file that sets an rss_feed and lists "docx" among its formats.
[sources.fedcourt]
base_url = "https://www.judgments.fedcourt.gov.au"
fetch_strategy = "rss_poll_then_docx_download"
rss_feed = "https://www.judgments.fedcourt.gov.au/rss/fca-judgments"
doc_formats = ["html", "docx", "pdf"]
# Presumably the earliest judgment year to ingest — TODO confirm.
coverage_from = 1977
# Throttling settings, written as dotted keys; this yields the same
# sources.fedcourt.rate_limit table as a dedicated [..rate_limit] header.
# Units are not stated here: presumably rps = requests per second and
# retry_after = seconds — TODO confirm.
rate_limit.rps = 1.0
rate_limit.concurrent = 2
rate_limit.retry_after = 60
# Source definition for the High Court site (hcourt.gov.au).
[sources.highcourt]
base_url = "https://www.hcourt.gov.au"
fetch_strategy = "index_crawl"
doc_formats = ["html", "pdf"]
# Presumably the earliest judgment year to ingest — TODO confirm.
coverage_from = 1994
# Throttling settings, written as dotted keys; this yields the same
# sources.highcourt.rate_limit table as a dedicated [..rate_limit] header.
# Units are not stated here: presumably rps = requests per second and
# retry_after = seconds — TODO confirm.
rate_limit.rps = 0.5
rate_limit.concurrent = 1
rate_limit.retry_after = 120
# Source definition for NSW Caselaw (caselaw.nsw.gov.au).
# This is the only source in the file that sets a separate browse_url.
[sources.nsw_caselaw]
base_url = "https://www.caselaw.nsw.gov.au"
browse_url = "https://www.caselaw.nsw.gov.au/browse"
fetch_strategy = "browse_pagination"
doc_formats = ["html", "pdf"]
# Presumably the earliest judgment year to ingest — TODO confirm.
coverage_from = 1988
# Throttling settings, written as dotted keys; this yields the same
# sources.nsw_caselaw.rate_limit table as a dedicated [..rate_limit] header.
# Units are not stated here: presumably rps = requests per second and
# retry_after = seconds — TODO confirm.
rate_limit.rps = 1.0
rate_limit.concurrent = 2
rate_limit.retry_after = 60
# Source definition for Queensland Judgments (queenslandjudgments.com.au).
# NOTE(review): no coverage_from is set here, unlike the fedcourt, highcourt
# and nsw_caselaw sources — confirm the consumer treats the key as optional.
[sources.qld_judgments]
base_url = "https://www.queenslandjudgments.com.au"
fetch_strategy = "search_pagination"
doc_formats = ["html", "pdf"]
# Throttling settings, written as dotted keys; this yields the same
# sources.qld_judgments.rate_limit table as a dedicated [..rate_limit] header.
# Units are not stated here: presumably rps = requests per second and
# retry_after = seconds — TODO confirm.
rate_limit.rps = 0.5
rate_limit.concurrent = 1
rate_limit.retry_after = 120
# Source definition reached through an MCP server rather than a website:
# base_url uses the mcp:// scheme and fetch_strategy is "mcp_server".
# NOTE(review): no coverage_from is set for this source — confirm the
# consumer treats the key as optional.
[sources.auslaw_mcp]
base_url = "mcp://auslaw"
fetch_strategy = "mcp_server"
doc_formats = ["html", "pdf"]
# Throttling settings, written as dotted keys; this yields the same
# sources.auslaw_mcp.rate_limit table as a dedicated [..rate_limit] header.
# These are the most conservative limits in the file (lowest rps, longest retry_after).
# Units are not stated here: presumably rps = requests per second and
# retry_after = seconds — TODO confirm.
rate_limit.rps = 0.3
rate_limit.concurrent = 1
rate_limit.retry_after = 180
# On-disk data location and Neo4j connection settings.
[storage]
# Relative path; what it is resolved against is not shown in this file —
# presumably the process working directory (TODO confirm).
data_dir = "data"
# Bolt endpoint on the local machine.
neo4j_uri = "bolt://localhost:7687"
neo4j_user = "neo4j"
# NOTE(review): placeholder credential in a tracked file. The header of this
# file says not to commit secrets — set the real password in config.local.toml.
neo4j_password = "password"
neo4j_database = "au_legal"
# LLM provider credentials and model selection.
# Both API keys are intentionally blank in this tracked file; per the inline
# notes they can come from the environment or be filled in locally.
[llm]
anthropic_api_key = "" # set via ANTHROPIC_API_KEY env or here
openai_api_key = "" # set via OPENAI_API_KEY env or here
# Model identifiers are passed as plain strings; TOML does not validate them.
extraction_model = "claude-haiku-4-5-20251001"
embedding_model = "text-embedding-3-small"
# Presumably the number of items sent per embedding request — TODO confirm.
embedding_batch_size = 100
# Telegram integration settings. Shipped switched off (enabled = false)
# with blank credentials.
# NOTE(review): bot_token is a secret — supply it in config.local.toml,
# not in this tracked file.
[telegram]
bot_token = ""
chat_id = ""
enabled = false
# Server settings.
[server]
# Loopback address; presumably the server accepts local connections only
# unless this is changed — verify against the server startup code.
host = "127.0.0.1"
port = 8000
# NOTE(review): set to "memory" even though [storage] carries Neo4j
# connection settings — confirm which values are accepted and when the
# Neo4j settings take effect.
graph_backend = "memory"
# Presumably a default token budget for responses — TODO confirm the unit
# and where it is applied.
default_max_tokens = 4000