"""Common utilities for PSNI data sources.
This module provides shared functionality for all PSNI (Police Service of
Northern Ireland) data source modules, including:
- **Caching**: Download and cache data files with configurable TTL
- **Geographic codes**: LGD and NUTS3 code mappings for cross-dataset integration
- **Exceptions**: Standardized error handling for data operations
Cache Location:
Files are cached in ``~/.cache/bolster/psni/`` with filenames based on
URL hashes. Cache validity is configurable per-request.
Geographic Code Systems:
- **LGD codes** (N09000XXX): ONS Local Government District codes for NI
- **NUTS3 codes** (UKN0X): EU statistical region codes for aggregation
Example:
>>> from bolster.data_sources.psni._base import get_lgd_code, get_nuts3_code
>>> get_lgd_code("Belfast City")
'N09000003'
>>> get_nuts3_code("Belfast City")
'UKN06'
"""
import logging
from pathlib import Path
from bolster.utils.cache import CachedDownloader, DownloadError
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
# Shared downloader instance for PSNI data sources.
# download_file() and clear_cache() in this module both delegate to it, so all
# PSNI downloads go through one cache. The "psni" argument presumably selects
# the cache namespace (the module docstring mentions ``~/.cache/bolster/psni/``)
# -- confirm against CachedDownloader. download_file() documents the timeout
# as 60 seconds.
_downloader = CachedDownloader("psni", timeout=60)
[docs]
class PSNIDataError(Exception):
    """Root of the PSNI exception hierarchy.

    Every PSNI-specific exception derives from this class, so a single
    ``except PSNIDataError`` clause is enough to handle any of them.
    """
[docs]
class PSNIDataNotFoundError(PSNIDataError):
    """Signal that a PSNI data file could not be downloaded or accessed.

    Typical triggers:
        - a network request fails (timeout, connection error)
        - the server answers with an HTTP error (404, 500, etc.)
        - the requested resource is unavailable
    """
[docs]
class PSNIValidationError(PSNIDataError):
    """Signal that PSNI data did not pass validation.

    Typical triggers:
        - the CSV structure does not match the expected columns
        - the data contains invalid or unexpected values
        - required fields are missing or malformed
    """
[docs]
class PSNIDataStaleError(PSNIDataError):
    """Signal that a PSNI data source is known to be stale with no accessible update.

    Used when the underlying data source has not been updated and no
    machine-readable replacement can be reached (e.g. because Cloudflare
    protection on the official PSNI website blocks automated downloads).
    """
# ---------------------------------------------------------------------------
# Geographic code mappings
# ---------------------------------------------------------------------------
# Each Policing District corresponds 1:1 to a Local Government District (LGD)
# established in 2015, which is what makes cross-comparison with other NISRA
# datasets possible.
#
# Keys are policing district names, values are LGD codes.
LGD_CODES: dict[str, str] = {
    "Antrim & Newtownabbey": "N09000001",
    "Ards & North Down": "N09000011",
    "Armagh City Banbridge & Craigavon": "N09000002",
    "Belfast City": "N09000003",
    "Causeway Coast & Glens": "N09000004",
    "Derry City & Strabane": "N09000005",
    "Fermanagh & Omagh": "N09000006",
    "Lisburn & Castlereagh City": "N09000007",
    "Mid & East Antrim": "N09000008",
    "Mid Ulster": "N09000009",
    "Newry Mourne & Down": "N09000010",
}
# NUTS3 regional codes used for aggregation (NUTS 2021 classification).
# NUTS stands for "Nomenclature of Territorial Units for Statistics", an EU
# standard. Since 2016 the NI NUTS3 regions map 1:1 to Local Government
# Districts.
#
# Keys are policing district names (the same keys as LGD_CODES), values are
# NUTS3 codes.
NUTS3_CODES: dict[str, str] = {
    "Antrim & Newtownabbey": "UKN0D",
    "Ards & North Down": "UKN09",
    "Armagh City Banbridge & Craigavon": "UKN07",
    "Belfast City": "UKN06",
    "Causeway Coast & Glens": "UKN0C",
    "Derry City & Strabane": "UKN0A",
    "Fermanagh & Omagh": "UKN0G",
    "Lisburn & Castlereagh City": "UKN0E",
    "Mid & East Antrim": "UKN0F",
    "Mid Ulster": "UKN0B",
    "Newry Mourne & Down": "UKN08",
}
# Human-readable names for the NUTS3 codes above, kept for reference.
# Source: Eurostat NUTS 2021 classification.
#
# Keys are NUTS3 codes, values are region names.
NUTS_REGION_NAMES: dict[str, str] = {
    "UKN06": "Belfast",
    "UKN07": "Armagh City, Banbridge and Craigavon",
    "UKN08": "Newry, Mourne and Down",
    "UKN09": "Ards and North Down",
    "UKN0A": "Derry City and Strabane",
    "UKN0B": "Mid Ulster",
    "UKN0C": "Causeway Coast and Glens",
    "UKN0D": "Antrim and Newtownabbey",
    "UKN0E": "Lisburn and Castlereagh",
    "UKN0F": "Mid and East Antrim",
    "UKN0G": "Fermanagh and Omagh",
}
[docs]
def get_lgd_code(district_name: str) -> str | None:
    """Look up the LGD code that corresponds to a policing district.

    Args:
        district_name: Name of the policing district (e.g., "Belfast City").
            The lookup is an exact match against the keys of ``LGD_CODES``.

    Returns:
        The matching LGD code (e.g., "N09000003"), or None when the name is
        not a key of ``LGD_CODES``.

    Example:
        >>> get_lgd_code("Belfast City")
        'N09000003'
    """
    lgd_code = LGD_CODES.get(district_name, None)
    return lgd_code
[docs]
def get_nuts3_code(district_name: str) -> str | None:
    """Look up the NUTS3 regional code that corresponds to a policing district.

    Based on the NUTS 2021 classification, in which every LGD maps 1:1 to a
    NUTS3 region.

    Args:
        district_name: Name of the policing district (e.g., "Belfast City").
            The lookup is an exact match against the keys of ``NUTS3_CODES``.

    Returns:
        The matching NUTS3 code (e.g., "UKN06"), or None when the name is not
        a key of ``NUTS3_CODES``.

    Example:
        >>> get_nuts3_code("Belfast City")
        'UKN06'
        >>> get_nuts3_code("Derry City & Strabane")
        'UKN0A'
    """
    nuts3_code = NUTS3_CODES.get(district_name, None)
    return nuts3_code
[docs]
def get_nuts_region_name(nuts3_code: str) -> str | None:
    """Look up the descriptive region name for a NUTS3 code.

    Args:
        nuts3_code: NUTS3 code (e.g., "UKN06"). The lookup is an exact match
            against the keys of ``NUTS_REGION_NAMES``.

    Returns:
        The region name (e.g., "Belfast"), or None when the code is not a key
        of ``NUTS_REGION_NAMES``.

    Example:
        >>> get_nuts_region_name("UKN06")
        'Belfast'
        >>> get_nuts_region_name("UKN0A")
        'Derry City and Strabane'
    """
    region_name = NUTS_REGION_NAMES.get(nuts3_code, None)
    return region_name
def download_file(
    url: str,
    cache_ttl_hours: int = 24,
    force_refresh: bool = False,
    headers: dict | None = None,
) -> Path:
    """Fetch a file through the shared PSNI downloader, using its cache.

    The request is delegated to the module-level ``_downloader``, which is
    configured with a 60-second timeout. A valid cached copy is returned in
    place of a fresh download unless ``force_refresh`` is set.

    Args:
        url: URL to download.
        cache_ttl_hours: Cache validity in hours (default: 24).
        force_refresh: If True, bypass the cache and re-download.
        headers: Optional extra HTTP headers (e.g. ``{"Referer": "..."}``).
            Useful for resources that require a browser-like User-Agent or a
            specific Referer to bypass Cloudflare checks.

    Returns:
        Path to the downloaded (or cached) file.

    Raises:
        PSNIDataNotFoundError: If the downloader raises ``DownloadError``
            (network or HTTP failure). The original error is kept as the
            exception's cause.

    Example:
        >>> from bolster.data_sources.psni._base import PSNIDataNotFoundError
        >>> try:
        ...     download_file("https://example.com/no-such-file.csv")
        ... except PSNIDataNotFoundError:
        ...     print("PSNIDataNotFoundError raised for unreachable URL")
        PSNIDataNotFoundError raised for unreachable URL
    """
    try:
        local_path = _downloader.download(
            url,
            cache_ttl_hours=cache_ttl_hours,
            force_refresh=force_refresh,
            headers=headers,
        )
    except DownloadError as exc:
        # Translate the generic download failure into the PSNI-specific type,
        # chaining the original so the root cause stays in the traceback.
        raise PSNIDataNotFoundError(str(exc)) from exc
    return local_path
[docs]
def clear_cache(pattern: str | None = None) -> int:
    """Remove cached files from the PSNI cache directory.

    Delegates to the shared module-level ``_downloader``.

    Args:
        pattern: Optional glob pattern selecting which files to remove
            (e.g., ``*.csv``). If None, all cached files in the directory
            are cleared.

    Returns:
        Number of files deleted.

    Example:
        >>> from bolster.data_sources.psni._base import clear_cache
        >>> deleted = clear_cache("*.csv")
        >>> isinstance(deleted, int)
        True
    """
    deleted_count = _downloader.clear(pattern)
    return deleted_count