"""NISRA Index of Services (IOS) for Northern Ireland.
Provides quarterly index of services sector output, comparing Northern Ireland
against the UK average. Base year 2020=100.
Services sectors covered:
- Wholesale and retail trade; repair of motor vehicles
- Accommodation and food service activities
- Information and communication
- Financial and insurance activities
- Professional, scientific and technical activities
- Public administration and defence
- Education
- Human health and social work activities
- Other services
Data Source:
**Statistics page**: https://www.nisra.gov.uk/statistics/economic-output/index-services
The module scrapes this page to find the latest quarterly publication,
then downloads the Excel tables file.
Update Frequency: Quarterly (published ~3 months after reference quarter)
Geographic Coverage: Northern Ireland and UK comparison
Base Year: 2020=100
Example:
>>> from bolster.data_sources.nisra import index_of_services as ios
>>> df = ios.get_latest_ios()
>>> 'ni_index' in df.columns
True
>>> growth = ios.get_ios_growth(df)
>>> 'ni_yoy' in growth.columns
True
"""
import logging
import re
from pathlib import Path
import pandas as pd
from bs4 import BeautifulSoup
from bolster.utils.web import session
from ._base import NISRADataNotFoundError, NISRAValidationError, download_file
[docs]
logger = logging.getLogger(__name__)
[docs]
IOS_STATS_URL = "https://www.nisra.gov.uk/statistics/economic-output/index-services"
[docs]
IOS_BASE_URL = "https://www.nisra.gov.uk"
[docs]
def get_latest_ios_publication_url() -> tuple[str, int, int]:
"""Scrape the IOS statistics page to find the latest quarterly Excel file.
Returns:
Tuple of (excel_url, year, quarter)
Raises:
NISRADataNotFoundError: If publication cannot be found
"""
try:
response = session.get(IOS_STATS_URL, timeout=30)
response.raise_for_status()
except Exception as e:
raise NISRADataNotFoundError(f"Failed to fetch IOS statistics page: {e}") from e
soup = BeautifulSoup(response.content, "html.parser")
# Pub list uses slugs like: /publications/index-services-ios-...-quarter-4-2025...
quarter_slug_pat = re.compile(r"quarter[- _](\d+)[- _](\d{4})", re.IGNORECASE)
for link in soup.find_all("a", href=True):
href = link["href"]
if "index-services-ios" not in href or "publications" not in href:
continue
match = quarter_slug_pat.search(href)
if not match:
continue
quarter = int(match.group(1))
year = int(match.group(2))
pub_url = href if href.startswith("http") else f"{IOS_BASE_URL}{href}"
try:
pub_resp = session.get(pub_url, timeout=30)
pub_resp.raise_for_status()
except Exception as e:
logger.warning(f"Failed to fetch publication page {pub_url}: {e}")
continue
pub_soup = BeautifulSoup(pub_resp.content, "html.parser")
for file_link in pub_soup.find_all("a", href=True):
file_href = file_link["href"]
file_text = file_link.get_text(strip=True).lower()
if "ios" in file_text and "tables" in file_text and file_href.endswith(".xlsx"):
excel_url = file_href if file_href.startswith("http") else f"{IOS_BASE_URL}{file_href}"
logger.info(f"Found IOS Q{quarter} {year}: {excel_url}")
return excel_url, year, quarter
raise NISRADataNotFoundError("Could not find latest IOS publication")
[docs]
def parse_ios_file(file_path: str | Path) -> pd.DataFrame:
"""Parse IOS Excel tables file into long-format DataFrame.
Reads Table_1_1 which contains the headline NI and UK services index series.
Args:
file_path: Path to downloaded IOS tables Excel file
Returns:
DataFrame with columns:
- date: Timestamp (first day of quarter)
- year: int
- quarter: int (1-4)
- quarter_label: str (e.g. "Q1 2025")
- ni_index: float (NI Index of Services, 2020=100)
- uk_index: float (UK Index of Services, 2020=100)
Raises:
NISRAValidationError: If file structure is unexpected
"""
file_path = Path(file_path)
try:
df = pd.read_excel(file_path, sheet_name="Table_1_1", skiprows=2, header=0)
except Exception as e:
raise NISRAValidationError(f"Failed to read IOS file: {e}") from e
col0 = df.columns[0]
df = df.rename(columns={col0: "quarter_label"})
# Strip whitespace from all column names
df.columns = [str(c).strip() for c in df.columns]
df = df.rename(columns={"NI Services": "ni_index", "UK Services": "uk_index"})
df = df[df["quarter_label"].notna()].copy()
df["quarter_label"] = df["quarter_label"].astype(str).str.strip()
quarter_pattern = re.compile(r"^Q([1-4])\s+(\d{4})$")
mask = df["quarter_label"].str.match(quarter_pattern)
df = df[mask].copy()
if df.empty:
raise NISRAValidationError("No valid quarter rows found in IOS Table_1_1")
parsed = df["quarter_label"].str.extract(quarter_pattern)
df["quarter"] = parsed[0].astype(int)
df["year"] = parsed[1].astype(int)
quarter_month = {1: 1, 2: 4, 3: 7, 4: 10}
df["date"] = pd.to_datetime(df["year"].astype(str) + "-" + df["quarter"].map(quarter_month).astype(str) + "-01")
df["ni_index"] = pd.to_numeric(df["ni_index"], errors="coerce")
df["uk_index"] = pd.to_numeric(df["uk_index"], errors="coerce")
df = df[["date", "year", "quarter", "quarter_label", "ni_index", "uk_index"]]
df = df.sort_values("date").reset_index(drop=True)
logger.info(
f"Parsed IOS: {len(df)} quarters, {df['year'].min()} Q{df['quarter'].iloc[0]}-{df['year'].max()} Q{df['quarter'].iloc[-1]}"
)
return df
[docs]
def get_latest_ios(force_refresh: bool = False) -> pd.DataFrame:
"""Get the latest NI Index of Services quarterly series.
Args:
force_refresh: If True, bypass cache and download fresh data
Returns:
DataFrame with columns: date, year, quarter, quarter_label,
ni_index, uk_index (base year 2020=100)
Raises:
NISRADataNotFoundError: If latest publication cannot be found
NISRAValidationError: If file structure is unexpected
Example:
>>> df = get_latest_ios()
>>> 'ni_index' in df.columns
True
"""
excel_url, year, quarter = get_latest_ios_publication_url()
logger.info(f"Downloading IOS Q{quarter} {year} from: {excel_url}")
file_path = download_file(excel_url, cache_ttl_hours=24 * 90, force_refresh=force_refresh)
return parse_ios_file(file_path)
[docs]
def validate_ios_data(df: pd.DataFrame) -> bool:
"""Validate IOS DataFrame for basic integrity.
Args:
df: DataFrame from get_latest_ios()
Returns:
True if validation passes
Raises:
NISRAValidationError: If validation fails
"""
required = {"date", "year", "quarter", "ni_index", "uk_index"}
missing = required - set(df.columns)
if missing:
raise NISRAValidationError(f"Missing required columns: {missing}")
if df.empty:
raise NISRAValidationError("DataFrame is empty")
if df["year"].min() > 2010:
raise NISRAValidationError(f"Expected data from before 2010, got {df['year'].min()}")
if (df["ni_index"] <= 0).any():
raise NISRAValidationError("Non-positive NI index values found")
return True
[docs]
def get_ios_by_year(df: pd.DataFrame, year: int) -> pd.DataFrame:
"""Filter IOS data to a specific year.
Args:
df: DataFrame from get_latest_ios()
year: Year to filter
Returns:
Filtered DataFrame (up to 4 quarters)
Example:
>>> df = get_latest_ios()
>>> df_2024 = get_ios_by_year(df, 2024)
>>> len(df_2024) <= 4
True
"""
return df[df["year"] == year].reset_index(drop=True)
[docs]
def get_ios_by_quarter(df: pd.DataFrame, quarter: int, year: int) -> pd.DataFrame:
"""Filter IOS data to a specific quarter.
Args:
df: DataFrame from get_latest_ios()
quarter: Quarter number (1-4)
year: Year
Returns:
Filtered DataFrame (single row)
Example:
>>> df = get_latest_ios()
>>> q1_2024 = get_ios_by_quarter(df, 1, 2024)
>>> len(q1_2024) == 1
True
"""
return df[(df["quarter"] == quarter) & (df["year"] == year)].reset_index(drop=True)
[docs]
def get_ios_summary_statistics(df: pd.DataFrame, start_year: int | None = None, end_year: int | None = None) -> dict:
"""Calculate summary statistics for a period of IOS data.
Args:
df: DataFrame from get_latest_ios()
start_year: Optional start year filter (inclusive)
end_year: Optional end year filter (inclusive)
Returns:
Dictionary with keys: period, ni_mean, ni_min, ni_max,
uk_mean, uk_min, uk_max, quarters_count
Example:
>>> df = get_latest_ios()
>>> stats = get_ios_summary_statistics(df, start_year=2020)
>>> 'ni_mean' in stats
True
"""
filtered = df.copy()
if start_year is not None:
filtered = filtered[filtered["year"] >= start_year]
if end_year is not None:
filtered = filtered[filtered["year"] <= end_year]
return {
"period": f"{filtered['year'].min()}-{filtered['year'].max()}",
"ni_mean": float(filtered["ni_index"].mean()),
"ni_min": float(filtered["ni_index"].min()),
"ni_max": float(filtered["ni_index"].max()),
"uk_mean": float(filtered["uk_index"].mean()),
"uk_min": float(filtered["uk_index"].min()),
"uk_max": float(filtered["uk_index"].max()),
"quarters_count": len(filtered),
}
[docs]
def get_ios_growth(df: pd.DataFrame) -> pd.DataFrame:
"""Calculate quarter-on-quarter and year-on-year growth rates.
Args:
df: DataFrame from get_latest_ios()
Returns:
DataFrame with additional columns:
- ni_qoq: NI quarter-on-quarter % change
- ni_yoy: NI year-on-year % change
- uk_qoq: UK quarter-on-quarter % change
- uk_yoy: UK year-on-year % change
Example:
>>> df = get_latest_ios()
>>> growth = get_ios_growth(df)
>>> 'ni_yoy' in growth.columns
True
"""
result = df.copy()
result["ni_qoq"] = result["ni_index"].pct_change(1).mul(100).round(2)
result["ni_yoy"] = result["ni_index"].pct_change(4).mul(100).round(2)
result["uk_qoq"] = result["uk_index"].pct_change(1).mul(100).round(2)
result["uk_yoy"] = result["uk_index"].pct_change(4).mul(100).round(2)
return result