"""NISRA Individual Wellbeing Module.
This module provides access to Northern Ireland's individual wellbeing statistics,
measuring subjective wellbeing across the population aged 16 and over.
The report covers four main areas of individual wellbeing:
- Personal Wellbeing (ONS4 measures): Life Satisfaction, Worthwhile, Happiness, Anxiety
- Loneliness: Frequency of feeling lonely
- Self-efficacy: Belief in one's capabilities
- Locus of Control: Perceived control over life events
Data Source: Northern Ireland Statistics and Research Agency provides individual wellbeing
statistics through their Wellbeing section at https://www.nisra.gov.uk/statistics/wellbeing/individual-wellbeing-northern-ireland.
The data measures subjective wellbeing using the ONS4 personal wellbeing questions alongside
additional measures of loneliness, self-efficacy, and locus of control for adults in Northern Ireland.
Update Frequency: Annual publications released in January each year, covering the financial
year period from April to March. The wellbeing statistics provide the official measure of
subjective wellbeing for Northern Ireland, with data updated once per year as part of NISRA's
social statistics programme.
Data Coverage:
- Personal Wellbeing (ONS4): 2014/15 - Present (annual, mean scores 0-10)
- Loneliness: 2017/18 - Present (annual, proportions)
- Self-efficacy: 2014/15 - Present (annual, mean scores 5-25)
- Locus of Control: Available in recent years
Demographics available:
- Sex, Age Group, Marital Status, Sexual Orientation
- Religion, Dependant status, Health status, Employment status
Examples:
>>> from bolster.data_sources.nisra import wellbeing
>>> df = wellbeing.get_latest_personal_wellbeing()
>>> 'life_satisfaction' in df.columns
True
>>> df_lonely = wellbeing.get_latest_loneliness()
>>> 'lonely_some_of_time' in df_lonely.columns
True
>>> summary = wellbeing.get_wellbeing_summary()
>>> 'life_satisfaction' in summary.columns
True
Publication Details:
- Frequency: Annual (January publication)
- Reference period: Financial year (April - March)
- Published by: NISRA / The Executive Office
- Contact: pfganalytics@executiveoffice-ni.gov.uk
- Population: Adults aged 16+ in Northern Ireland
"""
import logging
import re
from pathlib import Path
import pandas as pd
from ._base import NISRADataNotFoundError, download_file
[docs]
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
# Landing page for the Individual Wellbeing statistics on the NISRA website.
# NOTE(review): not referenced by any function visible in this module - confirm
# whether it is still needed.
[docs]
WELLBEING_BASE_URL = "https://www.nisra.gov.uk/statistics/wellbeing/individual-wellbeing-northern-ireland"
# Executive Office topic page that get_latest_wellbeing_publication_url() fetches
# and scans for report links.
[docs]
EXEC_OFFICE_TOPIC_URL = "https://www.executiveoffice-ni.gov.uk/topics/individual-wellbeing-northern-ireland"
# Scheme and host of the Executive Office website (no trailing slash).
[docs]
EXEC_OFFICE_BASE_URL = "https://www.executiveoffice-ni.gov.uk"
[docs]
def get_latest_wellbeing_publication_url() -> tuple[str, str]:
    """Get the URL of the latest Individual Wellbeing publication and its year.

    Fetches the Executive Office topic page (``EXEC_OFFICE_TOPIC_URL``) and scans
    every anchor whose text contains the word "Report" together with a financial
    year of the form ``YYYY/YY``. The link with the highest start year is
    returned; on a tie the first one found on the page wins.

    Returns:
        Tuple of (publication_url, year_string) e.g. ("https://...", "2024/25")

    Raises:
        NISRADataNotFoundError: If the topic page cannot be fetched, or if no
            matching publication link is present on it

    Example:
        >>> url, year = get_latest_wellbeing_publication_url()
        >>> url.startswith('https://')
        True
    """
    from urllib.parse import urljoin

    from bs4 import BeautifulSoup

    from bolster.utils.web import session

    logger.info("Fetching latest Individual Wellbeing publication URL...")
    try:
        response = session.get(EXEC_OFFICE_TOPIC_URL, timeout=30)
        response.raise_for_status()
    except Exception as e:
        # Any transport/HTTP failure is surfaced as the package's own error type.
        raise NISRADataNotFoundError(f"Failed to fetch wellbeing page: {e}") from e

    soup = BeautifulSoup(response.content, "html.parser")

    # Link text pattern: "Individual Wellbeing in Northern Ireland Report 2024/25"
    year_pattern = re.compile(r"(\d{4})/(\d{2})")

    publications = []
    for link in soup.find_all("a", href=True):
        link_text = link.get_text(strip=True)
        if "Report" not in link_text:
            continue
        match = year_pattern.search(link_text)
        if match is None:
            continue
        # urljoin leaves absolute URLs untouched and correctly resolves
        # root-relative ("/x"), path-relative ("x") and protocol-relative
        # ("//host/x") hrefs against the page they were found on; plain string
        # concatenation only handled the root-relative case.
        pub_url = urljoin(EXEC_OFFICE_TOPIC_URL, link["href"].strip())
        publications.append((int(match.group(1)), match.group(0), pub_url))

    if not publications:
        raise NISRADataNotFoundError("Could not find latest Individual Wellbeing publication")

    # max() returns the first entry with the highest start year.
    _, year_str, pub_url = max(publications, key=lambda pub: pub[0])
    logger.info(f"Found latest wellbeing publication: {year_str} at {pub_url}")
    return pub_url, year_str
[docs]
def get_wellbeing_file_url(year_str: str) -> str:
    """Build the download URL of the data-tables workbook for a financial year.

    The slash is dropped from ``year_str`` to form the year code embedded in the
    file name, and the upload folder is ``<start year + 2>-01``.

    Args:
        year_str: Financial year string (e.g., "2024/25")

    Returns:
        URL to the Excel data tables file

    Example:
        >>> url = get_wellbeing_file_url("2024/25")
        >>> url.startswith('https://')
        True
    """
    # Folder is keyed on January two years after the start year,
    # e.g. 2024/25 -> "2026-01".
    first_year = int(year_str.partition("/")[0])
    pub_year = first_year + 2

    # "2024/25" -> "202425"
    year_code = "".join(year_str.split("/"))
    filename = f"individual-wellbeing-ni-{year_code}-data-tables.xlsx"

    url = f"https://www.executiveoffice-ni.gov.uk/sites/default/files/{pub_year}-01/{filename}"
    logger.info(f"Constructed wellbeing file URL: {url}")
    return url
[docs]
def parse_personal_wellbeing(file_path: str | Path) -> pd.DataFrame:
    """Parse personal wellbeing (ONS4) measures from the Excel file.

    Reads one sheet per measure (Life Satisfaction, Worthwhile, Happiness,
    Anxiety), keeps the rows whose second column is a string containing "/"
    (the financial year) and whose third column is a non-null value convertible
    to float, and outer-joins the per-measure series on ``year``.

    Parsing is best-effort per measure: a sheet that cannot be read, or that
    yields no year rows, is logged as a warning and skipped, so the returned
    frame may lack that measure's column.

    Args:
        file_path: Path to the wellbeing data tables Excel file

    Returns:
        DataFrame sorted by ``year`` with columns:
            - year: str (financial year, e.g., "2024/25")
            - life_satisfaction: float (mean score 0-10)
            - worthwhile: float (mean score 0-10)
            - happiness: float (mean score 0-10)
            - anxiety: float (mean score 0-10, lower is better)

    Raises:
        NISRADataNotFoundError: If no measure could be parsed at all

    Example:
        >>> _, year = get_latest_wellbeing_publication_url()
        >>> path = download_file(get_wellbeing_file_url(year), cache_ttl_hours=90*24)
        >>> df = parse_personal_wellbeing(path)
        >>> 'life_satisfaction' in df.columns
        True
    """
    logger.info(f"Parsing personal wellbeing from {file_path}")

    # Output column name -> sheet name in the workbook
    sheet_configs = {
        "life_satisfaction": "Life_Satisfaction_Avg",
        "worthwhile": "Worthwhile_Avg",
        "happiness": "Happiness_Avg",
        "anxiety": "Anxiety_Avg ",  # Note: trailing space in sheet name
    }

    frames = []
    for metric, sheet_name in sheet_configs.items():
        try:
            df_raw = pd.read_excel(
                file_path,
                sheet_name=sheet_name,
                header=None,
                skiprows=4,  # Skip to header row
                nrows=15,  # Enough rows for the time series
            )

            # Year is in column 1, the estimate in column 2
            data = []
            for _, row in df_raw.iterrows():
                year_val = row.iloc[1]
                estimate = row.iloc[2]
                if isinstance(year_val, str) and "/" in year_val and pd.notna(estimate):
                    try:
                        data.append({"year": year_val, metric: float(estimate)})
                    except (ValueError, TypeError):
                        # Non-numeric estimate: skip the row
                        continue
        except Exception as e:
            logger.warning(f"Failed to parse {metric} from {sheet_name}: {e}")
            continue

        if not data:
            # pd.DataFrame([]) has no "year" column, which would make the merge
            # and sort below fail with KeyError; treat it like a failed sheet.
            logger.warning(f"No year rows found for {metric} in {sheet_name}")
            continue
        frames.append(pd.DataFrame(data))

    if not frames:
        raise NISRADataNotFoundError("Could not parse any personal wellbeing metrics")

    # Outer join so a year missing from one measure is kept (as NaN there)
    df = frames[0]
    for df_metric in frames[1:]:
        df = df.merge(df_metric, on="year", how="outer")

    df = df.sort_values("year").reset_index(drop=True)
    logger.info(f"Parsed {len(df)} years of personal wellbeing data")
    return df
[docs]
def parse_loneliness(file_path: str | Path) -> pd.DataFrame:
    """Parse loneliness data from the Excel file.

    Reads the "Loneliness - some of the time" sheet and keeps the rows whose
    second column is a string containing "/" (the financial year) and whose
    third column is a non-null value convertible to float.

    Args:
        file_path: Path to the wellbeing data tables Excel file

    Returns:
        DataFrame sorted by ``year`` with columns:
            - year: str (financial year, e.g., "2024/25")
            - lonely_some_of_time: float (estimate as published in the sheet)
            - confidence_interval: str, or None when the cell is empty
              (e.g., "+/- 1.1")

    Raises:
        NISRADataNotFoundError: If the sheet contains no parsable year rows

    Example:
        >>> _, year = get_latest_wellbeing_publication_url()
        >>> path = download_file(get_wellbeing_file_url(year), cache_ttl_hours=90*24)
        >>> df = parse_loneliness(path)
        >>> 'lonely_some_of_time' in df.columns
        True
    """
    logger.info(f"Parsing loneliness from {file_path}")

    df_raw = pd.read_excel(
        file_path,
        sheet_name="Loneliness - some of the time",
        header=None,
        skiprows=4,  # Skip to header row
        nrows=12,  # Time series rows
    )

    # Columns by position: 1 = year, 2 = estimate, 3 = confidence interval
    data = []
    for _, row in df_raw.iterrows():
        year_val = row.iloc[1]
        estimate = row.iloc[2]
        ci = row.iloc[3]
        if isinstance(year_val, str) and "/" in year_val and pd.notna(estimate):
            try:
                data.append(
                    {
                        "year": year_val,
                        "lonely_some_of_time": float(estimate),
                        "confidence_interval": str(ci) if pd.notna(ci) else None,
                    }
                )
            except (ValueError, TypeError):
                # Non-numeric estimate: skip the row
                continue

    if not data:
        # An empty frame has no "year" column, so sorting it would raise an
        # opaque KeyError; report the problem with the package's own error.
        raise NISRADataNotFoundError("Could not parse any loneliness data")

    df = pd.DataFrame(data).sort_values("year").reset_index(drop=True)
    logger.info(f"Parsed {len(df)} years of loneliness data")
    return df
[docs]
def parse_self_efficacy(file_path: str | Path) -> pd.DataFrame:
    """Parse self-efficacy data from the Excel file.

    Self-efficacy measures a person's belief in their capabilities to influence
    events in their lives. Scores range from 5 to 25.

    Reads the "Self-efficacy_avg" sheet and keeps the rows whose second column
    is a string containing "/" (the financial year) and whose third column is a
    non-null value convertible to float.

    Args:
        file_path: Path to the wellbeing data tables Excel file

    Returns:
        DataFrame sorted by ``year`` with columns:
            - year: str (financial year, e.g., "2024/25")
            - self_efficacy_mean: float (mean score 5-25)
            - confidence_interval: str, or None when the cell is empty
              (e.g., "+/- 0.1")

    Raises:
        NISRADataNotFoundError: If the sheet contains no parsable year rows

    Example:
        >>> _, year = get_latest_wellbeing_publication_url()
        >>> path = download_file(get_wellbeing_file_url(year), cache_ttl_hours=90*24)
        >>> df = parse_self_efficacy(path)
        >>> 'self_efficacy_mean' in df.columns
        True
    """
    logger.info(f"Parsing self-efficacy from {file_path}")

    df_raw = pd.read_excel(
        file_path,
        sheet_name="Self-efficacy_avg",
        header=None,
        skiprows=3,  # Skip to header row
        nrows=15,  # Time series rows
    )

    # Columns by position: 1 = year, 2 = estimate, 3 = confidence interval
    data = []
    for _, row in df_raw.iterrows():
        year_val = row.iloc[1]
        estimate = row.iloc[2]
        ci = row.iloc[3]
        if isinstance(year_val, str) and "/" in year_val and pd.notna(estimate):
            try:
                data.append(
                    {
                        "year": year_val,
                        "self_efficacy_mean": float(estimate),
                        "confidence_interval": str(ci) if pd.notna(ci) else None,
                    }
                )
            except (ValueError, TypeError):
                # Non-numeric estimate: skip the row
                continue

    if not data:
        # An empty frame has no "year" column, so sorting it would raise an
        # opaque KeyError; report the problem with the package's own error.
        raise NISRADataNotFoundError("Could not parse any self-efficacy data")

    df = pd.DataFrame(data).sort_values("year").reset_index(drop=True)
    logger.info(f"Parsed {len(df)} years of self-efficacy data")
    return df
[docs]
def get_latest_personal_wellbeing(force_refresh: bool = False) -> pd.DataFrame:
    """Fetch and parse the ONS4 personal wellbeing series from the newest release.

    Looks up the latest publication year, builds the data-tables URL for it,
    downloads the workbook through ``download_file`` and hands the result to
    ``parse_personal_wellbeing``.

    Args:
        force_refresh: Force re-download even if cached

    Returns:
        DataFrame with columns:
            - year: str (financial year)
            - life_satisfaction: float (mean 0-10, higher is better)
            - worthwhile: float (mean 0-10, higher is better)
            - happiness: float (mean 0-10, higher is better)
            - anxiety: float (mean 0-10, lower is better)

    Example:
        >>> df = get_latest_personal_wellbeing()
        >>> 'life_satisfaction' in df.columns
        True
    """
    latest_year = get_latest_wellbeing_publication_url()[1]
    workbook = download_file(
        get_wellbeing_file_url(latest_year),
        cache_ttl_hours=90 * 24,  # 90-day cache lifetime, expressed in hours
        force_refresh=force_refresh,
    )
    return parse_personal_wellbeing(workbook)
[docs]
def get_latest_loneliness(force_refresh: bool = False) -> pd.DataFrame:
    """Fetch and parse the loneliness series from the newest release.

    Looks up the latest publication year, builds the data-tables URL for it,
    downloads the workbook through ``download_file`` and hands the result to
    ``parse_loneliness``.

    Args:
        force_refresh: Force re-download even if cached

    Returns:
        DataFrame with columns:
            - year: str (financial year)
            - lonely_some_of_time: float (proportion)
            - confidence_interval: str

    Example:
        >>> df = get_latest_loneliness()
        >>> 'lonely_some_of_time' in df.columns
        True
    """
    latest_year = get_latest_wellbeing_publication_url()[1]
    workbook = download_file(
        get_wellbeing_file_url(latest_year),
        cache_ttl_hours=90 * 24,  # 90-day cache lifetime, expressed in hours
        force_refresh=force_refresh,
    )
    return parse_loneliness(workbook)
[docs]
def get_latest_self_efficacy(force_refresh: bool = False) -> pd.DataFrame:
    """Fetch and parse the self-efficacy series from the newest release.

    Looks up the latest publication year, builds the data-tables URL for it,
    downloads the workbook through ``download_file`` and hands the result to
    ``parse_self_efficacy``.

    Args:
        force_refresh: Force re-download even if cached

    Returns:
        DataFrame with columns:
            - year: str (financial year)
            - self_efficacy_mean: float (mean 5-25)
            - confidence_interval: str

    Example:
        >>> df = get_latest_self_efficacy()
        >>> 'self_efficacy_mean' in df.columns
        True
    """
    latest_year = get_latest_wellbeing_publication_url()[1]
    workbook = download_file(
        get_wellbeing_file_url(latest_year),
        cache_ttl_hours=90 * 24,  # 90-day cache lifetime, expressed in hours
        force_refresh=force_refresh,
    )
    return parse_self_efficacy(workbook)
[docs]
def get_wellbeing_summary(force_refresh: bool = False) -> pd.DataFrame:
    """Get a summary of all wellbeing measures for the latest year.

    Combines personal wellbeing (ONS4), loneliness, and self-efficacy data
    into a single summary for the most recent year. The latest publication is
    resolved and downloaded once, and all three series are parsed from that
    same workbook.

    The reference year is the last ``year`` in the personal wellbeing series.
    Loneliness and self-efficacy values are included only when their series
    contain that same year.

    Args:
        force_refresh: Force re-download even if cached

    Returns:
        DataFrame with one row containing:
            - year: str
            - life_satisfaction: float
            - worthwhile: float
            - happiness: float
            - anxiety: float
            - lonely_some_of_time: float
            - self_efficacy_mean: float

    Example:
        >>> summary = get_wellbeing_summary()
        >>> 'life_satisfaction' in summary.columns
        True
    """
    # Resolve and download once instead of once per measure: every measure
    # lives in the same workbook, so repeating the page scrape and cache lookup
    # for each of them is wasted network traffic.
    _, year_str = get_latest_wellbeing_publication_url()
    file_path = download_file(
        get_wellbeing_file_url(year_str),
        cache_ttl_hours=90 * 24,
        force_refresh=force_refresh,
    )

    df_personal = parse_personal_wellbeing(file_path)
    df_loneliness = parse_loneliness(file_path)
    df_efficacy = parse_self_efficacy(file_path)

    # Get the latest year from personal wellbeing (frame is sorted by year)
    latest_year = df_personal["year"].iloc[-1]
    summary = {"year": latest_year}

    # Add personal wellbeing; a measure whose sheet failed to parse is absent
    latest_personal = df_personal[df_personal["year"] == latest_year].iloc[0]
    for col in ["life_satisfaction", "worthwhile", "happiness", "anxiety"]:
        if col in latest_personal:
            summary[col] = latest_personal[col]

    # Add loneliness and self-efficacy when they cover the same year
    optional_series = (
        (df_loneliness, "lonely_some_of_time"),
        (df_efficacy, "self_efficacy_mean"),
    )
    for frame, column in optional_series:
        matches = frame.loc[frame["year"] == latest_year, column]
        if not matches.empty:
            summary[column] = matches.iloc[0]

    return pd.DataFrame([summary])
[docs]
def get_personal_wellbeing_by_year(df: pd.DataFrame, year: str) -> pd.DataFrame:
    """Return the rows of a personal wellbeing frame that belong to one year.

    Args:
        df: DataFrame from get_latest_personal_wellbeing()
        year: Financial year string (e.g., "2024/25")

    Returns:
        Independent copy of the rows whose ``year`` equals ``year``; empty when
        the year is not present

    Example:
        >>> df = get_latest_personal_wellbeing()
        >>> df_2024 = get_personal_wellbeing_by_year(df, "2024/25")
        >>> 'life_satisfaction' in df_2024.columns
        True
    """
    is_requested_year = df["year"].eq(year)
    selected = df.loc[is_requested_year]
    # Copy so callers can modify the result without touching the input frame.
    return selected.copy()
[docs]
def validate_personal_wellbeing(df: pd.DataFrame) -> bool:  # pragma: no cover
    """Check a personal wellbeing frame for structural and range problems.

    The checks run in this order and stop at the first failure:
        1. ``year`` and all four ONS4 measure columns are present
        2. every measure's minimum and maximum lie within 0-10
        3. no value of ``year`` occurs more than once

    Args:
        df: DataFrame from get_latest_personal_wellbeing()

    Returns:
        True if validation passes

    Raises:
        ValueError: If validation fails
    """
    ons4_measures = ["life_satisfaction", "worthwhile", "happiness", "anxiety"]
    required_cols = {"year", *ons4_measures}

    absent = required_cols - set(df.columns)
    if absent:
        raise ValueError(f"Missing columns: {absent}")

    # ONS4 measures are scored 0-10
    for col in ons4_measures:
        scores = df[col]
        if scores.min() < 0 or scores.max() > 10:
            raise ValueError(f"{col} scores outside valid range 0-10")

    if df.duplicated(subset=["year"]).any():
        raise ValueError("Duplicate years found")

    return True