Source code for utility

"""Utility function module for chart creation.

Author: Andrew Ridyard.

License: GNU General Public License v3 or later.

Copyright (C): 2025.

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <https://www.gnu.org/licenses/>.

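Functions:
    add_colorbar: Add a colorbar to a figure, using the provided axis.
    add_text: Create annotation text on an Axes.
    add_wedge_labels: Add scaled and rotated labels around each wedge.
    aggregate_temporal_columns: Aggregate values by ring & wedge columns.
    assign_temporal_columns: Assign ring & wedge columns to a DataFrame.
    get_figure_dimensions: Calculate an optimal data clock figure size.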

Constants:
    VALID_STYLES: Valid font styles.
"""

import math
from collections import defaultdict
from typing import Optional, Sequence, Tuple, get_args

import numpy as np
from matplotlib import colormaps
from matplotlib.axes import Axes
from matplotlib.cm import ScalarMappable
from matplotlib.colorbar import Colorbar
from matplotlib.colors import Normalize
from matplotlib.figure import Figure
from matplotlib.text import Text
from numpy.typing import DTypeLike, NDArray
from pandas import DataFrame, MultiIndex
from pypalettes import load_cmap

from dataclocklib.exceptions import ModeError
from dataclocklib.typing import Aggregation, CmapNames, FontStyle, Mode

# Allowed font style values, taken from the arguments of the FontStyle type.
VALID_STYLES: Tuple[FontStyle, ...] = get_args(FontStyle)



[docs]
def add_colorbar(
    ax: Axes,
    fig: Figure,
    cmap_name: str,
    cmap_reverse: bool,
    vmax: float,
    dtype: DTypeLike = np.float64,
) -> Colorbar:
    """Attach a vertical colorbar to the right of the provided axis.

    The colour scale is normalised over the range 1 to vmax and carries
    five evenly spaced ticks. The colormap's 'under' colour is set to
    white and the bar is extended at its minimum end.

    Args:
        ax (Axes): Chart Axis the colorbar is attached to.
        fig (Figure): Chart Figure that draws the colorbar.
        cmap_name (str): Name of the colormap passed to load_cmap.
        cmap_reverse (bool): Passed to load_cmap as its reverse flag.
        vmax (float): Maximum value of the colorbar.
        dtype (DTypeLike): Data type for the colorbar tick values.

    Returns:
        The Colorbar object created on the figure.
    """
    lower_bound = 1
    tick_positions = np.linspace(lower_bound, vmax, 5, dtype=dtype)

    palette = load_cmap(
        cmap_name,
        cmap_type="continuous",
        reverse=cmap_reverse,
    )
    # values below the lower bound are drawn in white
    palette.set_under("w")

    mappable = ScalarMappable(
        norm=Normalize(lower_bound, vmax),
        cmap=palette,
    )
    bar_options = {
        "orientation": "vertical",
        "location": "right",
        "ticks": tick_positions,
        "shrink": 0.5,
        "extend": "min",
        "use_gridspec": False,
    }
    bar = fig.colorbar(mappable, ax=ax, **bar_options)
    bar.ax.tick_params(direction="out")

    return bar



def add_wedge_labels(
    ax: Axes,
    font_scale_factor: float,
    ring_scale_factor: float,
    ring_text_spacing: float,
    max_radius: int,
    theta: NDArray,
    width: float,
    wedge_labels: Sequence[str],
) -> None:
    """Write a rotated text label at the centre angle of every wedge.

    Each label is drawn with Axes.text so that it can be rotated to
    follow the angle of the wedge it annotates. The font size is
    scaled by font_scale_factor and the label is pushed out beyond
    max_radius by a spacing that grows with ring_scale_factor.

    Args:
        ax (Axes): Chart Axis.
        font_scale_factor (float): Scale factor based on current figure size.
        ring_scale_factor (float): Scale factor based on number of rings.
        ring_text_spacing (float): Text label distance from polar axis.
        max_radius (int): Maximum radius (unique rings + 1).
        theta (NDArray): Angles (radians) for each data clock wedge.
        width (float): Width of each wedge (2 * Pi / number of wedges).
        wedge_labels (Sequence[str]): Label text for each wedge.

    Returns:
        None
    """
    # above a scale factor of 3 the spacing grows sub-linearly
    spacing = (
        ring_text_spacing * (ring_scale_factor**0.61)
        if ring_scale_factor > 3
        else ring_text_spacing * ring_scale_factor
    )
    label_radius = max_radius + spacing

    text_style = {
        "rotation_mode": "anchor",
        "transform": ax.transData,
        "family": "sans-serif",
        "fontsize": 11 * font_scale_factor,
        "weight": "medium",
        "style": "normal",
        "ha": "center",
        "va": "center",
    }

    # the label sits on the angular midpoint of its wedge
    wedge_centres = theta + width / 2
    for position, centre in enumerate(wedge_centres):
        # text rotation is expressed in degrees
        degrees = np.rad2deg(centre)

        first_or_last_quadrant = (0 <= degrees < 90) or (270 <= degrees <= 360)
        if first_or_last_quadrant:
            text_rotation = -degrees
        else:
            text_rotation = 180 - degrees

        ax.text(
            centre,
            label_radius,
            wedge_labels[position],
            rotation=text_rotation,
            **text_style,
        )



[docs]
def add_text(
    ax: Axes, x: float, y: float, text: Optional[str] = None, **kwargs
) -> Text:
    """Write text on an axis at the position given by x and y.

    Args:
        ax (Axes): Axis to annotate.
        x (float): Axis x position.
        y (float): Axis y position.
        text (str, optional): Text to annotate. When None, an empty
            string is written instead.
        **kwargs: Extra keyword arguments forwarded to Axes.text.

    Returns:
        The object returned by Axes.text for the annotation.
    """
    if text is None:
        text = ""
    return ax.text(x, y, text, **kwargs)



def aggregate_temporal_columns(
    data: DataFrame, agg_column: str, agg: Aggregation, mode: Mode
) -> DataFrame:
    """Aggregate agg_column for every ring & wedge combination.

    The DataFrame is grouped by its 'ring' and 'wedge' columns and the
    aggregate function is applied to the chosen column. The result is
    then expanded so that every ring/wedge combination expected for the
    mode has a row, with 0 used where no data was present.

    NOTE: The 'ring' & 'wedge' columns are assigned by the utility function
    assign_temporal_columns.

    Args:
        data (DataFrame): DataFrame containing data to aggregate.
        agg_column (str): DataFrame Column to aggregate.
        agg (Aggregation): Aggregation function; 'count', 'mean', 'median',
            'mode' & 'sum'.
        mode (Mode): A mode key representing the temporal bins used in the
            chart; 'YEAR_MONTH', 'YEAR_WEEK', 'WEEK_DAY', 'DOW_HOUR' &
            'DAY_HOUR'.

    Raises:
        ModeError: Unexpected mode value is passed.
        ValueError: Missing 'ring' & 'wedge' columns.

    Returns:
        A DataFrame with aggregate values in a new column named after the
        aggregate function.
    """
    group_keys = ["ring", "wedge"]
    absent = set(group_keys).difference(data.columns)
    if absent:
        raise ValueError(f"Expected DataFrame columns: {group_keys}")

    # rings default to those observed in the data; wedges depend on mode
    rings = data["ring"].unique()
    if mode == "YEAR_MONTH":
        wedges = tuple(range(1, 13))
    elif mode == "YEAR_WEEK":
        wedges = range(1, 53)
    elif mode == "WEEK_DAY":
        wedges = range(0, 7)
    elif mode == "DOW_HOUR":
        # day-of-week rings are fixed rather than taken from the data
        rings = range(0, 7)
        wedges = range(0, 24)
    elif mode == "DAY_HOUR":
        wedges = range(0, 24)
    else:
        raise ModeError(mode, get_args(Mode))

    # one row per observed ring/wedge pair, value column renamed to agg
    grouped = data.groupby(group_keys, as_index=False)[agg_column]
    summary = grouped.agg(agg)
    summary = summary.set_axis([*group_keys, agg], axis="columns")

    # every ring/wedge pair the chart is expected to draw
    full_grid = MultiIndex.from_product([rings, wedges], names=group_keys)

    # add rows for pairs that had no data, then restore plain columns
    summary = summary.set_index(group_keys)
    summary = summary.reindex(full_grid)
    summary = summary.reset_index()

    # pairs without data carry NaN after the reindex; chart them as 0
    return summary.fillna(0)


def assign_temporal_columns(
    data: DataFrame, date_column: str, mode: Mode
) -> DataFrame:
    """Assign ring & wedge columns to a DataFrame based on mode.

    The mode value is mapped to a predetermined division of a larger unit of
    time into rings, which are then subdivided by a smaller unit of time into
    wedges, creating a set of temporal bins. These bins are assigned as 'ring'
    and 'wedge' columns.

    Args:
        data (DataFrame): DataFrame containing data to visualise.
        date_column (str): Name of DataFrame datetime64 column.
        mode (Mode, optional): A mode key representing the
            temporal bins used in the chart; 'YEAR_MONTH',
            'YEAR_WEEK', 'WEEK_DAY', 'DOW_HOUR' & 'DAY_HOUR'.

    Returns:
        A DataFrame with 'ring' & 'wedge' columns assigned.
    """
    # dict map for ring & wedge features based on mode
    mode_map = defaultdict(dict)
    # year | January - December
    if mode == "YEAR_MONTH":
        mode_map[mode]["ring"] = data[date_column].dt.year
        mode_map[mode]["wedge"] = data[date_column].dt.month
    # year | weeks 1 - 52
    if mode == "YEAR_WEEK":
        mode_map[mode]["ring"] = data[date_column].dt.year
        week = data[date_column].dt.isocalendar().week
        week[week == 53] = 52
        mode_map[mode]["wedge"] = week
    # weeks 1 - 52 | Monday - Sunday
    if mode == "WEEK_DAY":
        week = data[date_column].dt.isocalendar().week
        year = data[date_column].dt.year
        mode_map[mode]["ring"] = week + year * 100
        mode_map[mode]["wedge"] = data[date_column].dt.day_of_week
    # days 1 - 7 (Monday - Sunday) | 00:00 - 23:00
    if mode == "DOW_HOUR":
        mode_map[mode]["ring"] = data[date_column].dt.day_of_week
        mode_map[mode]["wedge"] = data[date_column].dt.hour
    # days 1 - 365 | 00:00 - 23:00
    if mode == "DAY_HOUR":
        mode_map[mode]["ring"] = data[date_column].dt.strftime("%Y%j")
        mode_map[mode]["wedge"] = data[date_column].dt.hour

    return data.assign(**mode_map[mode]).astype({"ring": "int64"})


def get_figure_dimensions(wedges: int) -> tuple[float, float]:
    """Work out a square data clock figure size from the wedge count.

    Each wedge is allotted 0.70 of figure space and the side of the square
    figure is the square root of the total, rounded up to a whole number.
    A square figure is used given the circular nature of the chart.

    NOTE: The figure size is never smaller than (10.0, 10.0).

    Example:
      >>> get_figure_dimensions(168)
      (11.0, 11.0)

    Args:
      wedges: Number of wedges (number of rings * wedges per ring).

    Returns:
      A tuple containing the height & width of the square figure in inches.
    """
    minimum_side = 10
    total_space = wedges * 0.70
    side = math.ceil(math.sqrt(total_space))
    if side < minimum_side:
        side = minimum_side
    side = float(side)
    return side, side