"""Utility function module for chart creation.
Author: Andrew Ridyard.
License: GNU General Public License v3 or later.
Copyright (C): 2025.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
Functions:
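add_colorbar: Add a colorbar to a figure, using the provided axis.
add_text: Create annotation text on an Axes.
add_wedge_labels: Add scaled and rotated labels around each wedge.
aggregate_temporal_columns: Aggregate values by ring & wedge columns.
assign_temporal_columns: Assign ring & wedge columns to a DataFrame.
get_figure_dimensions: Calculate an optimal data clock figure size.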
Constants:
VALID_STYLES: Valid font styles.
"""
import math
from collections import defaultdict
from typing import Optional, Sequence, Tuple, get_args
import numpy as np
from matplotlib import colormaps
from matplotlib.axes import Axes
from matplotlib.cm import ScalarMappable
from matplotlib.colorbar import Colorbar
from matplotlib.colors import Normalize
from matplotlib.figure import Figure
from matplotlib.text import Text
from numpy.typing import DTypeLike, NDArray
from pandas import DataFrame, MultiIndex
from pypalettes import load_cmap
from dataclocklib.exceptions import ModeError
from dataclocklib.typing import Aggregation, CmapNames, FontStyle, Mode
# Tuple of the type arguments of FontStyle (presumably a Literal of
# font style names - confirm against dataclocklib.typing).
VALID_STYLES: Tuple[FontStyle, ...] = get_args(FontStyle)
def add_colorbar(
    ax: Axes,
    fig: Figure,
    cmap_name: str,
    cmap_reverse: bool,
    vmax: float,
    dtype: DTypeLike = np.float64,
) -> Colorbar:
    """Attach a vertical colorbar to the right of the provided axis.

    The colour scale is normalised over the range 1 to vmax. Values below
    that range are drawn in white and the colorbar is created with
    extend="min".

    Args:
        ax (Axes): Chart Axis the colorbar is attached to.
        fig (Figure): Chart Figure used to create the colorbar.
        cmap_name (str): Colormap name passed to pypalettes load_cmap.
        cmap_reverse (bool): Reverse flag passed to load_cmap.
        vmax (float): Maximum value of the colorbar.
        dtype (DTypeLike): Data type for the colorbar tick values.

    Returns:
        The Colorbar object created by Figure.colorbar.
    """
    # five evenly spaced ticks spanning 1 to vmax
    tick_values = np.linspace(1, vmax, 5, dtype=dtype)

    palette = load_cmap(
        cmap_name, cmap_type="continuous", reverse=cmap_reverse
    )
    # anything below the normalised range is rendered white
    palette.set_under("w")

    mappable = ScalarMappable(norm=Normalize(1, vmax), cmap=palette)
    bar = fig.colorbar(
        mappable,
        ax=ax,
        orientation="vertical",
        location="right",
        ticks=tick_values,
        shrink=0.5,
        extend="min",
        use_gridspec=False,
    )
    bar.ax.tick_params(direction="out")
    return bar
def add_wedge_labels(
    ax: Axes,
    font_scale_factor: float,
    ring_scale_factor: float,
    ring_text_spacing: float,
    max_radius: int,
    theta: NDArray,
    width: float,
    wedge_labels: Sequence[str],
) -> None:
    """Draw a rotated text label at the centre angle of every wedge.

    Each label is drawn with Axes.text so that its rotation can follow
    the angle of the wedge it annotates. The font size is scaled by
    font_scale_factor and the label is pushed outwards from max_radius
    by a spacing that grows with ring_scale_factor.

    Args:
        ax (Axes): Chart Axis.
        font_scale_factor (float): Scale factor based on current figure size.
        ring_scale_factor (float): Scale factor based on number of rings.
        ring_text_spacing (float): Text label distance from polar axis.
        max_radius (int): Maximum radius (unique rings + 1).
        theta (NDArray): Angles (radians) for each data clock wedge.
        width (float): Width of each wedge (2 * Pi / number of wedges).
        wedge_labels (Sequence[str]): Label text for each wedge.

    Returns:
        None
    """
    # above a scale factor of 3 the spacing grows sub-linearly (power 0.61)
    if ring_scale_factor > 3:
        spacing_multiplier = ring_scale_factor**0.61
    else:
        spacing_multiplier = ring_scale_factor
    label_radius = max_radius + ring_text_spacing * spacing_multiplier

    # styling shared by every label
    text_style = {
        "rotation_mode": "anchor",
        "transform": ax.transData,
        "family": "sans-serif",
        "fontsize": 11 * font_scale_factor,
        "weight": "medium",
        "style": "normal",
        "ha": "center",
        "va": "center",
    }

    # labels sit on the centre angle of each wedge
    wedge_centres = theta + width / 2
    for position, centre in enumerate(wedge_centres):
        # text rotation is expressed in degrees
        degrees = np.rad2deg(centre)
        in_outer_quadrants = (0 <= degrees < 90) or (270 <= degrees <= 360)
        # angles outside those quadrants get an additional 180 degree turn
        rotation = -degrees if in_outer_quadrants else 180 - degrees
        ax.text(
            centre,
            label_radius,
            wedge_labels[position],
            rotation=rotation,
            **text_style,
        )
def add_text(
    ax: Axes, x: float, y: float, text: Optional[str] = None, **kwargs
) -> Text:
    """Place text on an axis at the position (x, y).

    Args:
        ax (Axes): Axis to annotate.
        x (float): Axis x position.
        y (float): Axis y position.
        text (str, optional): Text to place. None is drawn as an empty
            string.
        **kwargs: Extra keyword arguments forwarded to Axes.text.

    Returns:
        The Text object returned by Axes.text.
    """
    label = text if text is not None else ""
    return ax.text(x, y, label, **kwargs)
def aggregate_temporal_columns(
    data: DataFrame, agg_column: str, agg: Aggregation, mode: Mode
) -> DataFrame:
    """Aggregate agg_column per 'ring' & 'wedge' bin using agg.

    The DataFrame is grouped by its temporal 'ring' and 'wedge' columns and
    the aggregate function is applied to the chosen column. The result is
    then expanded to every ring/wedge combination expected for the mode, with
    combinations absent from the data filled with 0.

    NOTE: The 'ring' & 'wedge' columns are assigned by the utility function
    assign_temporal_columns.

    Args:
        data (DataFrame): DataFrame containing data to aggregate.
        agg_column (str): DataFrame Column to aggregate.
        agg (Aggregation): Aggregation function; 'count', 'mean', 'median',
            'mode' & 'sum'.
        mode (Mode): A mode key representing the temporal bins used in the
            chart; 'YEAR_MONTH', 'YEAR_WEEK', 'WEEK_DAY', 'DOW_HOUR' &
            'DAY_HOUR'.

    Raises:
        ModeError: Unexpected mode value is passed.
        ValueError: Missing 'ring' & 'wedge' columns.

    Returns:
        A DataFrame with aggregate values in a new column named after the
        aggregate function.
    """
    columns = ["ring", "wedge"]
    if not set(columns).issubset(data.columns):
        raise ValueError(f"Expected DataFrame columns: {columns}")

    # every wedge value a ring can contain, keyed by mode
    wedge_bins = {
        "YEAR_MONTH": tuple(range(1, 13)),
        "YEAR_WEEK": range(1, 53),
        "WEEK_DAY": range(0, 7),
        "DOW_HOUR": range(0, 24),
        "DAY_HOUR": range(0, 24),
    }
    if mode not in wedge_bins:
        raise ModeError(mode, get_args(Mode))

    # DOW_HOUR always shows all seven rings; other modes use the rings
    # present in the data
    if mode == "DOW_HOUR":
        ring_values = range(0, 7)
    else:
        ring_values = data["ring"].unique()

    # index holding every possible ring & wedge combination
    full_index = MultiIndex.from_product(
        [ring_values, wedge_bins[mode]], names=columns
    )

    summary = (
        data.groupby(columns, as_index=False)[agg_column]
        .agg(agg)
        # name the aggregated column after the aggregate function
        .set_axis([*columns, agg], axis="columns")
        .set_index(columns)
        # add rows for ring/wedge combinations missing from the data
        .reindex(full_index)
        .reset_index()
    )
    # NaN values only exist for the combinations added by the reindex
    return summary.fillna(0)
def assign_temporal_columns(
data: DataFrame, date_column: str, mode: Mode
) -> DataFrame:
"""Assign ring & wedge columns to a DataFrame based on mode.
The mode value is mapped to a predetermined division of a larger unit of
time into rings, which are then subdivided by a smaller unit of time into
wedges, creating a set of temporal bins. These bins are assigned as 'ring'
and 'wedge' columns.
Args:
data (DataFrame): DataFrame containing data to visualise.
date_column (str): Name of DataFrame datetime64 column.
mode (Mode, optional): A mode key representing the
temporal bins used in the chart; 'YEAR_MONTH',
'YEAR_WEEK', 'WEEK_DAY', 'DOW_HOUR' & 'DAY_HOUR'.
Returns:
A DataFrame with 'ring' & 'wedge' columns assigned.
"""
# dict map for ring & wedge features based on mode
mode_map = defaultdict(dict)
# year | January - December
if mode == "YEAR_MONTH":
mode_map[mode]["ring"] = data[date_column].dt.year
mode_map[mode]["wedge"] = data[date_column].dt.month
# year | weeks 1 - 52
if mode == "YEAR_WEEK":
mode_map[mode]["ring"] = data[date_column].dt.year
week = data[date_column].dt.isocalendar().week
week[week == 53] = 52
mode_map[mode]["wedge"] = week
# weeks 1 - 52 | Monday - Sunday
if mode == "WEEK_DAY":
week = data[date_column].dt.isocalendar().week
year = data[date_column].dt.year
mode_map[mode]["ring"] = week + year * 100
mode_map[mode]["wedge"] = data[date_column].dt.day_of_week
# days 1 - 7 (Monday - Sunday) | 00:00 - 23:00
if mode == "DOW_HOUR":
mode_map[mode]["ring"] = data[date_column].dt.day_of_week
mode_map[mode]["wedge"] = data[date_column].dt.hour
# days 1 - 365 | 00:00 - 23:00
if mode == "DAY_HOUR":
mode_map[mode]["ring"] = data[date_column].dt.strftime("%Y%j")
mode_map[mode]["wedge"] = data[date_column].dt.hour
return data.assign(**mode_map[mode]).astype({"ring": "int64"})
def get_figure_dimensions(wedges: int) -> tuple[float, float]:
    """Return a square data clock figure size suited to the wedge count.

    Each wedge is allotted 0.70 of figure space and the side length is the
    square root of the total, rounded up to a whole number. A square figure
    is used given the circular nature of the chart.

    NOTE: The side length is never smaller than 10.0, so the minimum figure
    size is (10.0, 10.0).

    Example:
        >>> get_figure_dimensions(168)
        (11.0, 11.0)

    Args:
        wedges: Number of wedges (number of rings * wedges per ring).

    Returns:
        A tuple containing the height & width of the square figure in inches.
    """
    required_space = 0.70 * wedges
    side = math.ceil(math.sqrt(required_space))
    # enforce the minimum side length
    if side < 10:
        side = 10
    side_inches = float(side)
    return side_inches, side_inches