%pip install -U dataclocklib
import calendar

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from dataclocklib.charts import dataclock
from matplotlib import rcParams
from matplotlib.ticker import MaxNLocator

Tutorial

This guide carries out a basic exploratory data analysis (EDA) of road safety data for 2019 to 2023. The data details the circumstances of personal injury road collisions in Great Britain over the last five years.

NOTE: This tutorial is a work in progress and will grow to include other EDA techniques, to better demonstrate how dataclocklib can be incorporated into an EDA workflow and the visualisation of temporal patterns.

Data Information

  • Published by: Department for Transport

  • Last updated: 28 November 2024

  • Topic: Transport

  • Licence: Open Government Licence

The statistics relate only to personal injury collisions on public roads that are reported to the police, and subsequently recorded, using the STATS19 collision reporting form.

All of the data variables are coded rather than stored as textual strings. The lookup tables are available in the Road Safety Open Data Guide - 2024.

raw_data = pd.read_csv(
    "https://data.dft.gov.uk/road-accidents-safety-data/dft-road-casualty-statistics-collision-last-5-years.csv",
    dtype={"accident_index": "string", "accident_reference": "string"}
)
raw_data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 520084 entries, 0 to 520083
Data columns (total 37 columns):
 #   Column                                       Non-Null Count   Dtype  
---  ------                                       --------------   -----  
 0   accident_index                               520084 non-null  string 
 1   accident_year                                520084 non-null  int64  
 2   accident_reference                           520084 non-null  string 
 3   location_easting_osgr                        519991 non-null  float64
 4   location_northing_osgr                       519991 non-null  float64
 5   longitude                                    519991 non-null  float64
 6   latitude                                     519991 non-null  float64
 7   police_force                                 520084 non-null  int64  
 8   accident_severity                            520084 non-null  int64  
 9   number_of_vehicles                           520084 non-null  int64  
 10  number_of_casualties                         520084 non-null  int64  
 11  date                                         520084 non-null  object 
 12  day_of_week                                  520084 non-null  int64  
 13  time                                         520084 non-null  object 
 14  local_authority_district                     520084 non-null  int64  
 15  local_authority_ons_district                 520084 non-null  object 
 16  local_authority_highway                      520084 non-null  object 
 17  first_road_class                             520084 non-null  int64  
 18  first_road_number                            520084 non-null  int64  
 19  road_type                                    520084 non-null  int64  
 20  speed_limit                                  520084 non-null  int64  
 21  junction_detail                              520084 non-null  int64  
 22  junction_control                             520084 non-null  int64  
 23  second_road_class                            520084 non-null  int64  
 24  second_road_number                           520084 non-null  int64  
 25  pedestrian_crossing_human_control            520084 non-null  int64  
 26  pedestrian_crossing_physical_facilities      520084 non-null  int64  
 27  light_conditions                             520084 non-null  int64  
 28  weather_conditions                           520084 non-null  int64  
 29  road_surface_conditions                      520084 non-null  int64  
 30  special_conditions_at_site                   520084 non-null  int64  
 31  carriageway_hazards                          520084 non-null  int64  
 32  urban_or_rural_area                          520084 non-null  int64  
 33  did_police_officer_attend_scene_of_accident  520084 non-null  int64  
 34  trunk_road_flag                              520084 non-null  int64  
 35  lsoa_of_accident_location                    520084 non-null  object 
 36  enhanced_severity_collision                  520084 non-null  int64  
dtypes: float64(4), int64(26), object(5), string(2)
memory usage: 146.8+ MB
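
Note that the date and time fields arrive as plain strings in day-first form ("dd/mm/yyyy" and "hh:mm" respectively), which is why the cleaning step below parses them with dayfirst=True. A quick peek confirms the layout:

raw_data[["date", "time"]].head()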

Data Cleaning & Preprocessing

Below we assign appropriate data types and map a selection of coded values to their associated textual strings for further analysis.

data = raw_data.copy()

datetime_str = data["date"].str.cat(data["time"], sep=" ")
data["date_time"] = pd.to_datetime(datetime_str, yearfirst=False, dayfirst=True)

severity_map = dict(enumerate(["Fatal", "Serious", "Slight"], start=1))
data["accident_severity_str"] = data["accident_severity"].map(severity_map)

enhanced_severity_map = dict(
    enumerate(
        [
            "Fatal",
            "Very Serious",
            "Moderately Serious",
            "Less Serious",
            "Slight",
        ],
        start=1
    )
)

data["enhanced_severity_collision_str"] = data["enhanced_severity_collision"].map(enhanced_severity_map)

# codes per the Road Safety Open Data Guide (6 = no lighting, -1 = missing)
light_conditions_map = dict(
    zip(
        [1, 4, 5, 6, 7, -1],
        [
            "Daylight",
            "Darkness (lights lit)",
            "Darkness (lights unlit)",
            "Darkness (no lighting)",
            "Darkness (lighting unknown)",
            "Data missing",
        ]
    )
)

data["light_conditions_str"] = data["light_conditions"].map(light_conditions_map)

road_surface_map = dict(
    enumerate(
        [
            "Dry",
            "Wet/damp",
            "Snow",
            "Frost/ice",
            "Flood (3cm+ deep)",
            "Oil/diesel",
            "Mud",
        ],
        start=1
    )
)

data["road_surface_conditions_str"] = data["road_surface_conditions"].map(road_surface_map)

weather_map = dict(
    enumerate(
        [
            "Fine (no high winds)",
            "Raining (no high winds)",
            "Snowing (no high winds)",
            "Fine (high winds)",
            "Raining (high winds)",
            "Snowing (high winds)",
            "Fog/mist",
            "Other",
            "Unknown",
        ],
        start=1
    )
)

data["weather_conditions_str"] = data["weather_conditions"].map(road_surface_map)

columns = {
    "date_time": "datetime64[ns]",
    "date": "string",
    "latitude": "float64",
    "longitude": "float64",
    "accident_index": "string",
    "number_of_casualties": "int32",
    "number_of_vehicles": "int32",
    "accident_severity": "int16",
    "accident_severity_str": "string",
    "enhanced_severity_collision": "int16",
    "enhanced_severity_collision_str": "string",
    "light_conditions": "int16",
    "light_conditions_str": "string",
    "road_surface_conditions": "int16",
    "road_surface_conditions_str": "string",
    "weather_conditions": "int16",
    "weather_conditions_str": "string",
}

data = data.reindex(columns=columns).astype(columns)
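
Keeping the column selection and target dtypes in a single dictionary lets one definition drive both reindex and astype. A quick check (output omitted) confirms the slimmed-down frame and reduced memory footprint:

data.info(memory_usage="deep")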

Here we check the completeness of the data, looking for any missing days. It is worth noting that 2020 was a leap year, giving 366 days.

(
    data
    # datetime index allows grouping by temporal frequency
    .set_index("date_time")
    .groupby(pd.Grouper(freq="YE"))
    ["date"]
    .nunique()
)
date_time
2019-12-31    365
2020-12-31    366
2021-12-31    365
2022-12-31    365
2023-12-31    365
Freq: YE-DEC, Name: date, dtype: int64
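
No days are missing. Had any year come up short, a set difference against a complete daily calendar would pinpoint the exact missing dates; a minimal sketch:

# compare observed days against a full 2019 - 2023 daily calendar
full_calendar = pd.date_range("2019-01-01", "2023-12-31", freq="D")
observed_days = pd.DatetimeIndex(data["date_time"].dt.normalize().unique())
print(full_calendar.difference(observed_days))  # empty DatetimeIndex means no gaps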

Data Aggregation & Summary

Next, we will very briefly summarise and aggregate the data to look at the volume of casualties and accidents over the full 5 years, including the split across the accident severity categories.

‘Fatal’ accidents consistently show the highest standard deviation (from around 1.13 to 1.40), followed by ‘Serious’ accidents (0.79 to 0.97), and then ‘Slight’ accidents (0.62 to 0.68). This indicates that fatal accidents have the most variability in the number of casualties per accident, while accidents categorised as ‘Slight’ are the most consistent in their casualty counts.

Fatal accidents

The high standard deviations, combined with means ranging from roughly 1.56 to 1.75, highlight considerable variation in the number of casualties in fatal accidents. Some involve a single casualty, while others involve several, creating a wider spread. 2023 shows the highest mean (1.75) and a standard deviation (1.37) second only to 2019 (1.40), suggesting more multiple-casualty fatal accidents that year.

Serious accidents

For this category, the values suggest more consistency in the number of casualties per accident. The variation is not as extreme as with the fatal accidents and the pattern has remained mostly consistent across the years.

Slight accidents

The data indicates that slight accidents tend to be more consistent in their casualty counts. Most accidents of this nature involve one or two casualties, with less variation than the other severity categories.

summary_data = (
    data
    .set_index("date_time")
    .filter(
        [
            "accident_index",
            "accident_severity_str",
            "number_of_casualties"
        ]
    )
    .groupby([pd.Grouper(freq="YE"), "accident_severity_str"])
    .agg(
        accidents=pd.NamedAgg(column="accident_index", aggfunc="count"),
        casualties=pd.NamedAgg(column="number_of_casualties", aggfunc="sum"),
        casualties_mean=pd.NamedAgg(column="number_of_casualties", aggfunc="mean"),
        casualties_median=pd.NamedAgg(column="number_of_casualties", aggfunc="median"),
        casualties_std=pd.NamedAgg(column="number_of_casualties", aggfunc="std"),
    )
)

summary_data
                                 accidents  casualties  casualties_mean  casualties_median  casualties_std
date_time  accident_severity_str
2019-12-31 Fatal                      1658        2842         1.714113                1.0        1.396281
           Serious                   22980       31771         1.382550                1.0        0.943921
           Slight                    92898      118545         1.276077                1.0        0.677882
2020-12-31 Fatal                      1391        2175         1.563623                1.0        1.126566
           Serious                   18355       24495         1.334514                1.0        0.836148
           Slight                    71453       88914         1.244370                1.0        0.620540
2021-12-31 Fatal                      1474        2390         1.621438                1.0        1.204417
           Serious                   21284       28540         1.340913                1.0        0.792897
           Slight                    78329       97279         1.241928                1.0        0.620187
2022-12-31 Fatal                      1602        2656         1.657928                1.0        1.185627
           Serious                   23333       31948         1.369220                1.0        0.864221
           Slight                    81069      100876         1.244323                1.0        0.625025
2023-12-31 Fatal                      1522        2661         1.748357                1.0        1.371884
           Serious                   23438       31976         1.364280                1.0        0.974518
           Slight                    79298       98340         1.240132                1.0        0.624091
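
The standard deviation ranges quoted above can be read straight off this table; for example, a quick sketch pulling the per-category minimum and maximum:

# min/max of casualties_std per severity category across the five years
summary_data["casualties_std"].groupby(level="accident_severity_str").agg(["min", "max"]).round(2)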

The percentage table derived below is used to annotate the bar charts with percentage labels.

def severity_percentage(data: pd.DataFrame) -> pd.DataFrame:
    """Calculate the percentage of accident and casualty totals per year,
    by accident severity category.

    Args:
        data (DataFrame): DataFrame to aggregate.

    Returns:
        DataFrame with 'accidents' & 'casualties' feature values calculated
        as a percentage.
    """
    return data.assign(
        accidents=lambda x: x["accidents"] / x["accidents"].sum() * 100,
        casualties=lambda x: x["casualties"] / x["casualties"].sum() * 100,
    )

summary_data_percent = (
    summary_data
    .filter(["accidents", "casualties"])
    .reset_index(level="accident_severity_str")
    .groupby(pd.Grouper(freq="YE"), as_index=False)
    .apply(severity_percentage)
    .round(2)
    .reset_index(level="date_time")
    .set_index(["date_time", "accident_severity_str"])
)

summary_data_percent
                                 accidents  casualties
date_time  accident_severity_str
2019-12-31 Fatal                      1.41        1.86
           Serious                   19.55       20.74
           Slight                    79.04       77.40
2020-12-31 Fatal                      1.53        1.88
           Serious                   20.13       21.19
           Slight                    78.35       76.93
2021-12-31 Fatal                      1.46        1.86
           Serious                   21.06       22.26
           Slight                    77.49       75.88
2022-12-31 Fatal                      1.51        1.96
           Serious                   22.01       23.58
           Slight                    76.48       74.46
2023-12-31 Fatal                      1.46        2.00
           Serious                   22.48       24.05
           Slight                    76.06       73.95
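
As a sanity check, each year's three severity percentages should total roughly 100, give or take rounding:

# accidents and casualties columns should each sum to ~100 per year
summary_data_percent.groupby(level="date_time").sum()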

Traffic Accident Count by Severity

Here we plot the summarised data for accidents categorised by accident severity. The graph below shows the number of traffic accidents by severity from 2019 to 2023. For each year, fewer than 2% of accidents were fatal and the majority were categorised as ‘Slight’.

fig, ax = plt.subplots(figsize=(13.33, 7.5), dpi=96)

# adjust subplots for custom title, subtitle and source text
plt.subplots_adjust(
    left=None, bottom=0.2, right=None, top=0.85, wspace=None, hspace=None
)

# Create the grid
ax.grid(which="major", axis='x', color='#DAD8D7', alpha=0.5, zorder=1)
ax.grid(which="major", axis='y', color='#DAD8D7', alpha=0.5, zorder=1)

ax.spines[["top", "right", "bottom"]].set_visible(False)
ax.spines['left'].set_linewidth(1.1)

# set white figure background
fig.patch.set_facecolor("w")

index_values = summary_data.index.get_level_values("date_time")
bar_x_positions = np.arange(index_values.nunique()) - 0.3
bar_x_labels = index_values.year.unique()
bar_width = 0.3

ax.set_ylabel("Cumulative Number of Accidents", fontsize=12, labelpad=10)
ax.yaxis.set_label_position("left")
ax.yaxis.set_major_formatter(lambda s, i : f"{s:,.0f}")
ax.yaxis.set_tick_params(
    pad=2, labeltop=False, labelbottom=True, bottom=False, labelsize=12
)

ax.set_xlabel("", fontsize=12, labelpad=10)
ax.xaxis.set_label_position("bottom")
ax.xaxis.set_tick_params(pad=2, labelbottom=True, bottom=True, labelsize=12)
ax.xaxis.set_ticks(bar_x_positions + 0.3, bar_x_labels, rotation=45, ha="right")

cmap = plt.get_cmap("Dark2")

category_values = summary_data.index.get_level_values("accident_severity_str").unique()
for i, category in enumerate(category_values):
    bar_data = summary_data.loc[(slice(None), category), :]
    bar = ax.bar(
        bar_x_positions + (i*bar_width),
        bar_data["accidents"],
        width=bar_width,
        zorder=2,
        color=cmap(i),
        label=category
    )

    bar_data_pct = summary_data_percent.loc[(slice(None), category), :]
    ax.bar_label(
        bar,
        labels=map(lambda x: f"{x:.2f}%", bar_data_pct["accidents"]),
        padding=6,
        color="black",
        fontsize=10,
    )

ax.legend(loc="best", fontsize=12)

ax.text(
    x=0.05,
    y=.93,
    s="UK Traffic Accidents by Severity 2019 - 2023",
    transform=fig.transFigure,
    ha="left",
    fontsize=14,
    weight="bold",
    alpha=.8
)

ax.text(
    x=0.05,
    y=.90,
    s="Sum by year",
    transform=fig.transFigure,
    ha="left",
    fontsize=12,
    alpha=.8
)

plt.show()
[Figure: UK Traffic Accidents by Severity 2019 - 2023, grouped bar chart with percentage labels]

Traffic Accident Casualty Numbers by Severity

In the graph below we plot the summarised data for casualties categorised by accident severity. The total number of casualties in ‘Slight’ accidents is the highest for each year, followed by ‘Serious’ and ‘Fatal’.

Looking back at the ‘summary_data’ DataFrame, it is interesting to note that the average number of casualties in ‘Fatal’ accidents is higher than in ‘Slight’ and ‘Serious’ accidents. More severe accidents not only tend to have more casualties on average, but also show more variability in the number of casualties involved.

fig, ax = plt.subplots(figsize=(13.33, 7.5), dpi=96)

# adjust subplots for custom title, subtitle and source text
plt.subplots_adjust(
    left=None, bottom=0.2, right=None, top=0.85, wspace=None, hspace=None
)

# Create the grid
ax.grid(which="major", axis='x', color='#DAD8D7', alpha=0.5, zorder=1)
ax.grid(which="major", axis='y', color='#DAD8D7', alpha=0.5, zorder=1)

ax.spines[["top", "right", "bottom"]].set_visible(False)
ax.spines['left'].set_linewidth(1.1)

# set white figure background
fig.patch.set_facecolor("w")

index_values = summary_data.index.get_level_values("date_time")
bar_x_positions = np.arange(index_values.nunique()) - 0.3
bar_x_labels = index_values.year.unique()
bar_width = 0.3

ax.set_ylabel("Number of Casualties", fontsize=12, labelpad=10)
ax.yaxis.set_label_position("left")
ax.yaxis.set_major_formatter(lambda s, i : f"{s:,.0f}")
ax.yaxis.set_tick_params(
    pad=2, labeltop=False, labelbottom=True, bottom=False, labelsize=12
)

ax.set_xlabel("", fontsize=12, labelpad=10)
ax.xaxis.set_label_position("bottom")
ax.xaxis.set_tick_params(pad=2, labelbottom=True, bottom=True, labelsize=12)
ax.xaxis.set_ticks(bar_x_positions + 0.3, bar_x_labels, rotation=45, ha="right")

cmap = plt.get_cmap("Dark2")

category_values = summary_data.index.get_level_values("accident_severity_str").unique()
for i, category in enumerate(category_values):
    bar_data = summary_data.loc[(slice(None), category), :]
    bar = ax.bar(
        bar_x_positions + (i*bar_width),
        bar_data["casualties"],
        width=bar_width,
        zorder=2,
        color=cmap(i),
        label=category
    )

    bar_data_pct = summary_data_percent.loc[(slice(None), category), :]
    ax.bar_label(
        bar,
        labels=map(lambda x: f"{x:.2f}%", bar_data_pct["casualties"]),
        padding=5,
        color="black",
        fontsize=10,
    )

ax.legend(loc="best", fontsize=12)

ax.text(
    x=0.05,
    y=.93,
    s="UK Traffic Accident Casualties by Severity 2019 - 2023",
    transform=fig.transFigure,
    ha="left",
    fontsize=14,
    weight="bold",
    alpha=.8
)

ax.text(
    x=0.05,
    y=.90,
    s="Sum by year",
    transform=fig.transFigure,
    ha="left",
    fontsize=12,
    alpha=.8
)

plt.show()
[Figure: UK Traffic Accident Casualties by Severity 2019 - 2023, grouped bar chart with percentage labels]
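
The mean comparison noted above is easier to eyeball with the means pivoted into a year-by-severity table; a quick sketch:

# mean casualties per accident: years as rows, severity categories as columns
summary_data["casualties_mean"].unstack(level="accident_severity_str").round(2)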

Temporal Factor Identification Using Dataclocklib

We might want to explore this dataset at different timescales. Below, we will summarise and aggregate the data by day of week and hour of day.

One assumption might be that the frequency of accidents is related to the time of day and that there are distinct trends for weekdays and weekends.

We might also want to investigate what contributes to multiple-casualty accidents, which influence the differences between the mean and median values highlighted in the ‘summary_data’ DataFrame.

dow_labels = tuple(calendar.day_name)

summary_data_dow = (
    data
    .set_index("date_time")
    .filter(["number_of_casualties"])
    .resample("1h")
    .agg(sum=pd.NamedAgg(column="number_of_casualties", aggfunc="sum"))
    .reset_index()
    .assign(day_of_week=lambda x: x["date_time"].dt.day_of_week)
    .assign(hour=lambda x: x["date_time"].dt.hour)
    .groupby(["day_of_week", "hour"])
    .agg(sum=pd.NamedAgg(column="sum", aggfunc="sum"))
    .reset_index(level="hour")
)

summary_data_dow
             hour   sum
day_of_week
0               0  1108
0               1   761
0               2   545
0               3   472
0               4   438
...           ...   ...
6              19  4291
6              20  3522
6              21  2914
6              22  2421
6              23  1740

[168 rows x 2 columns]
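
Because a sum of hourly sums equals the overall sum, the same table can be produced without the intermediate resample by grouping directly on the datetime accessors; an equivalent sketch (the name summary_data_dow_alt is just illustrative):

summary_data_dow_alt = (
    data
    .assign(
        day_of_week=lambda x: x["date_time"].dt.day_of_week,
        hour=lambda x: x["date_time"].dt.hour,
    )
    .groupby(["day_of_week", "hour"])
    .agg(sum=pd.NamedAgg(column="number_of_casualties", aggfunc="sum"))
    .reset_index(level="hour")
)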

Total Casualties by Hour & Day of Week Chart

Here we create a basic line chart depicting hourly trends for each day of the week.

On the weekdays Monday to Thursday, the trend looks fairly consistent. We see a rise in casualties from 06:00, peaking around 08:00 during the early morning commute.

Numbers fall by late morning, with another rise from 14:00 to 17:00, in line with the late afternoon commute. Friday mornings start off like the other weekdays, but from mid-morning onwards we see a notably higher frequency than on the other weekdays.

rcParams["font.family"] = "DejaVu Sans"

# create figure and axes
fig, ax = plt.subplots(figsize=(13.33, 7.5), dpi=96)

# adjust subplots for custom title, subtitle and source text
plt.subplots_adjust(
    left=None, bottom=0.2, right=None, top=0.85, wspace=None, hspace=None
)

# set white figure background
fig.patch.set_facecolor("w")

# Create the grid
ax.grid(which="major", axis='x', color='#DAD8D7', alpha=0.5, zorder=1)
ax.grid(which="major", axis='y', color='#DAD8D7', alpha=0.5, zorder=1)

ax.spines[["top", "right", "bottom"]].set_visible(False)
ax.spines['left'].set_linewidth(1.1)

ax.xaxis.set_label_position("bottom")
ax.xaxis.set_tick_params(pad=2, labelbottom=True, bottom=True, labelsize=12)
ax.xaxis.set_major_formatter(lambda s, i : f"{int(s):02d}:00")
ax.xaxis.set_major_locator(MaxNLocator(23, integer=True))
ax.set_xlabel("Hour of Day", fontsize=12, labelpad=10)

ax.set_ylabel("Cumulative Number of Accidents", fontsize=12, labelpad=10)
ax.yaxis.set_label_position("left")
ax.yaxis.set_major_formatter(lambda s, i : f'{s:,.0f}')
ax.yaxis.set_tick_params(
    pad=2, labeltop=False, labelbottom=True, bottom=False, labelsize=12
)

cmap = plt.get_cmap("tab10")

for i in range(7):
    line_data = summary_data_dow.loc[i]
    line = ax.plot(
        line_data["hour"],
        line_data["sum"],
        color=cmap(i),
        label=dow_labels[i],
        zorder=2
    )

    ax.plot(
        line_data["hour"].iloc[-1],
        line_data["sum"].iloc[-1],
        "o",
        color=cmap(i),
        markersize=10,
        alpha=0.3
    )

    ax.plot(
        line_data["hour"].iloc[-1],
        line_data["sum"].iloc[-1],
        "o",
        color=cmap(i),
        markersize=5
    )

# Add legend
ax.legend(loc="upper left", fontsize=12)

ax.text(
    x=0.05,
    y=.93,
    s="UK Traffic Accidents 2019 - 2023",
    transform=fig.transFigure,
    ha="left",
    fontsize=14,
    weight="bold",
    alpha=.8
)

ax.text(
    x=0.05,
    y=.90,
    s="Sum by day of week & hour of day",
    transform=fig.transFigure,
    ha="left",
    fontsize=12,
    alpha=.8
)

plt.show()
[Figure: UK Traffic Accident Casualties 2019 - 2023, line chart of hourly sums for each day of the week]
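
The Friday effect can also be checked numerically by comparing Friday's late-morning totals with the Monday to Thursday average; a quick sketch (the pivot and column names are illustrative):

# pivot to hours x days, then compare Friday (4) with the Monday - Thursday mean
pivot = (
    summary_data_dow
    .reset_index()
    .pivot(index="hour", columns="day_of_week", values="sum")
)
pivot.assign(mon_thu_mean=pivot[[0, 1, 2, 3]].mean(axis=1))[[4, "mon_thu_mean"]].loc[9:12]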

Dataclocklib Day of Week by Hour Chart

We will now utilise the dataclocklib library to demonstrate another way of visualising these temporal trends, using a data clock chart. Here, we use the day of week by hour of day mode (‘DOW_HOUR’) to split the data into temporal bins.

We see an initial spike from 0700 to 1000, which coincides with the daily morning commute. Casualty figures are lower on Saturday and Sunday (the two outermost rings), which is in line with our assumptions.

The majority of casualties occur between 1200 and 1900 each day. In particular, we see higher numbers for Friday (the third outermost ring) between 1500 and 1800.

Between 1700 and 1800 we see a temporal trend of increasing intensity from Monday to Friday, represented visually by a steady darkening in this colour scheme. The same trend is shown in tabular form with the filtered DataFrame below.

# Monday - Friday casualty sum for 1700 shows increasing intensity
summary_data_dow.loc[0:4, :].query("hour == 17")
             hour   sum
day_of_week
0              17  8590
1              17  9019
2              17  9168
3              17  9190
4              17  9460
graph_data, fig, ax = dataclock(
    data=data,
    date_column="date_time",
    agg_column="number_of_casualties",
    agg="sum",
    mode="DOW_HOUR",
    cmap_name="CMRmap_r",
    chart_title="UK Traffic Accident Casualties 2019 - 2023",
    chart_subtitle="Sum by day of week (rings) & hour of day (wedges)",
    chart_source="https://data.dft.gov.uk/road-accidents-safety-data/dft-road-casualty-statistics-collision-last-5-years.csv"
)
[Figure: data clock chart, UK Traffic Accident Casualties 2019 - 2023, sum by day of week (rings) & hour of day (wedges)]
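
Because dataclock returns the aggregated data alongside the Matplotlib figure and axes, the chart can be inspected, customised or saved like any other figure; for example:

# persist the chart; the filename and dpi here are illustrative choices
fig.savefig("dataclock_dow_hour.png", dpi=150, bbox_inches="tight")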

We can also group the data by year and apply the chart function to each group, to see whether these patterns remain consistent.

data_grouped = data.assign(year=lambda x: x["date_time"].dt.year).groupby("year")
for year, view in data_grouped:
    graph_data, fig, ax = dataclock(
        data=view,
        date_column="date_time",
        agg_column="number_of_casualties",
        agg="sum",
        mode="DOW_HOUR",
        cmap_name="CMRmap_r",
        chart_title=f"UK Traffic Accident Casualties {year}",
        chart_subtitle="Sum by day of week (rings) & hour of day (wedges)",
        chart_source="https://data.dft.gov.uk/road-accidents-safety-data/dft-road-casualty-statistics-collision-last-5-years.csv"
    )
[Figures: five data clock charts, UK Traffic Accident Casualties for each year 2019 through 2023]

Dataclocklib Accident Severity for Day of Week by Hour Chart

Below, we have grouped the data by accident severity to look for any distinguishable temporal patterns. We have also changed the colour map to a traffic-light-style graduated colour scheme.

For fatal accidents, we see larger numbers of casualties in certain time periods compared to slight and serious accidents. For example, we see more intense patterns for Friday to Sunday between the hours of 2000 and 0200 than for the other categories.

The serious and slight categories show similar visual presentations, especially around the weekday morning and afternoon commutes. You might also notice a higher concentration of serious accident casualties at weekends compared with the slight category.

data_grouped = data.groupby("accident_severity_str")
for severity, view in data_grouped:
    graph_data, fig, ax = dataclock(
        data=view,
        date_column="date_time",
        agg_column="number_of_casualties",
        agg="sum",
        mode="DOW_HOUR",
        cmap_name="RdYlGn_r",
        chart_title=f"UK Traffic Accident '{severity}' Casualties 2019 - 2023",
        chart_subtitle="Sum by day of week (rings) & hour of day (wedges)",
        chart_source="https://data.dft.gov.uk/road-accidents-safety-data/dft-road-casualty-statistics-collision-last-5-years.csv"
    )
[Figures: three data clock charts, UK Traffic Accident 'Fatal', 'Serious' and 'Slight' Casualties 2019 - 2023]

Dataclocklib Accident Severity for Year by Month Chart

Here we use the same data grouped by accident severity and visualise it as a data clock with years as rings and months as wedges.

for severity, view in data_grouped:
    graph_data, fig, ax = dataclock(
        data=view,
        date_column="date_time",
        agg_column="number_of_casualties",
        agg="sum",
        mode="YEAR_MONTH",
        cmap_name="RdYlGn_r",
        chart_title=f"UK Traffic Accident '{severity}' Casualties 2019 - 2023",
        chart_subtitle="Sum by year (rings) & month (wedges)",
        chart_source="https://data.dft.gov.uk/road-accidents-safety-data/dft-road-casualty-statistics-collision-last-5-years.csv"
    )
[Figures: three data clock charts, sum by year (rings) & month (wedges), for the 'Fatal', 'Serious' and 'Slight' categories]