Step 1 - Put Raw Spot Yield Data into DataFrame

Dataset

Spot yield data has been taken from the Bank of England website

Term ... Term
Date 0.5 1 1.5 2 38 38.5 39 39.5 40
1970-01-31 NaN 8.64 8.71 8.7 ... NaN NaN NaN NaN NaN
1970-02-28 NaN 8.41 8.4 8.37 ... NaN NaN NaN NaN NaN
1970-03-31 NaN 7.74 7.78 7.8 ... NaN NaN NaN NaN NaN
1970-04-30 NaN 7.61 7.86 7.97 ... NaN NaN NaN NaN NaN
1970-05-31 NaN 7.39 7.74 7.86 ... NaN NaN NaN NaN NaN
... ... ... ... ... ... ... ... ... ... ...
2024-08-30 4.29 4.22 4.06 3.96 ... 4.42 4.41 4.4 4.38 4.37
2024-09-30 4.38 4.16 3.96 3.85 ... 4.49 4.48 4.46 4.45 4.43
2024-10-31 4.52 4.39 4.29 4.24 ... 4.7 4.69 4.67 4.66 4.64
2024-11-29 4.4 4.22 4.12 4.07 ... 4.61 4.6 4.58 4.56 4.55
2024-12-31 4.46 4.31 4.23 4.2 ... 4.97 4.95 4.93 4.92 4.9

Python

load data files from BoE s/s function

load spreadsheet into dataframe function
# imports
import pandas as pd

def load_spot_curve(file_path, sheet="4. spot curve", skiprows_data=5, skiprows_header=3):
    """
    Loads an Excel spot curve sheet and returns a cleaned DataFrame with proper headers.
    """
    # Load the raw data
    df = pd.read_excel(file_path, sheet_name=sheet, engine="openpyxl", skiprows=skiprows_data, header=None)

    # Load headers
    col_names = pd.read_excel(file_path, sheet_name=sheet, engine="openpyxl", skiprows=skiprows_header, nrows=1, header=None)
    col_names[0] = "Date"
    df.columns = col_names.iloc[0]

    return df
call load spreadsheet into dataframe twice function
from dataframes.load_individual_data_files import *
import pandas as pd

def produce_2_raw_dataset_dataframes(file_path1,file_path2):
    df1 = load_spot_curve(file_path1)
    df2 = load_spot_curve(file_path2)
    return df1, df2  # returning two DataFrames

join data files into 1 combined dataset function

import pandas as pd

def join_spot_curve_datasets(df1,df2):
    #join the two dataframes to create df
    df = pd.concat([df1, df2], ignore_index=True)
    return df

format the data functions

format date column fucntion

import pandas as pd

def format_date_column(df, column_name='Date'):
    """
    Converts a datetime column to a string in 'YYYY-MM-DD' format.

    Args:
        df (pd.DataFrame): The DataFrame containing the column.
        column_name (str): Name of the column to format. Default is 'Date'.

    Returns:
        pd.DataFrame: DataFrame with the formatted date column.
    """
    df[column_name] = df[column_name].dt.strftime('%Y-%m-%d')
    return df

load individual data file from BoE s/s function

import pandas as pd

def round_numeric_columns(df, decimals=2):
    numeric_cols = df.select_dtypes(include='number').columns
    df[numeric_cols] = df[numeric_cols].round(decimals)
    return df

truncate with ellipsis function

import pandas as pd

def truncate_with_ellipsis(df, n=5):
    """
    Keep first n and last n rows/cols, insert ellipsis in between.
    """

    # ---- columns ----
    if df.shape[1] > 2 * n:
        cols = list(df.columns[:n]) + ["..."] + list(df.columns[-n:])
        df_cols = df[cols[:n] + cols[-n:]].copy()
        df_cols.insert(n, "...", "...")  # middle column
    else:
        df_cols = df.copy()

    # ---- rows ----
    if df_cols.shape[0] > 2 * n:
        top = df_cols.iloc[:n]
        bottom = df_cols.iloc[-n:]

        ellipsis_row = pd.DataFrame(
            [["..."] * df_cols.shape[1]],
            columns=df_cols.columns,
            index=["..."]
        )

        result = pd.concat([top, ellipsis_row, bottom])
    else:
        result = df_cols

    return result

let's run it

from dataframes.truncate_with_ellipsis import *
from dataframes.produce_2_raw_dataset_dataframes import *
from dataframes.round_numeric_columns import *
from dataframes.format_date_column import *
from dataframes.join_data_files import *
import pandas as pd

def create_raw_dataset_for_presentation(file_path1="raw_data/GLC Nominal month end data_1970 to 2015.xlsx",file_path2="raw_data/GLC Nominal month end data_2016 to present.xlsx"):  
    """Load a spot curve file and apply formatting."""
    df1, df2 = produce_2_raw_dataset_dataframes(file_path1,file_path2)
    df = join_spot_curve_datasets(df1,df2)
    df = format_date_column(df)
    df = round_numeric_columns(df, 2)
    df.columns = pd.MultiIndex.from_product([['Term'], df.columns])
    return truncate_with_ellipsis(df).to_html(index=False)

Here are download links to the spreadsheets obtained from the Bank of England website, referenced in the above function:

Using the functions defined above and plugging in the location of the datasets obtained from the Bank of England we get: