Raw Spot Data

The combined dataframe has been created by combining data from 2 spreadsheets from the Bank of England website.

Row Count Check

Description Row Count First Date Last Date No. months between dates
DataFrame 1 552 1970-01-31 2015-12-31 552
DataFrame 2 108 2016-01-31 2024-12-31 108
Combined DataFrame 660 1970-01-31 2024-12-31 660

Sum of Values Check

⚠️ To Be Completed

Description Sum of Values Taken from s/sheet Sum of Values Taken from DataFrame Difference
Dataset 0 191503.17232202887 0
Dataset 0 17844.999330876683 0
Combined Dataset 0 209348.17165290558 0

⚠️ To Be Completed

Python

load spreadsheet into dataframe function

# imports
import pandas as pd

def load_spot_curve(file_path, sheet="4. spot curve", skiprows_data=5, skiprows_header=3):
    """
    Loads an Excel spot curve sheet and returns a cleaned DataFrame with proper headers.
    """
    # Load the raw data
    df = pd.read_excel(file_path, sheet_name=sheet, engine="openpyxl", skiprows=skiprows_data, header=None)

    # Load headers
    col_names = pd.read_excel(file_path, sheet_name=sheet, engine="openpyxl", skiprows=skiprows_header, nrows=1, header=None)
    col_names[0] = "Date"
    df.columns = col_names.iloc[0]

    return df

call load spreadsheet into dataframe twice function

from dataframes.load_individual_data_files import *
import pandas as pd

def produce_2_raw_dataset_dataframes(file_path1,file_path2):
    df1 = load_spot_curve(file_path1)
    df2 = load_spot_curve(file_path2)
    return df1, df2  # returning two DataFrames

run creation of joint dataframe

import pandas as pd

def join_spot_curve_datasets(df1,df2):
    #join the two dataframes to create df
    df = pd.concat([df1, df2], ignore_index=True)
    return df

let's run it

from dataframes.produce_2_raw_dataset_dataframes import *
from dataframes.format_date_column import *
from dataframes.join_data_files import *
from datetime import datetime
import pandas as pd


def months_between_dates(date1, date2):
    date_format = '%Y-%m-%d'
    d1 = datetime.strptime(date1, date_format)
    d2 = datetime.strptime(date2, date_format)
    return abs((d2.year - d1.year) * 12 + d2.month - d1.month +1)

def create_data_check_on_number_of_rows(file_path1="raw_data/GLC Nominal month end data_1970 to 2015.xlsx",file_path2="raw_data/GLC Nominal month end data_2016 to present.xlsx"):  
    """Load a spot curve file"""
    df1, df2 = produce_2_raw_dataset_dataframes(file_path1,file_path2)
    df = join_spot_curve_datasets(df1,df2)

    df1_first_date = str(df1.iloc[0,0].strftime('%Y-%m-%d')) 
    df2_first_date = str(df2.iloc[0,0].strftime('%Y-%m-%d'))
    df_first_date = str(df.iloc[0,0].strftime('%Y-%m-%d'))

    df1_last_date = str(df1.iloc[len(df1)-1,0].strftime('%Y-%m-%d')) 
    df2_last_date = str(df2.iloc[len(df2)-1,0].strftime('%Y-%m-%d')) 
    df_last_date  = str(df.iloc[len(df)-1,0].strftime('%Y-%m-%d'))

    df1_months  = months_between_dates(df1_first_date,df1_last_date)
    df2_months = months_between_dates(df2_first_date,df2_last_date)
    df_months = months_between_dates(df_first_date,df_last_date)


    data = [
        ["DataFrame 1", len(df1), df1_first_date, df1_last_date, df1_months],
        ["DataFrame 2", len(df2), df2_first_date, df2_last_date, df2_months],
        ["Combined DataFrame", len(df), df_first_date, df_last_date, df_months]
    ]

    html = pd.DataFrame(data, columns=["Description", "Row Count", "First Date", "Last Date", "No. months between dates"]).to_html(index=False)

    html = f"""
    
{html}
""" return html def create_data_check_on_sum_of_values(file_path1="raw_data/GLC Nominal month end data_1970 to 2015.xlsx",file_path2="raw_data/GLC Nominal month end data_2016 to present.xlsx"): """Load a spot curve file""" df1, df2 = produce_2_raw_dataset_dataframes(file_path1,file_path2) df = join_spot_curve_datasets(df1,df2) df1_sum = str(df1.iloc[:, 1:].sum().sum()) df2_sum = str(df2.iloc[:, 1:].sum().sum()) df_sum = str(df.iloc[:, 1:].sum().sum()) data = [ ["Dataset", 0,df1_sum,0], ["Dataset", 0,df2_sum,0], ["Combined Dataset", 0,df_sum,0] ] html = pd.DataFrame(data, columns=["Description", "Sum of Values Taken from s/sheet", "Sum of Values Taken from DataFrame", "Difference"]).to_html(index=False) html = f"""
{html}
""" return html