Raw Spot Data
The combined dataframe has been created by combining data from 2 spreadsheets from the Bank of England website.
Row Count Check
| Description | Row Count | First Date | Last Date | No. months between dates |
|---|---|---|---|---|
| DataFrame 1 | 552 | 1970-01-31 | 2015-12-31 | 552 |
| DataFrame 2 | 108 | 2016-01-31 | 2024-12-31 | 108 |
| Combined DataFrame | 660 | 1970-01-31 | 2024-12-31 | 660 |
Sum of Values Check
⚠️ To Be Completed
| Description | Sum of Values Taken from s/sheet | Sum of Values Taken from DataFrame | Difference |
|---|---|---|---|
| Dataset | 0 | 191503.17232202887 | 0 |
| Dataset | 0 | 17844.999330876683 | 0 |
| Combined Dataset | 0 | 209348.17165290558 | 0 |
⚠️ To Be Completed
Python
load spreadsheet into dataframe function
# imports
import pandas as pd
def load_spot_curve(file_path, sheet="4. spot curve", skiprows_data=5, skiprows_header=3):
"""
Loads an Excel spot curve sheet and returns a cleaned DataFrame with proper headers.
"""
# Load the raw data
df = pd.read_excel(file_path, sheet_name=sheet, engine="openpyxl", skiprows=skiprows_data, header=None)
# Load headers
col_names = pd.read_excel(file_path, sheet_name=sheet, engine="openpyxl", skiprows=skiprows_header, nrows=1, header=None)
col_names[0] = "Date"
df.columns = col_names.iloc[0]
return df
call load spreadsheet into dataframe twice function
from dataframes.load_individual_data_files import *
import pandas as pd
def produce_2_raw_dataset_dataframes(file_path1,file_path2):
df1 = load_spot_curve(file_path1)
df2 = load_spot_curve(file_path2)
return df1, df2 # returning two DataFrames
run creation of joint dataframe
import pandas as pd
def join_spot_curve_datasets(df1,df2):
#join the two dataframes to create df
df = pd.concat([df1, df2], ignore_index=True)
return df
let's run it
from dataframes.produce_2_raw_dataset_dataframes import *
from dataframes.format_date_column import *
from dataframes.join_data_files import *
from datetime import datetime
import pandas as pd
def months_between_dates(date1, date2):
date_format = '%Y-%m-%d'
d1 = datetime.strptime(date1, date_format)
d2 = datetime.strptime(date2, date_format)
return abs((d2.year - d1.year) * 12 + d2.month - d1.month +1)
def create_data_check_on_number_of_rows(file_path1="raw_data/GLC Nominal month end data_1970 to 2015.xlsx",file_path2="raw_data/GLC Nominal month end data_2016 to present.xlsx"):
"""Load a spot curve file"""
df1, df2 = produce_2_raw_dataset_dataframes(file_path1,file_path2)
df = join_spot_curve_datasets(df1,df2)
df1_first_date = str(df1.iloc[0,0].strftime('%Y-%m-%d'))
df2_first_date = str(df2.iloc[0,0].strftime('%Y-%m-%d'))
df_first_date = str(df.iloc[0,0].strftime('%Y-%m-%d'))
df1_last_date = str(df1.iloc[len(df1)-1,0].strftime('%Y-%m-%d'))
df2_last_date = str(df2.iloc[len(df2)-1,0].strftime('%Y-%m-%d'))
df_last_date = str(df.iloc[len(df)-1,0].strftime('%Y-%m-%d'))
df1_months = months_between_dates(df1_first_date,df1_last_date)
df2_months = months_between_dates(df2_first_date,df2_last_date)
df_months = months_between_dates(df_first_date,df_last_date)
data = [
["DataFrame 1", len(df1), df1_first_date, df1_last_date, df1_months],
["DataFrame 2", len(df2), df2_first_date, df2_last_date, df2_months],
["Combined DataFrame", len(df), df_first_date, df_last_date, df_months]
]
html = pd.DataFrame(data, columns=["Description", "Row Count", "First Date", "Last Date", "No. months between dates"]).to_html(index=False)
html = f"""
{html}
"""
return html
def create_data_check_on_sum_of_values(file_path1="raw_data/GLC Nominal month end data_1970 to 2015.xlsx",file_path2="raw_data/GLC Nominal month end data_2016 to present.xlsx"):
"""Load a spot curve file"""
df1, df2 = produce_2_raw_dataset_dataframes(file_path1,file_path2)
df = join_spot_curve_datasets(df1,df2)
df1_sum = str(df1.iloc[:, 1:].sum().sum())
df2_sum = str(df2.iloc[:, 1:].sum().sum())
df_sum = str(df.iloc[:, 1:].sum().sum())
data = [
["Dataset", 0,df1_sum,0],
["Dataset", 0,df2_sum,0],
["Combined Dataset", 0,df_sum,0]
]
html = pd.DataFrame(data, columns=["Description", "Sum of Values Taken from s/sheet", "Sum of Values Taken from DataFrame", "Difference"]).to_html(index=False)
html = f"""
{html}
"""
return html