# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset
file_path = r'C:\Users\sheyi\OneDrive\Documents\BuildingsProject\UCLanPrestonBldDATA.xlsx'
data = pd.ExcelFile(file_path)

# Check sheet names
print(data.sheet_names)

# Load the relevant sheet
gas_data = data.parse('GasData(kWh)')

# Display the first few rows of the dataset
gas_data.head()

['GasData(kWh)']

# Filter data for Livesey House
livesey_house_data = gas_data[gas_data['Site'] == 'Livesey House']

# Drop unnecessary columns and reset index
livesey_house_data = livesey_house_data.drop(columns=['Site', 'Units']).reset_index(drop=True)

# Transpose the data for proper structure
livesey_house_data = livesey_house_data.T
livesey_house_data.columns = ['GasConsumption_kWh']

# Generate a Date column based on the time period
livesey_house_data['Date'] = pd.date_range(start='2010-01-01', periods=len(livesey_house_data), freq='ME')

# Reset index
livesey_house_data = livesey_house_data.reset_index(drop=True)

# Display the cleaned data
livesey_house_data.head()

import matplotlib.pyplot as plt

# Plot the gas consumption for the Kirkham building
plt.figure(figsize=(12, 6))
plt.plot(Livesey_data_clean.index, Livesey_data_clean['Gas Consumption (kWh)'], 
         marker='o', linestyle='-', label='Gas Consumption (kWh)')
plt.title('Gas Consumption Over Time - Livesey House', fontsize=14)
plt.xlabel('Date', fontsize=12)
plt.ylabel('Gas Consumption (kWh)', fontsize=12)
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()

# Plot the trend of gas consumption over time
plt.figure(figsize=(12, 6))
plt.plot(livesey_house_data['Date'], livesey_house_data['GasConsumption_kWh'], label='Gas Consumption (kWh)', color='orange')
plt.title('Energy Consumption Trend - Livesey House (2010-2020)', fontsize=14)
plt.xlabel('Date (Month-Year)', fontsize=12)
plt.ylabel('Gas Consumption (kWh)', fontsize=12)
plt.xticks(
    ticks=livesey_house_data['Date'][::12],  # Show one tick per year
    labels=livesey_house_data['Date'][::12].dt.strftime('%b-%Y'),
    rotation=45  # Rotate for better readability
)
plt.grid(True, linestyle='--', alpha=0.7)
plt.legend()
plt.show()

# Seasonal patterns
livesey_house_data['Year'] = livesey_house_data['Date'].dt.year
livesey_house_data['Month'] = livesey_house_data['Date'].dt.month

plt.figure(figsize=(12, 6))
sns.boxplot(data=livesey_house_data, x='Month', y='GasConsumption_kWh')
plt.title('Monthly Energy Consumption Patterns - Livesey House', fontsize=14)
plt.xlabel('Month', fontsize=12)
plt.ylabel('Gas Consumption (kWh)', fontsize=12)
plt.grid(True, linestyle='--', alpha=0.7)
plt.show()

# Filter the data for Livesey House
Livesey_data = gas_data[gas_data['Site'] == 'Livesey House']

# Transpose the data to have months as a time-series format
Livesey_data_clean = Livesey_data.drop(columns=['Site', 'Units']).transpose()
Livesey_data_clean.columns = ['Gas Consumption (kWh)']

# Convert the index to datetime, assuming the columns represent monthly data
Livesey_data_clean.index = pd.date_range(start='2010-01', periods=len(Livesey_data_clean), freq='ME')

# Display the cleaned data to ensure correctness
Livesey_data_clean.head()

# Split data into training (2010–2015) and testing (2016)
train_data = livesey_house_data[livesey_house_data['Year'] <= 2015]
test_data = livesey_house_data[livesey_house_data['Year'] == 2016]

# Define features (Year, Month, Temperature) and target (Gas Consumption)
X_train = train_data[['Year', 'Month', 'Preston Temperature in C']]
y_train = train_data['GasConsumption_kWh']
X_test = test_data[['Year', 'Month', 'Preston Temperature in C']]
y_test = test_data['GasConsumption_kWh']

# Train the Random Forest model
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred_rf = rf_model.predict(X_test)

# Evaluate the model
mae_rf = mean_absolute_error(y_test, y_pred_rf)
rmse_rf = np.sqrt(mean_squared_error(y_test, y_pred_rf))
r2_rf = r2_score(y_test, y_pred_rf)

# Display evaluation metrics
print(f"Random Forest Results (2016):")
print(f"Mean Absolute Error (MAE): {mae_rf:.2f} kWh")
print(f"Root Mean Squared Error (RMSE): {rmse_rf:.2f} kWh")
print(f"R² Score: {r2_rf:.3f}")

# Display predicted and actual values
predicted_vs_actual_rf = test_data.copy()
predicted_vs_actual_rf['Predicted_kWh'] = y_pred_rf
print(predicted_vs_actual_rf[['Date', 'GasConsumption_kWh', 'Predicted_kWh']])

# Visualize predicted vs actual values for 2016
plt.figure(figsize=(10, 6))
plt.plot(test_data['Date'], y_test, label='Actual Consumption', marker='o')
plt.plot(test_data['Date'], y_pred_rf, label='Predicted Consumption', marker='x')
plt.xlabel('Date')
plt.ylabel('Gas Consumption (kWh)')
plt.title('Actual vs Predicted Gas Consumption (2016) Livesey House')
plt.legend()
plt.grid(True)
plt.show()

Random Forest Results (2016):
Mean Absolute Error (MAE): 4716.15 kWh
Root Mean Squared Error (RMSE): 6325.34 kWh
R² Score: 0.907
         Date  GasConsumption_kWh  Predicted_kWh
72 2016-01-31            44521.23   53301.155910
73 2016-02-29            49879.95   56888.604842
74 2016-03-31            53280.31   52675.375700
75 2016-04-30            44500.30   51522.668333
76 2016-05-31            22460.13   11954.993626
77 2016-06-30             2327.30     696.929934
78 2016-07-31                0.00     167.957842
79 2016-08-31                0.00     216.157206
80 2016-09-30                0.00     412.140019
81 2016-10-31            20254.29   29968.970141
82 2016-11-30            45611.88   46342.332309
83 2016-12-31            37202.96   47004.037802

# Filter the data for the Kirkham building
kirkham_data = gas_data[gas_data['Site'] == 'Kirkham']

# Transpose the data to have months as a time-series format
kirkham_data_clean = kirkham_data.drop(columns=['Site', 'Units']).transpose()
kirkham_data_clean.columns = ['Gas Consumption (kWh)']

# Convert the index to datetime, assuming the columns represent monthly data
kirkham_data_clean.index = pd.date_range(start='2010-01', periods=len(kirkham_data_clean), freq='ME')

# Display the cleaned data to ensure correctness
kirkham_data_clean.head()

import matplotlib.pyplot as plt

# Plot the gas consumption for the Kirkham building
plt.figure(figsize=(12, 6))
plt.plot(kirkham_data_clean.index, kirkham_data_clean['Gas Consumption (kWh)'], 
         marker='o', linestyle='-', label='Gas Consumption (kWh)')
plt.title('Gas Consumption Over Time - Kirkham Building', fontsize=14)
plt.xlabel('Date', fontsize=12)
plt.ylabel('Gas Consumption (kWh)', fontsize=12)
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()

# Filter data for Livesey House
livesey_house_data = gas_data[gas_data['Site'] == 'Livesey House']

# Drop unnecessary columns and reset index
livesey_house_data = livesey_house_data.drop(columns=['Site', 'Units']).reset_index(drop=True)

# Transpose the data for proper structure
livesey_house_data = livesey_house_data.T
livesey_house_data.columns = ['GasConsumption_kWh']

# Generate a Date column based on the time period
livesey_house_data['Date'] = pd.date_range(start='2010-01-01', periods=len(livesey_house_data), freq='ME')

# Reset index
livesey_house_data = livesey_house_data.reset_index(drop=True)

# Display the cleaned data
livesey_house_data.head()

# Importing necessary libraries
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Load gas consumption data from the provided file
uclan_data_path = r'C:\Users\sheyi\OneDrive\Documents\BuildingsProject\UCLanPrestonBldDATA.xlsx'
uclan_gas_data = pd.ExcelFile(uclan_data_path).parse('GasData(kWh)')

# Extract Livesey House gas consumption data
livesey_house_data = uclan_gas_data[uclan_gas_data['Site'] == 'Livesey House']
livesey_house_data = livesey_house_data.drop(columns=['Site', 'Units']).T
livesey_house_data.columns = ['GasConsumption_kWh']

# Create a Date column
livesey_house_data['Date'] = pd.date_range(start='2010-01', periods=len(livesey_house_data), freq='ME')

# Load temperature data from the provided file
temperature_data_path = r'C:\Users\sheyi\OneDrive\Documents\BuildingsProject\Preston monthly avarage temperature data from 2010 to 2023.xlsx'
temperature_data = pd.ExcelFile(temperature_data_path).parse('Temperature')

# Clean and prepare temperature data
temperature_data['Date'] = pd.to_datetime(
    temperature_data['Year'].astype(str) + '-' + temperature_data['Month'], format='%Y-%B'
)
temperature_data['Date'] = temperature_data['Date'] + pd.offsets.MonthEnd(0)
temperature_data = temperature_data[['Date', 'Preston Temperature in C']]

# Merge gas consumption data with temperature data
livesey_house_data = pd.merge(livesey_house_data, temperature_data, on='Date', how='inner')

# Add 'Year' and 'Month' columns for feature engineering
livesey_house_data['Year'] = livesey_house_data['Date'].dt.year
livesey_house_data['Month'] = livesey_house_data['Date'].dt.month

# Split data into training (2010-2018) and testing (2019)
train_data = livesey_house_data[livesey_house_data['Year'] < 2019]
test_data = livesey_house_data[livesey_house_data['Year'] == 2019]

# Define features (Year, Month, Temperature) and target (Gas Consumption)
X_train = train_data[['Year', 'Month', 'Preston Temperature in C']]
y_train = train_data['GasConsumption_kWh']
X_test = test_data[['Year', 'Month', 'Preston Temperature in C']]
y_test = test_data['GasConsumption_kWh']

# Train the regression model
model_with_temp = LinearRegression()
model_with_temp.fit(X_train, y_train)

# Make predictions on the test set
y_pred_with_temp = model_with_temp.predict(X_test)

# Evaluate the model
mae_with_temp = mean_absolute_error(y_test, y_pred_with_temp)
rmse_with_temp = np.sqrt(mean_squared_error(y_test, y_pred_with_temp))
r2_with_temp = r2_score(y_test, y_pred_with_temp)

# Display evaluation metrics
print(f"Mean Absolute Error (MAE): {mae_with_temp:.2f} kWh")
print(f"Root Mean Squared Error (RMSE): {rmse_with_temp:.2f} kWh")
print(f"R² Score: {r2_with_temp:.3f}")

# Display predicted and actual values
predicted_vs_actual = test_data.copy()
predicted_vs_actual['Predicted_kWh'] = y_pred_with_temp
print(predicted_vs_actual[['Date', 'GasConsumption_kWh', 'Predicted_kWh']])

Mean Absolute Error (MAE): 6577.29 kWh
Root Mean Squared Error (RMSE): 7644.53 kWh
R² Score: 0.869
          Date  GasConsumption_kWh  Predicted_kWh
108 2019-01-31            61903.38   53886.591378
109 2019-02-28            44119.76   43779.534052
110 2019-03-31            44909.12   38786.508723
111 2019-04-30            33723.26   33793.483394
112 2019-05-31            16595.75   23686.426067
113 2019-06-30                0.00    8465.336743
114 2019-07-31                0.00   -6755.752581
115 2019-08-31                0.00   -1520.713915
116 2019-09-30              158.28    8828.356750
117 2019-10-31            16185.32   29405.491410
118 2019-11-30            38989.17   49982.626070
119 2019-12-31            42442.18   50103.632738

# Importing necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Load gas consumption data from the provided file
uclan_data_path = uclan_data_path = r'C:\Users\sheyi\OneDrive\Documents\BuildingsProject\UCLanPrestonBldDATA.xlsx'
uclan_gas_data = pd.ExcelFile(uclan_data_path).parse('GasData(kWh)')

# Extract Livesey House gas consumption data
livesey_house_data = uclan_gas_data[uclan_gas_data['Site'] == 'Livesey House']
livesey_house_data = livesey_house_data.drop(columns=['Site', 'Units']).T
livesey_house_data.columns = ['GasConsumption_kWh']

# Create a Date column
livesey_house_data['Date'] = pd.date_range(start='2010-01', periods=len(livesey_house_data), freq='ME')

# Load temperature data from the provided file
temperature_data_path = r'C:\Users\sheyi\OneDrive\Documents\BuildingsProject\Preston monthly avarage temperature data from 2010 to 2023.xlsx'
temperature_data = pd.ExcelFile(temperature_data_path).parse('Temperature')

# Clean and prepare temperature data
temperature_data['Date'] = pd.to_datetime(
    temperature_data['Year'].astype(str) + '-' + temperature_data['Month'], format='%Y-%B'
)
temperature_data['Date'] = temperature_data['Date'] + pd.offsets.MonthEnd(0)
temperature_data = temperature_data[['Date', 'Preston Temperature in C']]

# Merge gas consumption data with temperature data
livesey_house_data = pd.merge(livesey_house_data, temperature_data, on='Date', how='inner')

# Add 'Year' and 'Month' columns for feature engineering
livesey_house_data['Year'] = livesey_house_data['Date'].dt.year
livesey_house_data['Month'] = livesey_house_data['Date'].dt.month

# Split data into training (2010-2018) and testing (2019)
train_data = livesey_house_data[livesey_house_data['Year'] < 2019]
test_data = livesey_house_data[livesey_house_data['Year'] == 2019]

# Define features (Year, Month, Temperature) and target (Gas Consumption)
X_train = train_data[['Year', 'Month', 'Preston Temperature in C']]
y_train = train_data['GasConsumption_kWh']
X_test = test_data[['Year', 'Month', 'Preston Temperature in C']]
y_test = test_data['GasConsumption_kWh']

# Train the Random Forest model
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred_rf = rf_model.predict(X_test)

# Evaluate the model
mae_rf = mean_absolute_error(y_test, y_pred_rf)
rmse_rf = np.sqrt(mean_squared_error(y_test, y_pred_rf))
r2_rf = r2_score(y_test, y_pred_rf)

# Display evaluation metrics
print(f"Random Forest Results:")
print(f"Mean Absolute Error (MAE): {mae_rf:.2f} kWh")
print(f"Root Mean Squared Error (RMSE): {rmse_rf:.2f} kWh")
print(f"R² Score: {r2_rf:.3f}")

# Display predicted and actual values
predicted_vs_actual_rf = test_data.copy()
predicted_vs_actual_rf['Predicted_kWh'] = y_pred_rf
print(predicted_vs_actual_rf[['Date', 'GasConsumption_kWh', 'Predicted_kWh']])



# Visualize predicted vs actual values
plt.figure(figsize=(10, 6))
plt.plot(test_data['Date'], y_test, label='Actual Consumption', marker='o')
plt.plot(test_data['Date'], y_pred_rf, label='Predicted Consumption', marker='x')
plt.xlabel('Date')
plt.ylabel('Gas Consumption (kWh)')
plt.title('Actual vs Predicted Gas Consumption (2019)')
plt.legend()
plt.grid(True)
plt.show()

Random Forest Results:
Mean Absolute Error (MAE): 2741.32 kWh
Root Mean Squared Error (RMSE): 4173.44 kWh
R² Score: 0.961
          Date  GasConsumption_kWh  Predicted_kWh
108 2019-01-31            61903.38   58353.318150
109 2019-02-28            44119.76   52634.586344
110 2019-03-31            44909.12   45036.396622
111 2019-04-30            33723.26   33520.605626
112 2019-05-31            16595.75   17811.027373
113 2019-06-30                0.00     442.187000
114 2019-07-31                0.00      90.670930
115 2019-08-31                0.00     777.962900
116 2019-09-30              158.28     214.554079
117 2019-10-31            16185.32   22761.127639
118 2019-11-30            38989.17   47305.258450
119 2019-12-31            42442.18   45468.982200

# Feature Importance
feature_importances = rf_model.feature_importances_
features = ['Year', 'Month', 'Preston Temperature in C']
importance_df = pd.DataFrame({
    'Feature': features,
    'Importance': feature_importances
}).sort_values(by='Importance', ascending=False)

# Print feature importance
print("Feature Importance:")
print(importance_df)

Feature Importance:
                    Feature  Importance
2  Preston Temperature in C    0.909685
1                     Month    0.052893
0                      Year    0.037421

# Visualize feature importance
plt.figure(figsize=(8, 6))
plt.barh(importance_df['Feature'], importance_df['Importance'], color='skyblue')
plt.xlabel('Importance')
plt.ylabel('Feature')
plt.title('Feature Importance for Random Forest')
plt.gca().invert_yaxis()  # Invert y-axis to show the most important feature at the top
plt.show()

# Importing necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Load gas consumption data from the provided file
uclan_data_path = uclan_data_path = r'C:\Users\sheyi\OneDrive\Documents\BuildingsProject\UCLanPrestonBldDATA.xlsx'
uclan_gas_data = pd.ExcelFile(uclan_data_path).parse('GasData(kWh)')

# Extract Livesey House gas consumption data
Kirkham_data = uclan_gas_data[uclan_gas_data['Site'] == 'Kirkham']
Kirkham_data = Kirkham_data.drop(columns=['Site', 'Units']).T
Kirkham_data.columns = ['GasConsumption_kWh']

# Create a Date column
Kirkham_data['Date'] = pd.date_range(start='2010-01', periods=len(Kirkham_data), freq='ME')

# Load temperature data from the provided file
temperature_data_path = r'C:\Users\sheyi\OneDrive\Documents\BuildingsProject\Preston monthly avarage temperature data from 2010 to 2023.xlsx'
temperature_data = pd.ExcelFile(temperature_data_path).parse('Temperature')

# Clean and prepare temperature data
temperature_data['Date'] = pd.to_datetime(
    temperature_data['Year'].astype(str) + '-' + temperature_data['Month'], format='%Y-%B'
)
temperature_data['Date'] = temperature_data['Date'] + pd.offsets.MonthEnd(0)
temperature_data = temperature_data[['Date', 'Preston Temperature in C']]

# Merge gas consumption data with temperature data
Kirkham_data = pd.merge(Kirkham_data, temperature_data, on='Date', how='inner')

# Add 'Year' and 'Month' columns for feature engineering
Kirkham_data['Year'] = Kirkham_data['Date'].dt.year
Kirkham_data['Month'] = Kirkham_data['Date'].dt.month

# Split data into training (2010-2018) and testing (2019)
train_data = Kirkham_data[Kirkham_data['Year'] < 2019]
test_data = Kirkham_data[Kirkham_data['Year'] == 2019]

# Define features (Year, Month, Temperature) and target (Gas Consumption)
X_train = train_data[['Year', 'Month', 'Preston Temperature in C']]
y_train = train_data['GasConsumption_kWh']
X_test = test_data[['Year', 'Month', 'Preston Temperature in C']]
y_test = test_data['GasConsumption_kWh']

# Train the Random Forest model
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred_rf = rf_model.predict(X_test)

# Evaluate the model
mae_rf = mean_absolute_error(y_test, y_pred_rf)
rmse_rf = np.sqrt(mean_squared_error(y_test, y_pred_rf))
r2_rf = r2_score(y_test, y_pred_rf)

# Display evaluation metrics
print(f"Random Forest Results:")
print(f"Mean Absolute Error (MAE): {mae_rf:.2f} kWh")
print(f"Root Mean Squared Error (RMSE): {rmse_rf:.2f} kWh")
print(f"R² Score: {r2_rf:.3f}")

# Display predicted and actual values
predicted_vs_actual_rf = test_data.copy()
predicted_vs_actual_rf['Predicted_kWh'] = y_pred_rf
print(predicted_vs_actual_rf[['Date', 'GasConsumption_kWh', 'Predicted_kWh']])



# Visualize predicted vs actual values
plt.figure(figsize=(10, 6))
plt.plot(test_data['Date'], y_test, label='Actual Consumption', marker='o')
plt.plot(test_data['Date'], y_pred_rf, label='Predicted Consumption', marker='x')
plt.xlabel('Date')
plt.ylabel('Gas Consumption (kWh)')
plt.title('Actual vs Predicted Gas Consumption Kirkh (2019)')
plt.legend()
plt.grid(True)
plt.show()

Random Forest Results:
Mean Absolute Error (MAE): 2929.20 kWh
Root Mean Squared Error (RMSE): 4177.25 kWh
R² Score: 0.970
          Date  GasConsumption_kWh  Predicted_kWh
108 2019-01-31            70209.26   70045.830800
109 2019-02-28            48932.41   51321.963242
110 2019-03-31            50120.50   45202.751927
111 2019-04-30            30997.93   35607.786485
112 2019-05-31            14734.54   17583.606142
113 2019-06-30             1954.84    2385.623500
114 2019-07-31             2083.12    1755.522018
115 2019-08-31             1830.87    1767.239700
116 2019-09-30             2170.73    3548.858361
117 2019-10-31            28585.00   38059.038133
118 2019-11-30            51351.87   58913.670786
119 2019-12-31            57026.72   58011.499800

# Seasonal patterns
Kirkham_data['Year'] = Kirkham_data['Date'].dt.year
Kirkham_data['Month'] = Kirkham_data['Date'].dt.month

plt.figure(figsize=(12, 6))
sns.boxplot(data=Kirkham_data, x='Month', y='GasConsumption_kWh')
plt.title('Monthly Energy Consumption Patterns - Kirkham Building', fontsize=14)
plt.xlabel('Month', fontsize=12)
plt.ylabel('Gas Consumption (kWh)', fontsize=12)
plt.grid(True, linestyle='--', alpha=0.7)
plt.show()

	Site	Units	Jan 10	Feb 10	Mar 10	Apr 10	May 10	Jun 10	Jul 10	Aug 10	...	Mar 20	Apr 20	May 20	Jun 20	Jul 20	Aug 20	Sep 20	Oct 20	Nov 20	Dec 20
0	Livesey House	kWh	61829.740000	59240.12	51277.655802	29831.212250	4409.812403	4267.560390	4409.812403	4409.812403	...	27447.66	400.87	0.00	0.00	0.00	0.00	0.00	20280.32	33409.65	37914.52
1	Library	kWh	366652.960000	267362.37	268890.230000	107600.040000	122853.435938	94315.526840	56337.931222	1438.696000	...	108329.87	65585.08	51582.27	36278.75	37332.08	46933.91	71113.41	94731.47	106556.01	205203.31
2	Kirkham	kWh	152675.310000	138026.66	116205.393742	67357.212925	51917.224167	3443.529167	2673.304375	1201.442077	...	36368.14	256.12	3930.80	1932.62	2027.38	1912.34	2558.31	32110.50	56987.63	68389.76
3	Arts Centre St Peters	kWh	7909.909665	0.00	40307.827497	17292.850210	2111.392229	2043.282803	2111.392229	2111.392229	...	12441.44	0.00	0.00	0.00	0.00	0.00	0.00	11908.53	21564.41	16101.61
4	Computer & Technology	kWh	14401.860000	12296.36	53017.457627	5544.573373	2690.993839	1654.021161	1270.535935	1258.654516	...	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00

	GasConsumption_kWh	Date
0	61829.740000	2010-01-31
1	59240.120000	2010-02-28
2	51277.655802	2010-03-31
3	29831.212250	2010-04-30
4	4409.812403	2010-05-31

	Gas Consumption (kWh)
2010-01-31	61829.740000
2010-02-28	59240.120000
2010-03-31	51277.655802
2010-04-30	29831.212250
2010-05-31	4409.812403

Date	Actual Consumption (kWh)	Predicted Consumption (kWh)
2016-01-31	44,521.23	53,301.16
2016-02-29	49,879.95	56,888.60
2016-03-31	53,280.31	52,675.38
2016-04-30	44,500.30	51,522.67
2016-05-31	22,460.13	11,954.99
2016-06-30	2,327.30	696.93
2016-07-31	0.00	167.96
2016-08-31	0.00	216.16
2016-09-30	0.00	412.14
2016-10-31	20,254.29	29,968.97
2016-11-30	45,611.88	46,342.33
2016-12-31	37,202.96	47,004.04

	Gas Consumption (kWh)
2010-01-31	152675.310000
2010-02-28	138026.660000
2010-03-31	116205.393742
2010-04-30	67357.212925
2010-05-31	51917.224167

Explanation of the Visualizations¶

1. Gas Consumption Trend Over Time (2010-2020)¶

2. Monthly Gas Consumption Patterns¶

Summary of Visualizations:¶

Random Forest Model Analysis for Livesey House (2016)¶

Model Performance Metrics¶

Predicted vs. Actual Gas Consumption (2016)¶

Key Insights¶

Visualization¶

Conclusion¶

Decomposition of Gas Consumption Time Series - Kirkham Building¶

Linear Regression Model Analysis¶

Model Performance Metrics¶

Predicted vs. Actual Gas Consumption (2019)¶

Key Insights¶

Recommendations¶

Random Forest Model Analysis¶

Model Performance Metrics¶

Predicted vs. Actual Gas Consumption (2019)¶

Key Insights¶

Predicted vs. Actual Gas Consumption Plot¶

Purpose of the Plot¶

Interpretation¶

Conclusion¶

Feature Importance Analysis for Random Forest Model¶

Purpose of Feature Importance¶

Feature Importance Results¶

Observations¶

Visualization¶

space¶

space¶

Date	Actual Consumption (kWh)	Predicted Consumption (kWh)
2019-01-31	61,903.38	53,886.59
2019-02-28	44,119.76	43,779.53
2019-03-31	44,909.12	38,786.51
2019-04-30	33,723.26	33,793.48
2019-05-31	16,595.75	23,686.43
2019-06-30	0.00	8,465.34
2019-07-31	0.00	-6,755.75
2019-08-31	0.00	-1,520.71
2019-09-30	158.28	8,828.36
2019-10-31	16,185.32	29,405.49
2019-11-30	38,989.17	49,982.63
2019-12-31	42,442.18	50,103.63

Date	Actual Consumption (kWh)	Predicted Consumption (kWh)
2019-01-31	61,903.38	58,353.32
2019-02-28	44,119.76	52,634.59
2019-03-31	44,909.12	45,036.40
2019-04-30	33,723.26	33,520.61
2019-05-31	16,595.75	17,811.03
2019-06-30	0.00	442.19
2019-07-31	0.00	90.67
2019-08-31	0.00	777.96
2019-09-30	158.28	214.55
2019-10-31	16,185.32	22,761.13
2019-11-30	38,989.17	47,305.26
2019-12-31	42,442.18	45,468.98

Feature	Importance
Preston Temperature in C	90.97%
Month	5.29%
Year	3.74%