# Install a blanket "ignore" filter for all warnings before the remaining
# imports and pipeline calls run.
import warnings
warnings.filterwarnings("ignore")
from datetime import datetime, timedelta
# NOTE: `logging` here is the openstef_dbc logging helper, not the stdlib module.
from openstef_dbc.log import logging
import numpy as np
import plotly.graph_objects as go
from app_settings import AppSettings
# Application settings object; its attributes (loglevel, env, paths_*) are read
# by the statements further down in this file.
Settings = AppSettings()
# Configure logging with the log level and runtime environment from the settings.
logging.configure_logging(loglevel=Settings.loglevel, runtime_env=Settings.env)
from openstef.data_classes.prediction_job import PredictionJobDataClass

# Example prediction job that is passed to the train and forecast pipelines
# further down.
prediction_job = PredictionJobDataClass(
    # Identification
    id=1337,
    name="Example",
    # Model configuration
    model="xgb",
    forecast_type="demand",
    quantiles=[0.1, 0.3, 0.5, 0.7, 0.9],
    default_modelspecs=None,
    # Location
    lat=52.0,
    lon=5.0,
    # Time axis: 47 hours expressed in minutes, at a 15-minute resolution
    horizon_minutes=47 * 60,
    resolution_minutes=15,
)
import pandas as pd

# Load the example input; the "index" column becomes the frame index and is
# parsed as dates.
data = pd.read_csv('data/example_input.csv', index_col='index', parse_dates=True)

# The last `split` rows form the test period, everything before it is
# training data.
split = 200
data_train = data.iloc[:-split, :]
data_test = data.iloc[-split:, :]

# Forecast input: a copy of the full frame with the load of the test period
# blanked out. The assignment goes through a single .iloc call on the frame
# itself; the chained form `data_forecast["load"].iloc[-split:] = ...` writes
# into an intermediate Series, which triggers SettingWithCopyWarning and
# leaves the frame unchanged when pandas copy-on-write is active.
data_forecast = data.copy()
data_forecast.iloc[-split:, data_forecast.columns.get_loc("load")] = np.nan

data_train.head(5)
load | APX | clouds | radiation | temp | winddeg | windspeed | windspeed_100m | pressure | humidity | ... | sjv_E1A | sjv_E1B | sjv_E1C | sjv_E2A | sjv_E2B | sjv_E3A | sjv_E3B | sjv_E3C | sjv_E3D | sjv_E4A | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
index | |||||||||||||||||||||
2020-10-02 10:00:00+00:00 | 2.620000 | 34.0 | 99.758911 | 1.552899e+06 | 16.449036 | 154.711456 | 3.527778 | 9.349441 | 99453.476562 | 0.686240 | ... | 0.000031 | 0.000030 | 0.000029 | 0.000033 | 0.000032 | 0.000061 | 0.000048 | 0.000048 | 0.000031 | 0.0 |
2020-10-02 10:15:00+00:00 | 0.796667 | 34.0 | 99.819193 | 1.575618e+06 | 16.400948 | 157.491554 | 3.557639 | 9.232026 | 99416.363281 | 0.683780 | ... | 0.000032 | 0.000030 | 0.000029 | 0.000033 | 0.000032 | 0.000060 | 0.000048 | 0.000048 | 0.000031 | 0.0 |
2020-10-02 10:30:00+00:00 | 0.300000 | 34.0 | 99.879475 | 1.598338e+06 | 16.352859 | 160.271652 | 3.587500 | 9.114612 | 99379.250000 | 0.681319 | ... | 0.000032 | 0.000031 | 0.000029 | 0.000033 | 0.000031 | 0.000058 | 0.000048 | 0.000048 | 0.000031 | 0.0 |
2020-10-02 10:45:00+00:00 | 1.773333 | 34.0 | 99.939756 | 1.594736e+06 | 16.304771 | 163.051750 | 3.617361 | 8.997197 | 99342.136719 | 0.678859 | ... | 0.000032 | 0.000030 | 0.000029 | 0.000032 | 0.000031 | 0.000057 | 0.000048 | 0.000048 | 0.000031 | 0.0 |
2020-10-02 11:00:00+00:00 | 1.740000 | 28.8 | 100.000038 | 1.591135e+06 | 16.256683 | 165.831848 | 3.647222 | 8.879783 | 99305.023438 | 0.676398 | ... | 0.000031 | 0.000029 | 0.000027 | 0.000031 | 0.000030 | 0.000057 | 0.000048 | 0.000048 | 0.000031 | 0.0 |
5 rows × 25 columns
import plotly.graph_objects as go

# Plot the measured load of the training period as a single trace.
figure = (
    go.Figure()
    .add_scatter(x=data_train.index, y=data_train["load"], name="Measured")
    .update_layout(title="Historic load")
)
figure.show()
from openstef.pipeline.train_model import train_model_pipeline

# Train a model for the example prediction job on the training split, with the
# old-model age check switched off. MLflow tracking URI and artifact folder
# are taken from the application settings.
train_model_pipeline(
    prediction_job,
    data_train,
    artifact_folder=Settings.paths_artifact_folder,
    mlflow_tracking_uri=Settings.paths_mlflow_tracking_uri,
    check_old_model_age=False,
)
2023-02-04 12:30:09 [info ] Proloaf not available, setting constructor to None [openstef.model.model_creator] 2023-02-04 12:30:09 [warning ] feature_modules not an attribute of the old model, using None [MLflowSerializer] experiment_name=1337 2023-02-04 12:30:09 [info ] Model successfully loaded with MLflow [MLflowSerializer] 2023-02-04 12:30:09 [info ] Found 2 values of constant load (repeated values), converted to NaN value. [openstef.validation.validation] cleansing_step=repeated_values frac_values=0.00017972681524083394 num_values=2 pj_id=1337 2023-02-04 12:30:09 [info ] Removed 2 NaN values [openstef.validation.validation] num_removed_values=2 2023-02-04 12:30:15 [info ] Fitted a new model, not yet stored [root] 2023-02-04 12:30:17 [info ] New model is better than old model, continuing with training procces [openstef.pipeline.train_model] 2023-02-04 12:30:20 [info ] Model saved with MLflow [MLflowSerializer] experiment_name=1337 2023-02-04 12:30:22 [info ] Logged figures to MLflow. [MLflowSerializer] 2023-02-04 12:30:22 [info ] Writing reports to ./output/artifacts/1337 [openstef.metrics.reporter]
from openstef.pipeline.create_forecast import create_forecast_pipeline

# Run the forecast pipeline for the example prediction job on the input frame
# whose load in the test period was set to NaN; the result is kept in
# `forecast` for plotting.
forecast = create_forecast_pipeline(
    prediction_job,
    data_forecast,
    mlflow_tracking_uri=Settings.paths_mlflow_tracking_uri,
)
2023-02-04 12:30:24 [warning ] feature_modules not an attribute of the old model, using None [MLflowSerializer] experiment_name=1337 2023-02-04 12:30:24 [info ] Model successfully loaded with MLflow [MLflowSerializer] 2023-02-04 12:30:24 [info ] Model successfully loaded with MLflow [MLflowSerializer] 2023-02-04 12:30:24 [info ] Found 202 values of constant load (repeated values), converted to NaN value. [openstef.validation.validation] cleansing_step=repeated_values frac_values=0.0178319209039548 num_values=202 pj_id=1337 2023-02-04 12:30:26 [info ] Postproces in preparation of storing [openstef.postprocessing.postprocessing]
import plotly.graph_objects as go

# Overlay the measured load of the test period with the forecast column of
# the pipeline result.
figure = (
    go.Figure()
    .add_scatter(x=data_test.index, y=data_test["load"], name="Measured")
    .add_scatter(x=forecast.index, y=forecast["forecast"], name="Forecasted")
    .update_layout(title="Forecast 48h")
)
figure.show()
from openstef_dbc.database import DataBase
# Database interface constructed from the application settings.
database = DataBase(Settings)
# Id of the prediction job that is looked up and queried below.
pid = 321
# Fetch the prediction job for `pid`; the result is not assigned to a name.
database.get_prediction_job(pid)
2023-02-04 12:30:27 [info ] package: mysql.connector.plugins [mysql.connector.authentication] 2023-02-04 12:30:27 [info ] plugin_name: mysql_native_password [mysql.connector.authentication] 2023-02-04 12:30:27 [info ] AUTHENTICATION_PLUGIN_CLASS: MySQLNativePasswordAuthPlugin [mysql.connector.authentication]
PredictionJobDataClass(id=321, model='xgb', forecast_type='demand', horizon_minutes=2880, resolution_minutes=15, lat=52.067, lon=5.894, name='Location_B', train_components=True, description='Location_B_System_1+Location_B_System_2', quantiles=[0.05, 0.1, 0.3, 0.5, 0.7, 0.9, 0.95], train_split_func=None, backtest_split_func=None, train_horizons_minutes=None, default_modelspecs=None, save_train_forecasts=False, completeness_treshold=0.5, minimal_table_length=100, flatliner_treshold=24, depends_on=None, sid=None, turbine_type=None, n_turbines=None, hub_height=None, pipelines_to_run=[<PipelineType.TRAIN: 'train'>, <PipelineType.HYPER_PARMATERS: 'hyper_parameters'>, <PipelineType.FORECAST: 'forecast'>], alternative_forecast_model_pid=None)
# Naive timestamp holding the current UTC time; it anchors the query window.
now = datetime.utcnow()

# Look up the job that belongs to `pid` so the location passed below matches
# the job whose input is requested. The in-memory example `prediction_job` is
# a different job with different coordinates and must not be used here.
queried_job = database.get_prediction_job(pid)

# Model input from 120 days back until one day ahead; rows containing any NaN
# are dropped.
data = database.get_model_input(
    pid=pid,
    datetime_start=now - timedelta(days=120),
    datetime_end=now + timedelta(days=1),
    location=(queried_job.lat, queried_job.lon),
).dropna()
data.head(5)
2023-02-04 12:30:28 [info ] Combining sources into single dataframe [Weather]
load | radiation | temp | windspeed | pressure | |
---|---|---|---|---|---|
2022-10-23 13:30:00+00:00 | -11.766667 | 315037.0 | 8.530 | 4.0600 | 69406.0 |
2022-10-23 13:45:00+00:00 | -7.733333 | 235796.0 | 7.715 | 4.2075 | 77971.0 |
2022-10-23 14:00:00+00:00 | -3.186667 | 156555.0 | 6.900 | 4.3550 | 86536.0 |
2022-10-23 14:15:00+00:00 | 1.106667 | 118011.5 | 6.085 | 4.5025 | 95101.0 |
2022-10-23 14:30:00+00:00 | 5.276667 | 79468.0 | 5.270 | 4.6500 | 103666.0 |
from openstef.tasks import train_model
# Run the train_model task entry point with the application settings and the
# database connection created earlier in this file.
train_model.main(config=Settings, database=database)
2023-02-04 12:30:29 [info ] Task started [openstef.tasks.utils.taskcontext] task=train_model 2023-02-04 12:30:29 [info ] Querying prediction jobs from database [openstef.tasks.utils.taskcontext] model_type=['xgb', 'xgb_quantile', 'lgb', 'linear', 'proloaf'] task=train_model 2023-02-04 12:30:29 [info ] Pre-loop completed [openstef.tasks.utils.taskcontext] ktp_checkpoint=pre-loop ktp_runtime=0.1 task=train_model 2023-02-04 12:30:29 [info ] Iteration started [openstef.tasks.utils.taskcontext] datetime_end=datetime.datetime(2023, 2, 4, 12, 30, 29, 832705) iteration=0 num_jobs=4 pid=317 task=train_model 2023-02-04 12:30:29 [info ] Added metadata to predictionjob completed [openstef.tasks.utils.taskcontext] iteration=0 ktp_checkpoint=Added metadata to PredictionJob ktp_runtime=0.0 task=train_model 2023-02-04 12:30:31 [info ] Combining sources into single dataframe [Weather] 2023-02-04 12:30:31 [info ] Retrieved timeseries input completed [openstef.tasks.utils.taskcontext] iteration=0 ktp_checkpoint=Retrieved timeseries input ktp_runtime=1.315 task=train_model 2023-02-04 12:30:31 [warning ] No old model found, training new model [openstef.pipeline.train_model] pid=317 2023-02-04 12:30:31 [info ] Found 1545 values of constant load (repeated values), converted to NaN value. [openstef.validation.validation] cleansing_step=repeated_values frac_values=0.13410294245291207 num_values=1545 pj_id=317 2023-02-04 12:30:31 [info ] Removed 1545 NaN values [openstef.validation.validation] num_removed_values=1545 2023-02-04 12:30:36 [info ] Fitted a new model, not yet stored [root]
2023/02/04 13:30:38 INFO mlflow.tracking.fluent: Experiment with name '317' does not exist. Creating a new experiment.
2023-02-04 12:30:38 [info ] No previous model found in MLflow [MLflowSerializer] experiment_name=317 2023-02-04 12:30:40 [info ] Model saved with MLflow [MLflowSerializer] experiment_name=317 2023-02-04 12:30:42 [info ] Logged figures to MLflow. [MLflowSerializer] 2023-02-04 12:30:42 [info ] Writing reports to ./output/artifacts/317 [openstef.metrics.reporter] 2023-02-04 12:30:43 [info ] Model trained completed [openstef.tasks.utils.taskcontext] iteration=0 ktp_checkpoint=Model trained ktp_runtime=12.166 task=train_model 2023-02-04 12:30:43 [info ] Iteration completed [openstef.tasks.utils.taskcontext] iteration=0 ktp_runtime=13.485 ktp_successful=1 task=train_model 2023-02-04 12:30:43 [info ] Iteration started [openstef.tasks.utils.taskcontext] datetime_end=datetime.datetime(2023, 2, 4, 12, 30, 29, 832705) iteration=1 num_jobs=4 pid=313 task=train_model 2023-02-04 12:30:43 [info ] Added metadata to predictionjob completed [openstef.tasks.utils.taskcontext] iteration=1 ktp_checkpoint=Added metadata to PredictionJob ktp_runtime=0.0 task=train_model 2023-02-04 12:30:44 [info ] Combining sources into single dataframe [Weather] 2023-02-04 12:30:44 [info ] Retrieved timeseries input completed [openstef.tasks.utils.taskcontext] iteration=1 ktp_checkpoint=Retrieved timeseries input ktp_runtime=1.31 task=train_model 2023-02-04 12:30:44 [warning ] No old model found, training new model [openstef.pipeline.train_model] pid=313 2023-02-04 12:30:44 [info ] Found 1545 values of constant load (repeated values), converted to NaN value. [openstef.validation.validation] cleansing_step=repeated_values frac_values=0.13410294245291207 num_values=1545 pj_id=313 2023-02-04 12:30:44 [info ] Removed 1545 NaN values [openstef.validation.validation] num_removed_values=1545 2023-02-04 12:30:49 [info ] Fitted a new model, not yet stored [root]
2023/02/04 13:30:50 INFO mlflow.tracking.fluent: Experiment with name '313' does not exist. Creating a new experiment.
2023-02-04 12:30:50 [info ] No previous model found in MLflow [MLflowSerializer] experiment_name=313 2023-02-04 12:30:53 [info ] Model saved with MLflow [MLflowSerializer] experiment_name=313 2023-02-04 12:30:55 [info ] Logged figures to MLflow. [MLflowSerializer] 2023-02-04 12:30:55 [info ] Writing reports to ./output/artifacts/313 [openstef.metrics.reporter] 2023-02-04 12:30:56 [info ] Model trained completed [openstef.tasks.utils.taskcontext] iteration=1 ktp_checkpoint=Model trained ktp_runtime=11.813 task=train_model 2023-02-04 12:30:56 [info ] Iteration completed [openstef.tasks.utils.taskcontext] iteration=1 ktp_runtime=13.126 ktp_successful=1 task=train_model 2023-02-04 12:30:56 [info ] Iteration started [openstef.tasks.utils.taskcontext] datetime_end=datetime.datetime(2023, 2, 4, 12, 30, 29, 832705) iteration=2 num_jobs=4 pid=459 task=train_model 2023-02-04 12:30:56 [info ] Added metadata to predictionjob completed [openstef.tasks.utils.taskcontext] iteration=2 ktp_checkpoint=Added metadata to PredictionJob ktp_runtime=0.0 task=train_model 2023-02-04 12:30:57 [info ] Combining sources into single dataframe [Weather] 2023-02-04 12:30:57 [info ] Retrieved timeseries input completed [openstef.tasks.utils.taskcontext] iteration=2 ktp_checkpoint=Retrieved timeseries input ktp_runtime=1.291 task=train_model 2023-02-04 12:30:57 [warning ] No old model found, training new model [openstef.pipeline.train_model] pid=459 2023-02-04 12:30:57 [info ] Found 1545 values of constant load (repeated values), converted to NaN value. [openstef.validation.validation] cleansing_step=repeated_values frac_values=0.13410294245291207 num_values=1545 pj_id=459 2023-02-04 12:30:57 [info ] Removed 1545 NaN values [openstef.validation.validation] num_removed_values=1545 2023-02-04 12:31:49 [info ] Fitted a new model, not yet stored [root]
2023/02/04 13:31:51 INFO mlflow.tracking.fluent: Experiment with name '459' does not exist. Creating a new experiment.
2023-02-04 12:31:51 [info ] No previous model found in MLflow [MLflowSerializer] experiment_name=459 2023-02-04 12:31:53 [info ] Model saved with MLflow [MLflowSerializer] experiment_name=459 2023-02-04 12:31:55 [info ] Logged figures to MLflow. [MLflowSerializer] 2023-02-04 12:31:55 [info ] Writing reports to ./output/artifacts/459 [openstef.metrics.reporter] 2023-02-04 12:31:56 [info ] Model trained completed [openstef.tasks.utils.taskcontext] iteration=2 ktp_checkpoint=Model trained ktp_runtime=59.099 task=train_model 2023-02-04 12:31:56 [info ] Iteration completed [openstef.tasks.utils.taskcontext] iteration=2 ktp_runtime=60.396 ktp_successful=1 task=train_model 2023-02-04 12:31:56 [info ] Iteration started [openstef.tasks.utils.taskcontext] datetime_end=datetime.datetime(2023, 2, 4, 12, 30, 29, 832705) iteration=3 num_jobs=4 pid=321 task=train_model 2023-02-04 12:31:56 [info ] Added metadata to predictionjob completed [openstef.tasks.utils.taskcontext] iteration=3 ktp_checkpoint=Added metadata to PredictionJob ktp_runtime=0.0 task=train_model 2023-02-04 12:31:57 [info ] Combining sources into single dataframe [Weather] 2023-02-04 12:31:57 [info ] Retrieved timeseries input completed [openstef.tasks.utils.taskcontext] iteration=3 ktp_checkpoint=Retrieved timeseries input ktp_runtime=0.955 task=train_model 2023-02-04 12:31:57 [warning ] No old model found, training new model [openstef.pipeline.train_model] pid=321 2023-02-04 12:31:57 [info ] Found 1545 values of constant load (repeated values), converted to NaN value. [openstef.validation.validation] cleansing_step=repeated_values frac_values=0.13410294245291207 num_values=1545 pj_id=321 2023-02-04 12:31:57 [info ] Removed 1545 NaN values [openstef.validation.validation] num_removed_values=1545 2023-02-04 12:32:03 [info ] Fitted a new model, not yet stored [root]
2023/02/04 13:32:04 INFO mlflow.tracking.fluent: Experiment with name '321' does not exist. Creating a new experiment.
2023-02-04 12:32:04 [info ] No previous model found in MLflow [MLflowSerializer] experiment_name=321 2023-02-04 12:32:06 [info ] Model saved with MLflow [MLflowSerializer] experiment_name=321 2023-02-04 12:32:08 [info ] Logged figures to MLflow. [MLflowSerializer] 2023-02-04 12:32:08 [info ] Writing reports to ./output/artifacts/321 [openstef.metrics.reporter] 2023-02-04 12:32:10 [info ] Model trained completed [openstef.tasks.utils.taskcontext] iteration=3 ktp_checkpoint=Model trained ktp_runtime=12.173 task=train_model 2023-02-04 12:32:10 [info ] Iteration completed [openstef.tasks.utils.taskcontext] iteration=3 ktp_runtime=13.133 ktp_successful=1 task=train_model 2023-02-04 12:32:10 [info ] Loop completed [openstef.tasks.utils.taskcontext] jobs_started=4 jobs_successful=4 jobs_unsuccessful=0 ktp_checkpoint=loop ktp_runtime=100.148 num_jobs=4 successful=0 task=train_model 2023-02-04 12:32:10 [info ] Task completed [openstef.tasks.utils.taskcontext] ktp_runtime=100.25 ktp_successful=1 task=train_model