1 Libraries

library(httr)
library(data.table)
library(dplyr)
library(lubridate)
library(knitr)
library(highcharter)
library(DT)
library(caret)
library(tibble) 
library(rsample)   
library(jtools)



2 Daten importieren und vorbereiten

2.1 Meteorologische Daten

fread("https://data.geo.admin.ch/ch.meteoschweiz.klima/nbcn-tageswerte/nbcn-daily_BAS_previous.csv", sep = ";", colClasses = c("character", "Date", rep("numeric", 10))) %>%
  mutate(
    timestamp = as.POSIXct(
      paste0(substr(date, 1, 4), "-", substr(date, 5, 6), "-", substr(date, 7, 8), " 00:00:00"),
      format="%Y-%m-%d %H:%M:%S"
    )
  ) %>%
  mutate_at(c("gre000d0", "hto000d0", "nto000d0", "prestad0", "rre150d0", "sre000d0", "tre200d0", "tre200dn", "tre200dx", "ure200d0"), as.numeric) %>%
  bind_rows(
    fread("https://data.geo.admin.ch/ch.meteoschweiz.klima/nbcn-tageswerte/nbcn-daily_BAS_current.csv", sep = ";", colClasses = c("character", "Date", rep("numeric", 10))) %>%
      mutate(
        timestamp = as.POSIXct(
          paste0( substr(date, 1, 4), "-", substr(date, 5, 6), "-", substr(date, 7, 8), " 00:00:00"),
          format="%Y-%m-%d  %H:%M:%S", tz="Europe/Berlin"
        )
      ) %>%
      mutate_at(c("gre000d0", "hto000d0", "nto000d0", "prestad0", "rre150d0", "sre000d0", "tre200d0", "tre200dn", "tre200dx", "ure200d0"), as.numeric)
  ) %>%
  relocate(timestamp) %>%
  select(-c(date, `station/location`)) %>%
  mutate(
    year = as.numeric(substr(timestamp, 1, 4)),
    month = as.numeric(substr(timestamp, 6, 7)),
    day = as.numeric(substr(timestamp, 9, 10))
  ) %>%
  data.frame() %>%
  assign("meteo", ., inherits = TRUE)



2.2 Feiertage, Ferien und Veranstaltungen

httr::GET("https://data.bs.ch/explore/dataset/100074/download/?format=csv&timezone=Europe%2FBerlin") %>%
  content(., "text") %>%
  fread(sep = ";") %>%
  select(tag_datum, name, code, kategorie_name) %>%
  filter(name != "Fasnachtsmontag", name != "Fasnachtsmittwoch", name != "Dies Academicus") %>%
  filter(!(tag_datum == "2008-05-01 00:00:00" & name == "Tag der Arbeit")) %>% # Tag der Arbeit doubles with Auffahrt
  filter(kategorie_name %in% c("Feiertag", "Ferien") | code == "herbstm") %>%
  filter(name != "Semesterferien") %>%
  assign("rd_veranst", ., inherits = TRUE)

rd_veranst %>%
  data.frame() %>%
  mutate(Herbstmesse = if_else(code == "herbstm", "Herbstmesse", "")) %>%
  select(tag_datum, Herbstmesse) %>%
  filter(Herbstmesse != "") %>%
  full_join(
    rd_veranst %>%
      mutate(Feiertage = if_else(kategorie_name == "Feiertag", name, "")) %>%
      select(tag_datum, Feiertage) %>%
      filter(Feiertage != ""),
    by = "tag_datum"
  ) %>%
  full_join(
    rd_veranst %>%
      mutate(Ferien = if_else(kategorie_name == "Ferien", name, "")) %>%
      select(tag_datum, Ferien) %>%
      filter(Ferien != ""),
    by = "tag_datum"
  ) %>%
  mutate(timestamp = as.POSIXct(tag_datum, tz="Europe/Berlin")) %>%
  filter(year(timestamp) > 2011) %>%
  select(timestamp, Herbstmesse, Feiertage, Ferien) %>%
  mutate(
    year = lubridate::year(timestamp), 
    month = lubridate::month(lubridate::floor_date(timestamp, "month")),
    day = lubridate::day(lubridate::floor_date(timestamp, "day"))
  ) %>%
  data.frame() %>%
  assign("Veranstaltungen", ., inherits = TRUE)

rm(events, rd_veranst)



2.3 Gasverbrauchs-Daten

httr::GET("https://data.bs.ch/explore/dataset/100304/download/?format=csv&timezone=Europe%2FBerlin")  %>%
  content(., "text") %>%
  fread(sep = ";") %>%
  group_by(year, month, day) %>%
  filter(year > 2011) %>%
  summarise(gasverbrauch = sum(value, na.rm = T)) %>%
  ungroup() %>%
  mutate(timestamp = as.POSIXct(
    paste0(year, "-", month, "-", day, " 00:00:00"), format="%Y-%m-%d  %H:%M:%S", tz="Europe/Berlin"
    )
  ) %>%
  mutate(gasverbrauch=gasverbrauch/1000000) %>%
  relocate(timestamp) %>%
  assign("gas_daily", ., inherits = TRUE)



2.4 Datensätze zusammenfügen

meteo %>%
  full_join(Veranstaltungen %>%
              select(-timestamp), by = c("year" = "year", "month" = "month", "day" = "day")) %>%
  full_join(gas_daily %>%
              select(-timestamp), by = c("year" = "year", "month" = "month", "day" = "day")) %>%
    mutate(weekday = lubridate::wday(as.Date(timestamp), label = TRUE, abbr = TRUE),
         daytype = if_else(weekday %in% c(1,7), "Wochenende", "Werktage")) %>%
  relocate(timestamp, gasverbrauch) %>%
  filter(!is.na(gasverbrauch)) %>%
  mutate(HGT = if_else(tre200d0 <= 12, 20-tre200d0, 0)) %>%
  mutate(Herbstmesse = if_else(is.na(Herbstmesse), "No", Herbstmesse),
         Feiertage = if_else(is.na(Feiertage), "No", Feiertage),
         Ferien = if_else(is.na(Ferien), "No", Ferien)) %>%
  mutate(time = as.Date(timestamp, tz = 'Europe/Berlin')) %>%
  select(-timestamp) %>%
  relocate(time) %>%
  slice(1:(n() - 1)) %>%              
  assign("Data", ., inherits = TRUE)

# Variable für Energiespar-Kampagnen
start_date= as.Date("2022-09-01")
end_date= as.Date("2023-01-31")

Data %>%
  mutate(time = as.Date(time),
         Feiertage_dummy = if_else(Feiertage == "No", 0, 1),
         Ferien_dummy = if_else(Ferien == "No", 0, 1),
         Herbstmesse_dummy = if_else(Herbstmesse == "No", 0, 1),
         Wochenende_dummy = if_else(daytype == "Werktage", 0, 1),
         Energiespar_Kampagnen = ifelse(as.Date(time) %within% interval(start_date, end_date), 1,0),
         year_month = paste(year, 
                            formatC(month, width = 2, format = "d", flag = "0"),
                            sep = "-"),
         Energiespar_Kampagnen_monatlich = ifelse(Energiespar_Kampagnen == 1, as.character(year_month), "0"),
         weekday = factor(weekday, ordered = FALSE)
  ) %>%
  slice(1:(n() - 1)) %>% # Unvollständigen Tag für den Gasverbrauch entfernen.
  assign("Data_selec", ., inherits = TRUE)



3 OLS Regression

Data_selec_model <- Data_selec %>%
  filter(time < as.Date("2025-09-30"))

set.seed(12345)

inTraining <- createDataPartition(Data_selec_model$gasverbrauch, p = .7, list = FALSE)
training <- Data_selec_model[ inTraining,]
testing  <- Data_selec_model[-inTraining,]

fitControl <- trainControl(method = "repeatedcv",
                           number = 10,
                           repeats = 10)

set.seed(54321)
ols_Fit1 <- train(gasverbrauch ~ time + as.factor(month) + weekday +
                  as.factor(Ferien_dummy) + as.factor(Feiertage_dummy) + as.factor(Herbstmesse_dummy) +
                  gre000d0 + hto000d0 + nto000d0 + prestad0 + rre150d0 + 
                  sre000d0 +
                  tre200d0 + tre200dn + tre200dx + ure200d0 +
                  Energiespar_Kampagnen +
                  HGT +
                  I(HGT^2) + I(tre200d0^2),
                  data = training, 
                  method = "lm",
                  trControl = fitControl,
                  verbose = TRUE)
# ols_Fit1
# ols_Fit1$resample
# summary(ols_Fit1)

3.1 Leistung cross-validation

ols_Fit1$results %>% 
  round(digits = 3) -> ols_Fit1$results


data.frame(
  RMSE = paste0(ols_Fit1$results[2], " &pm; ", ols_Fit1$results[5]),
  Rsquared = paste0(ols_Fit1$results[3], " &pm; ", ols_Fit1$results[6]),
  MAE = paste0(ols_Fit1$results[4], " &pm; ", ols_Fit1$results[7])
) %>%
  kable()
RMSE Rsquared MAE
0.904 ± 0.069 0.967 ± 0.005 0.695 ± 0.055

3.2 Leistung Test

testing %>%
  mutate(pred = predict(ols_Fit1, newdata = testing)) %>%
  select(obs = gasverbrauch, pred) %>%
  defaultSummary() %>%
  round(digits = 3) -> performance_test

data.frame(
  RMSE = performance_test[1],
  Rsquared = performance_test[2],
  MAE = performance_test[3]
) %>%
  kable(row.names = F, align = "lll") 
RMSE Rsquared MAE
0.9 0.967 0.705

3.3 Endgültiges Modell

final_model <- lm(gasverbrauch ~ time + as.factor(month) + weekday +
                  as.factor(Ferien_dummy) + as.factor(Feiertage_dummy) + as.factor(Herbstmesse_dummy) +
                  gre000d0 + #hto000d0 + nto000d0 + 
                  prestad0 + #rre150d0 + sre000d0 + 
                  tre200d0 + tre200dn + 
                  # tre200dx + 
                  ure200d0 +
                  Energiespar_Kampagnen +
                  HGT + #I(HGT^2) + 
                  I(tre200d0^2),
                  data = Data_selec_model)


summ(final_model,
     model.info = T,
     model.fit = F,
     digits = getOption("jtools-digits", 3),
     stars = T,
     robust=T
)
Observations 1490
Dependent variable gasverbrauch
Type OLS linear regression
Est. S.E. t val. p
(Intercept) 31.160 4.143 7.522 0.000
time -0.001 0.000 -21.872 0.000
as.factor(month)2 -0.744 0.141 -5.278 0.000
as.factor(month)3 -1.511 0.141 -10.740 0.000
as.factor(month)4 -2.726 0.166 -16.411 0.000
as.factor(month)5 -3.451 0.172 -20.107 0.000
as.factor(month)6 -3.304 0.185 -17.817 0.000
as.factor(month)7 -3.041 0.199 -15.265 0.000
as.factor(month)8 -3.524 0.182 -19.316 0.000
as.factor(month)9 -3.475 0.170 -20.392 0.000
as.factor(month)10 -2.623 0.160 -16.377 0.000
as.factor(month)11 -1.370 0.167 -8.177 0.000
as.factor(month)12 -0.095 0.155 -0.616 0.538
weekdayMo 0.029 0.088 0.330 0.741
weekdayDi 0.005 0.089 0.055 0.956
weekdayMi 0.005 0.085 0.059 0.953
weekdayDo -0.087 0.085 -1.034 0.301
weekdayFr -0.593 0.088 -6.703 0.000
weekdaySa -0.641 0.087 -7.369 0.000
as.factor(Ferien_dummy)1 -0.421 0.070 -6.015 0.000
as.factor(Feiertage_dummy)1 -0.525 0.135 -3.886 0.000
as.factor(Herbstmesse_dummy)1 -0.921 0.156 -5.923 0.000
gre000d0 -0.003 0.001 -4.487 0.000
prestad0 0.012 0.004 3.001 0.003
tre200d0 -0.739 0.034 -21.534 0.000
tre200dn -0.075 0.020 -3.797 0.000
ure200d0 -0.009 0.004 -2.354 0.019
Energiespar_Kampagnen -1.692 0.103 -16.409 0.000
HGT 0.049 0.013 3.864 0.000
I(tre200d0^2) 0.016 0.001 21.023 0.000
Standard errors: Robust, type = HC3
RMSE Rsquared MAE
0.879 0.968 0.681



Data_selec %>%
  bind_cols(
    predict(final_model, Data_selec, interval = "prediction")
  ) %>%
  slice(1:(n() - 1)) -> Data_model_ols

highchart(type = "stock") %>%
  hc_add_series(Data_model_ols, "line", hcaes(time, gasverbrauch), color = "#008AC3",
                tooltip = list(pointFormat = "Effektiver Gasverbrauch: {point.gasverbrauch:.2f} GWh",
                               shared = TRUE),
                zIndex = 1) %>%
  hc_xAxis(title = list(text = "")) %>%
  hc_add_series(Data_model_ols, "line", hcaes(time, fit), color = "#B375AB",
                tooltip = list(pointFormat = "Erwarteter Gasverbrauch: {point.fit:.2f} GWh",
                               shared = TRUE),
                zIndex = 2) %>%
  hc_add_series(Data_model_ols, type = "arearange",
                hcaes(x = time, low = lwr, high = upr),
                zIndex = 0,
                color = "#E7CEE2",
                tooltip = list(pointFormat = "95% Konfidenzintervall: {point.lwr:.2f} - {point.upr:.2f} GWh"), shared = TRUE
  ) %>%
  hc_yAxis(floor=0, title = list(text = ""), opposite = FALSE) %>%
  hc_plotOptions(series = list(marker = list(enabled = FALSE))) %>%
  hc_rangeSelector(selected = 0)
write.csv2(Data_model_ols %>%
             mutate(vgl_real_minus_forecast = gasverbrauch - fit) %>%
             select(
               time,
               gasverbrauch,
               forecast = fit,
               vgl_real_minus_forecast,
               forecast_lowFI = lwr,
               forecast_highFI = upr
             ),
           "100353.csv", row.names=F, na = "")




Hinweis: Je nach Kombination von Betriebssystemen und Versionen von RStudio, R und den verwendeten Pakete können die Ergebnisse leicht von den publizierten Resultaten abweichen. Die angewendete Konfiguration lautet:


sessionInfo()
## R version 4.3.3 (2024-02-29 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 11 x64 (build 22631)
## 
## Matrix products: default
## 
## 
## locale:
## [1] LC_COLLATE=German_Switzerland.utf8  LC_CTYPE=German_Switzerland.utf8   
## [3] LC_MONETARY=German_Switzerland.utf8 LC_NUMERIC=C                       
## [5] LC_TIME=German_Switzerland.utf8    
## 
## time zone: Europe/Zurich
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] jtools_2.3.0      rsample_1.3.1     tibble_3.2.1      caret_7.0-1      
##  [5] lattice_0.22-7    ggplot2_3.5.2     DT_0.33           highcharter_0.9.4
##  [9] knitr_1.50        lubridate_1.9.4   dplyr_1.1.4       data.table_1.17.0
## [13] httr_1.4.7       
## 
## loaded via a namespace (and not attached):
##  [1] pROC_1.18.5          sandwich_3.1-1       rlang_1.1.5         
##  [4] magrittr_2.0.3       furrr_0.3.1          compiler_4.3.3      
##  [7] systemfonts_1.2.2    vctrs_0.6.5          reshape2_1.4.4      
## [10] stringr_1.5.1        pkgconfig_2.0.3      fastmap_1.2.0       
## [13] backports_1.5.0      pander_0.6.6         rmarkdown_2.29      
## [16] prodlim_2024.06.25   purrr_1.0.4          xfun_0.52           
## [19] cachem_1.1.0         jsonlite_2.0.0       recipes_1.3.1       
## [22] broom_1.0.9          parallel_4.3.3       R6_2.6.1            
## [25] bslib_0.9.0          stringi_1.8.7        RColorBrewer_1.1-3  
## [28] rlist_0.4.6.2        parallelly_1.43.0    rpart_4.1.24        
## [31] jquerylib_0.1.4      Rcpp_1.0.14          bookdown_0.44       
## [34] assertthat_0.2.1     iterators_1.0.14     future.apply_1.11.3 
## [37] zoo_1.8-13           Matrix_1.6-5         splines_4.3.3       
## [40] nnet_7.3-20          igraph_2.1.4         timechange_0.3.0    
## [43] tidyselect_1.2.1     rstudioapi_0.17.1    dichromat_2.0-0.1   
## [46] yaml_2.3.10          timeDate_4041.110    codetools_0.2-20    
## [49] curl_6.2.2           listenv_0.9.1        plyr_1.8.9          
## [52] quantmod_0.4.28      withr_3.0.2          evaluate_1.0.4      
## [55] future_1.40.0        survival_3.8-3       xml2_1.3.8          
## [58] xts_0.14.1           pillar_1.11.0        foreach_1.5.2       
## [61] stats4_4.3.3         generics_0.1.4       TTR_0.24.4          
## [64] scales_1.4.0         globals_0.18.0       class_7.3-23        
## [67] glue_1.8.0           tools_4.3.3          ModelMetrics_1.2.2.2
## [70] gower_1.0.2          forcats_1.0.0        grid_4.3.3          
## [73] tidyr_1.3.1          ipred_0.9-15         nlme_3.1-168        
## [76] cli_3.6.3            kableExtra_1.4.0     viridisLite_0.4.2   
## [79] svglite_2.1.3        lava_1.8.1           gtable_0.3.6        
## [82] broom.mixed_0.2.9.6  sass_0.4.9           digest_0.6.37       
## [85] htmlwidgets_1.6.4    farver_2.1.2         htmltools_0.5.8.1   
## [88] lifecycle_1.0.4      hardhat_1.4.2        MASS_7.3-60.0.1


Gasverbrauch im Versorgungsgebiet der IWB: https://data.bs.ch/explore/dataset/100304/

Effektiver und erwarteter täglicher Gasverbrauch: https://data.bs.ch/explore/dataset/100353/

Der Code des Modells kann selber ausgeführt und weiterentwickelt werden. Hierfür wird Renku verwendet. Renku ist eine Plattform, die verschiedene Werkzeuge für reproduzierbare und kollaborative Datenanalyseprojekte bündelt: https://renkulab.io/projects/stata/reproducible/erwarteter-gasverbrauch-basel-stadt

Webartikel: https://statistik.bs.ch/artikel/gasverbrauch