commits_2024_09_alphar.txt

﻿5562d69934df98736b4e037b9e68ac422e5b2e1a MislavSag Wed Sep 4 13:23:46 2024 +0000 update
diff --git a/R/exlusion_list.R b/R/exlusion_list.R
new file mode 100644
index 0000000..116908d
--- /dev/null
+++ b/R/exlusion_list.R
@@ -0,0 +1,84 @@
+library(data.table)
+library(tabulizer)
+library(stringr)
+library(AzureStor)
+library(xts)
+
+
+# PDF url
+url = "https://www.robeco.com/files/docm/docu-exclusion-list.pdf"
+
+# Get PDF tables
+result = list()
+for (i in 2:5){
+  out = as.data.table(extract_tables(url, page = i, method = 'stream'))
+  result[[i-1]] = out
+}
+
+# Remove all empty columns
+dt = lapply(result, function(x) {
+  ns = unlist(x[, lapply(.SD, function(y) all(y == ''))], use.names = TRUE)
+  col_ = names(ns[ns == TRUE])
+  x[, (col_) := NULL]
+  x
+})
+
+# If every value in a column consists only of digits, paste it onto the next column and drop that next column
+dt = lapply(dt, function(x) {
+  vector_delete = integer(0) # positions of columns that were merged into their left neighbour
+  for (i in seq_len(ncol(x) - 1)) { # seq_len() gives zero iterations for a one-column table; 1:0 would run i = 1, 0
+    if (all(str_detect(x[[i]], "^[0-9]+$"))) {
+      x[[i]] = paste0(x[[i]], " ", x[[i+1]])
+      vector_delete = c(vector_delete, i+1)
+    }
+  }
+  if (length(vector_delete) > 0) x[, (colnames(x)[vector_delete]) := NULL] # skip when nothing was merged
+  x
+})
+dt[[2]]
+
+# Make multicolumn data.table one column data.table
+dt = lapply(dt, function(x) {
+  if (ncol(x) > 1) {
+    x = melt(x, measure.vars = colnames(x))
+  }
+  x[, 2]
+})
+dt = rbindlist(dt)
+dt = dt[value != ""]
+
+# Check if row contain number
+dt[, number := str_extract(value, "^[0-9]+")]
+dt[200:250]
+dt[250:300]
+dt[, meta_logical := is.na(number) &
+     ((shift(number, 1, type = "lead") == 1 & !grepl("level", value, ignore.case = TRUE)) |
+        grepl("level", shift(value, 1, type = "lead"), ignore.case = TRUE))]
+dt[meta_logical == TRUE, meta := value]
+dt[, meta := na.locf(meta)]
+dt = dt[, .(meta, company = value)]
+
+# Check
+dt[130:200]
+
+# Remove the leading number sequence (and the whitespace after it) from the company column
+dt[, company := str_remove(company, "^[0-9]+\\s+")]
+
+# Remove rows where the company value is the meta header itself
+dt = dt[meta != company]
+
+# Remove all rows that contain only numbers in the company column
+dt = dt[!grepl("^[0-9]+$", company)]
+
+# Exclude list
+dt[, unique(meta)]
+pat = "Controversial weapons|Thermal coal mining|Thermal coal power|Coal power expansion plans"
+dt[, exclude := grepl(pat, meta)]
+
+# Save
+fwrite(dt, "F:/meta/exclusion_list.csv")
+
+# Add to Azure
+bl_endp_key = storage_endpoint(Sys.getenv("BLOB-ENDPOINT-SNP"), Sys.getenv("BLOB-KEY-SNP"))
+cont = storage_container(bl_endp_key, "qc-live")
+storage_write_csv(dt, cont, "exclusion_list.csv")
diff --git a/R/exuber_v4.R b/R/exuber_v4.R
index 7d3a301..d798e3c 100644
--- a/R/exuber_v4.R
+++ b/R/exuber_v4.R
@@ -10,6 +10,8 @@ library(runner)
 library(glue)
 library(tsDyn)
 library(AzureStor)
+library(gausscov)
+library(readr)
 # library(vars)
 # library(ggplot2)
 
@@ -639,8 +641,6 @@ charts.PerformanceSummary(backtest_xts)
 
 
 # ROLLING GAUSSCOV --------------------------------------------------------
-library(gausscov)
-
 # Prepare data
 dt = copy(backtest_data)
 dt[, y := shift(close, n = 1, type = "lead") / close - 1]
@@ -716,6 +716,7 @@ dates = f1st_res_dt[, unique(date)]
 min_date = dt[, min(date)]
 predictions = vector("numeric", length(dates))
 for (i in seq_along(dates)) {
+  # i = 1000
   sample_dt = dt[date %between% c(min_date, dates[i]),
                  .SD,
                  .SDcols = c("y", na.omit(f1st_res_dt[date == dates[i]]$cols))]
@@ -730,21 +731,25 @@ predictions_dt = merge(dt[, .(date, close)], predictions_dt, by = "date")
 # Backtest
 predictions_dt[, signal := predictions > 0]
 predictions_dt[, spy := close / shift(close) - 1]
-predictions_dt[, strategy := spy * shift(signal)]
+predictions_dt[, strategy := spy * shift(signal, 1)]
+predictions_dt = predictions_dt[date > as.Date("2002-10-01")] # to match QC backtest
 backtest_xts = as.xts.data.table(predictions_dt[, .(date, spy, strategy)])
 charts.PerformanceSummary(backtest_xts)
-charts.PerformanceSummary(backtest_xts[35000:nrow(backtest_xts)])
+# charts.PerformanceSummary(backtest_xts[35000:nrow(backtest_xts)])
+# charts.PerformanceSummary(backtest_xts[2180:nrow(backtest_xts)]) # same as QC
 
 # Save for QC backtesting
 qc_data = predictions_dt[, .(date, signal)]
 qc_data = na.omit(qc_data)
-qc_data[, date := as.character(date)]
+qc_data[, let(
+  date = as.character(date),
+  signal = as.integer(signal)
+)]
 bl_endp_key = storage_endpoint(Sys.getenv("BLOB-ENDPOINT-SNP"), Sys.getenv("BLOB-KEY-SNP"))
 cont = storage_container(bl_endp_key, "qc-backtest")
 storage_write_csv(qc_data, cont, "exuber_gausscov.csv", col_names = FALSE)
 
 
-
 # TVAR --------------------------------------------------------------------
 # prepare data2
 varvar = "sd_radf_sum" # kurtosis_bsadf_log, sd_radf_sum
@@ -888,8 +893,8 @@ tvar_backtest <- function(tvar_res_i) {
       sides[i] <- 0
     } else if (indicator[i-1] > threshold_1[i-1] & coef_ret_1_middle[i-1] < 0 & coef_ret_2_middle[i-1] < 0) {
       sides[i] <- 0
-    # } else if (indicator[i-1] < threshold_1[i-1] & coef_ret_1_down[i-1] < 0 & coef_ret_2_down[i-1] < 0 & coef_ret_3_down[i-1] < 0) {
-    #   sides[i] <- 0
+      # } else if (indicator[i-1] < threshold_1[i-1] & coef_ret_1_down[i-1] < 0 & coef_ret_2_down[i-1] < 0 & coef_ret_3_down[i-1] < 0) {
+      #   sides[i] <- 0
     } else {
       sides[i] <- 1
     }
diff --git a/R/fund_price_disbalanse.R b/R/fund_price_disbalanse.R
index 4bed0f1..8f7aa46 100644
--- a/R/fund_price_disbalanse.R
+++ b/R/fund_price_disbalanse.R
@@ -171,9 +171,10 @@ backtest = function(uni,
   # DEBUG
   # uni[, .(symbol, date, date_month, target)][date_month == as.Date("2024-01-31")]
   if (ret_sharpe) {
-    uni = uni[, .(ret = sum(target * (1 / length(target)))), by = date_month]
-    # sr = PerformanceAnalytics::SharpeRatio(as.xts.data.table(uni))
-    sr = PerformanceAnalytics::SortinoRatio(as.xts.data.table(uni))
+    uni[, weight := 1 / length(target), by = date_month]
+    uni = uni[, .(ret = sum(target * weight)), by = date_month]
+    sr = PerformanceAnalytics::SharpeRatio(as.xts.data.table(uni))
+    # sr = PerformanceAnalytics::SortinoRatio(as.xts.data.table(uni))
     # sr = Return.annualized(as.xts.data.table(uni))
     return(sr[1, ])
   } else {
@@ -181,11 +182,11 @@ backtest = function(uni,
   }
 }
 params = expand.grid(
-  eps_thresh = c(-100, 0),         # min EPS to include stock in universe
+  eps_thresh = c(-100, 0),         # min EPS to include stock in universe. -100 means all.
   close_raw_thresh = c(1, 10, 20), # min price to include stock in universe
   epsg_thresh = c(100, 200, 500),  # how many stocks to possibly include in universe
   return_mom = cols[c(1, 6, 12)],  # return period to calculate to identify mean reversion
-  eps_n = cols_epsg[c(1, 3, 6)],   # number of months to calculate EPS SD
+  eps_n = cols_epsg[c(1, 3, 6, 9)],# number of months to calculate EPS SD
   mom_n = c(10, 20, 50),           # number of stocks to include in universe
   coarse_n = c(1000, 2000),        # number of stocks to include in coarse universe
   rev_positive = c(TRUE, FALSE),   # include only stocks with positive revenue growth
@@ -204,7 +205,7 @@ results[ind_]
 # Check best backtest
 best_ = backtest(universe, params[ind_, 1], params[ind_, 2], params[ind_, 3],
                   params[ind_, 4], params[ind_, 5], params[ind_, 6],
-                 params[ind_, 7], FALSE)
+                 params[ind_, 7], params[ind_, 8], FALSE)
 best_ret = best_[, .(ret = sum(target * (1 / length(target)))), by = date_month]
 best_xts = as.xts.data.table(best_ret[, .(date_month, ret)])
 Return.annualized(best_xts)
diff --git a/R/ib_history.R b/R/ib_history.R
new file mode 100644
index 0000000..6531bb3
--- /dev/null
+++ b/R/ib_history.R
@@ -0,0 +1,68 @@
+library(ibrestr)
+library(data.table)
+library(AzureStor)
+
+
+# Init IB client (the SMTP secret is read from the environment, not stored in source)
+ib = IB$new(
+  host = "cgspaperpra.eastus.azurecontainer.io",
+  port = 5000,
+  strategy_name = "Least Volatile Local",
+  account_id = "DU6474915",
+  email_config = list(
+    email_from = "mislav.sagovac@contentio.biz",
+    email_to = "mislav.sagovac@contentio.biz",
+    smtp_host = "mail.contentio.biz",
+    smtp_port = 587,
+    smtp_user = "mislav.sagovac@contentio.biz",
+    smtp_password = Sys.getenv("SMTP-PASSWORD") # set in .Renviron; never commit credentials in plain text
+  ),
+  logger = NULL
+)
+
+# Check gateway
+ib$check_gateway()
+
+# Find VIX conid
+vix_symbols = ib$search_contract_by_symbol("VIX")
+vix_symbols[[1]]
+
+# Get history data for VIX
+dates = seq.POSIXt(as.POSIXct("2005-09-01 16:00:00"), Sys.time(), by = "130 days")
+vix_history = list()
+for (i in seq_along(dates)) {
+  print(dates[i])
+  test_ = tryCatch({
+    vix_history[[i]] = ib$get_historical_data_hmds(
+      conid = "13455763",
+      barType = "Last",
+      period = "150d",
+      bar = "1h",
+      startTime = format.POSIXct(dates[i], "%Y%m%d-%H:%M:%S"),
+      clean = TRUE
+    )
+  }, error = function(e) NULL)
+  if (is.null(test_)) {
+    Sys.sleep(15L)
+    vix_history[[i]] = ib$get_historical_data_hmds(
+      conid = "13455763",
+      barType = "Last",
+      period = "150d",
+      bar = "1h",
+      startTime = format.POSIXct(dates[i], "%Y%m%d-%H:%M:%S"),
+      clean = TRUE
+    )
+  }
+}
+vix_history_dt = rbindlist(vix_history)
+vix_history_dt = unique(vix_history_dt)
+
+# Checks
+head(vix_history_dt[as.Date(datetime) > as.Date("2020-01-01")], 20)
+
+# Save to Azure
+qc_data = vix_history_dt[, .(date = datetime, open = o, high = h, low = l, close = c)]
+qc_data[, date := as.character(date)]
+bl_endp_key = storage_endpoint(Sys.getenv("BLOB-ENDPOINT-SNP"), Sys.getenv("BLOB-KEY-SNP"))
+cont = storage_container(bl_endp_key, "qc-backtest")
+storage_write_csv(qc_data, cont, "vix.csv", col_names = FALSE)
diff --git a/R/mining_sesonality_rolling.R b/R/mining_sesonality_rolling.R
index fa9777a..6b6ecd0 100644
--- a/R/mining_sesonality_rolling.R
+++ b/R/mining_sesonality_rolling.R
@@ -1,119 +1,161 @@
 library(data.table)
 library(quantreg)
-library(AzureStor)
 library(qlcal)
 library(lubridate)
+library(AzureStor)
 
 
+# SET UP ------------------------------------------------------------------
+# global vars
+PATH = "F:/data/equity/us"
 
-# DATA IMPORT -------------------------------------------------------------
-# set up
+# Set calendar
+calendars
 setCalendar("UnitedStates/NYSE")
 
-# import daily market data
-system.time({dt = fread("F:/lean_root/data/all_stocks_daily.csv")})
 
-# this want be necessary after update
-setnames(dt, c("date", "open", "high", "low", "close", "volume", "close_adj", "symbol"))
+# PRICE DATA --------------------------------------------------------------
+# Import QC daily data
+prices = fread("F:/lean/data/stocks_daily.csv")
+setnames(prices, gsub(" ", "_", c(tolower(colnames(prices)))))
+
+# Remove duplicates
+prices = unique(prices, by = c("symbol", "date"))
+
+# Remove duplicates - there are same for different symbols (eg. phun and phun.1)
+dups = prices[, .(symbol , n = .N),
+              by = .(date, open, high, low, close, volume, adj_close,
+                     symbol_first = substr(symbol, 1, 1))]
+dups = dups[n > 1]
+dups[, symbol_short := gsub("\\.\\d$", "", symbol)]
+symbols_remove = dups[, .(symbol, n = .N),
+                      by = .(date, open, high, low, close, volume, adj_close,
+                             symbol_short)]
+symbols_remove[n >= 2, unique(symbol)]
+symbols_remove = symbols_remove[n >= 2, unique(symbol)]
+symbols_remove = symbols_remove[grepl("\\.", symbols_remove)]
+prices = prices[symbol %notin% symbols_remove]
+
+# Adjust all columns
+prices[, adj_rate := adj_close / close]
+prices[, let(
+  open = open*adj_rate,
+  high = high*adj_rate,
+  low = low*adj_rate
+)]
+setnames(prices, "close", "close_raw")
+setnames(prices, "adj_close", "close")
+prices[, let(adj_rate = NULL)]
+setcolorder(prices, c("symbol", "date", "open", "high", "low", "close", "volume"))
+
+# Remove observations where open, high, low, close columns are below 1e-008
+# This step is optional; we need it if we will use the finfeatures package
+prices = prices[open > 1e-008 & high > 1e-008 & low > 1e-008 & close > 1e-008]
+
+# Remove missing values
+prices = na.omit(prices)
+
+# Keep only symbol with at least 2 years of data
+# This step is optional
+symbol_keep = prices[, .N, symbol][N >= 2 * 252, symbol]
+prices = prices[symbol %chin% symbol_keep]
+
+# Sort
+setorder(prices, symbol, date)
 
-# remove duplicates
-dt = unique(dt, by = c("symbol", "date"))
+# save SPY for later and keep only events symbols
+spy = prices[symbol == "spy"]
 
-# remove missing values
-dt = na.omit(dt)
+# free memory
+gc()
 
-# order data
-setorder(dt, "symbol", "date")
 
-# adjust all prices, not just close
-adjust_cols <- c("open", "high", "low")
-adjust_cols_new <- c("open_adj", "high_adj", "low_adj")
-dt[, (adjust_cols_new) := lapply(.SD, function(x) x * (close_adj / close)), .SDcols = adjust_cols] # adjust open, high and low prices
+# PREPARE DATA FOR SEASONALITY ANALYSIS -----------------------------------
+# Calculate return
+prices[, returns := close / shift(close) - 1, by = symbol] # calculate returns
 
-# calculate returns
-dt[, returns := close_adj / shift(close_adj) - 1, by = symbol] # calculate returns
-dt <- dt[returns < 1] # TODO:: better outlier detection mechanism. For now, remove daily returns above 100%
+# Remove outliers
+nrow(prices[returns > 1]) / nrow(prices)
+prices = prices[returns < 1] # TODO:: better outlier detection mechanism. For now, remove daily returns above 100%
 
-# plot
-plot(as.xts.data.table(dt[symbol == "aapl", .(date, close_adj)]))
-plot(as.xts.data.table(dt[symbol == "meta", .(date, close_adj)]))
-plot(as.xts.data.table(dt[symbol == "fb", .(date, close_adj)]))
+# define target variables
+prices[, return_day := shift(close, 1, type = "lead") / close - 1, by = symbol]
+prices[, return_day3 := shift(close, 3, type = "lead") / close - 1, by = symbol]
+prices[, return_week := shift(close, 5, type = "lead") / close - 1, by = symbol]
+prices[, return_week2 := shift(close, 10, type = "lead") / close - 1, by = symbol]
 
-# check for zero prices
-dt[close_adj == 0] # there is not zero prices
-dt = dt[close > 0 & close_adj > 0]
+# define frequency unit
+prices[, yearmonthid := yearmon(date)]
+prices[, day_of_month := 1:.N, by = .(symbol, yearmonthid)]
+prices[, day_of_month := as.factor(day_of_month)]
 
-# remove symobls with < 252 observations
-dt_n <- dt[, .N, by = symbol]
-dt_n <- dt_n[N > 252 * 4]
-dt <- dt[symbol %in% dt_n[, symbol]]
+# Remove missing values and select columns we need
+dt = na.omit(prices, cols = c("symbol", "return_week2", "day_of_month"))
 
-# save SPY for later and keep only events symbols
-spy <- dt[symbol == "spy"]
+# Structure of dates
+dt[, .N, by = day_of_month] # we probbably want to turn 23 to 22
+dt[day_of_month == 23, day_of_month := 22] # 23 day to 22 day
+dt[day_of_month == 22, day_of_month := 21] # not sure about this but lets fo with it
 
+# Remove symbols with  less than 750 observations (3 years of data)
+symbols_keep = dt[, .N, by = symbol]
+symbols_keep = symbols_keep[N >= 750, symbol]
+dt = dt[symbol %in% symbols_keep]
 
 
 # SEASONALITY MINING ------------------------------------------------------
-# define target variables
-dt[, return_day := shift(close_adj, 1, type = "lead") / close_adj - 1, by = symbol]
-dt[, return_day3 := shift(close_adj, 3, type = "lead") / close_adj - 1, by = symbol]
-dt[, return_week := shift(close_adj, 5, type = "lead") / close_adj - 1, by = symbol]
-dt[, return_week2 := shift(close_adj, 10, type = "lead") / close_adj - 1, by = symbol]
-
-# define frequency unit
-dt[, yearmonthid := round(date, digits = "month")]
-dt[, day_of_month := 1:.N, by = .(symbol, yearmonthid)]
-dt[, day_of_month := as.factor(day_of_month)]
+# define all year-months and start year
+yearmonthids = dt[, sort(unique(yearmonthid))]
+end_dates = seq.Date(as.Date("2019-01-01"), dt[, max(date)], by = "month")
+end_dates = as.IDate(end_dates)
 
-# remove missing values
-dt = na.omit(dt, cols = c("symbol", "return_week2", "day_of_month"))
+# Remove symbols inactive before the first date. This can produce survivorship bias,
+# but it will be faster. If this doesn't work, it won't work with all data for sure.
+symbols_keep = dt[, (end_dates[1] - max(date)) < 7, by = symbol] # we must have at least 7 days of data
+dt = dt[symbol %in% symbols_keep[V1 == TRUE, symbol]]
 
-# get coeffs from summary of quantile regression
+# Get coeffs from summary of quantile regression
 get_coeffs = function(df, y = "return_week") {
-  # df = dt[symbol == "a.1"]
   res = rq(as.formula(paste0(y, " ~ day_of_month")), data = as.data.frame(df))
   summary_fit = summary.rq(res, se = 'nid')
   as.data.table(summary_fit$coefficients, keep.rownames = TRUE)
 }
 
-# define all year-months and start year
-yearmonthids = dt[, sort(unique(yearmonthid))]
-end_years = seq.Date(as.Date("2019-01-01"),
-                     as.Date("2023-05-01"), by = "month")
-end_years = as.character(end_years)
-# first_year = "2010-01-01"
-
-# get median regression coefficients
-# symbols = dt[, unique(symbol)]
-# sample_ = dt[symbol %in% symbols[1:20000]]
-dt[day_of_month == 23, day_of_month := 22] # 23 day to 22 day
-dt_sample = dt[, .SD[(as.IDate(end_years[1])-max(date)) < 7], by = symbol] # we must have
-dt_sample = dt_sample[, .SD[nrow(.SD) > 1008], by = symbol] # we must have at least 3 yers of data
+# Sample data - this is just for test
+dt_sample = dt[symbol %in% dt[, sample(unique(symbol), 10)]]
 
-# dt_sample[, year := year(date)]
-X_seasons_day3 = dt_sample[, lapply(as.IDate(end_years), function(y) {
-  if ((y - max(date)) > 7) return(list(NA))
-  tryCatch(list(get_coeffs(.SD[yearmonthid %between% c(y - 2520, y)]), "return_day3"),
+# get median regression coefficients - experiment
+sample_size_days = 2520
+seasonality_results = dt[, lapply(end_dates, function(date_) {
+  if ((date_ - max(date)) > 7) return(list(NA))
+  # print(date_ - sample_size_days)
+  # get_coeffs(.SD[yearmonthid %between% c(yearmon(y - sample_size_days), yearmon(y))])
+  tryCatch(list(get_coeffs(.SD[yearmonthid %between% c(yearmon(date_ - sample_size_days), yearmon(date_))])),
            error = function(e) list(NA))
-  }), by = .(symbol)]
-
-cols = paste0("month", strftime(end_years, format = "%y%m%d"))
-colnames(X_seasons_day3)[2:length(colnames(X_seasons_day3))] = cols
+  }), by = symbol]
+cols = paste0("month", strftime(end_dates, format = "%y%m%d"))
+colnames(seasonality_results)[2:length(colnames(seasonality_results))] = cols
 
 # save
 time = strftime(Sys.time(), "%Y%m%d%H%M%S")
-saveRDS(X_seasons_day3, file.path("D:/features", paste0("seasonality-day3", time, ".rds")))
+saveRDS(seasonality_results, file.path("D:/features", paste0("seasonality-week", time, ".rds")))
+
+# Import data
+
+
+# INSPECT RESULTS ---------------------------------------------------------
+# seasonality_results[1, month190101]
 
 
 # CREATE PORTFOLIOS -------------------------------------------------------
-# create portfolio function
+# Portfolio 1 - keep min Pr for every symbol
 portfolios_l = list()
 for (i in seq_along(cols)) {
 
   # sample
   col = cols[i]
   cols_ = c("symbol", col)
-  x = X_seasons[, ..cols_]
+  x = seasonality_results[, ..cols_]
 
   # remove missing values
   x[, number_of_rows := vapply(get(col), function(y) length(y), FUN.VALUE = integer(1L))]
@@ -144,7 +186,7 @@ for (i in seq_along(cols)) {
   # sample
   col = cols[i]
   cols_ = c("symbol", col)
-  x = X_seasons[, ..cols_]
+  x = seasonality_results[, ..cols_]
 
   # remove missing values
   x[, number_of_rows := vapply(get(col), function(y) length(y), FUN.VALUE = integer(1L))]
@@ -179,7 +221,7 @@ for (i in seq_along(cols)) {
   # sample
   col = cols[i]
   cols_ = c("symbol", col)
-  x = X_seasons[, ..cols_]
+  x = seasonality_results[, ..cols_]
 
   # remove missing values
   x[, number_of_rows := vapply(get(col), function(y) length(y), FUN.VALUE = integer(1L))]
@@ -211,12 +253,14 @@ portfolio3[, Value := 1]
 
 # clean portfolios
 portfolio_prepare = function(portfolio) {
+  # portfolio = copy(portfolio1)
+
   # set trading dates
-  portfolio[, date := as.Date(gsub("month", "", date), format = "%y%m%d")]
+  # portfolio[, date := as.Date(gsub("month", "", date), format = "%y%m%d")]
   portfolio[, rn := gsub("day_of_month", "", rn)]
 
   # get trading days
-  date_ = portfolio[, date]
+  date_ = portfolio[, as.Date(paste0(gsub("month", "", date), "01"), format = "%y%m%d")]
   seq_ = 1:nrow(portfolio)
   seq_dates = lapply(date_, function(x) getBusinessDays(x, x %m+% months(1) - 1))
   dates = mapply(function(x, y) x[y], x = seq_dates, y = portfolio[, as.integer(rn)])
diff --git a/R/minmax_panel.R b/R/minmax_panel.R
index 9b65485..d712d2e 100644
--- a/R/minmax_panel.R
+++ b/R/minmax_panel.R
@@ -1,12 +1,11 @@
 library(data.table)
+library(Rcpp)
+library(TTR)
 library(lubridate)
 library(ggplot2)
-library(moments)
-library(TTR)
 library(PerformanceAnalytics)
-library(gausscov)
 library(runner)
-library(doParallel)
+library(glue)
 
 
 # UTILS -------------------------------------------------------------------
@@ -17,67 +16,23 @@ AFTER_COVID = c("2021-06-01", "2022-01-01")
 CORECTION   = c("2022-01-01", "2022-08-01")
 NEW         = c("2022-08-01", as.character(Sys.Date()))
 
+# Globals
+RESULTS = "F:/strategies/MinMaxWfo"
 
-# DATA --------------------------------------------------------------------
-# Import prices and MinMax data
-list.files("F:/predictors/minmax")
-dt = fread("F:/predictors/minmax/20240228.csv")
-
-# check timezone
-dt[, attr(date, "tz")]
-dt[, date := with_tz(date, tzone = "America/New_York")]
-dt[, attr(date, "tz")]
-
-# Spy data
-spy = dt[symbol == "spy", .(date, close, returns)]
-
-# Extreme returns
-cols = colnames(dt)[grep("^p_9", colnames(dt))]
-cols_new_up = paste0("above_", cols)
-dt[, (cols_new_up) := lapply(.SD, function(x) ifelse(returns > x, returns - shift(x), 0)),
-   by = .(symbol), .SDcols = cols] # Shifted to remove look-ahead bias
-cols = colnames(dt)[grep("^p_0", colnames(dt))]
-cols_new_down = paste0("below_", cols)
-dt[, (cols_new_down) := lapply(.SD, function(x) ifelse(returns < x, abs(returns - shift(x)), 0)),
-   by = .(symbol), .SDcols = cols]
-
-
-# SYSTEMIC RISK -----------------------------------------------------------
-# help function to calcualte tail risk measures from panel
-tail_risk = function(dt, FUN = mean, cols_prefix = "mean_") {
-  cols = colnames(dt)[grep("below_p|above_p", colnames(dt))]
-  indicators_ = dt[, lapply(.SD, function(x) f(x, na.rm = TRUE)),
-                   by = .(date), .SDcols = cols,
-                   env = list(f = FUN)]
-  colnames(indicators_) = c("date", paste0(cols_prefix, cols))
-  setorder(indicators_, date)
-  above_sum_cols = colnames(indicators_)[grep("above", colnames(indicators_))]
-  below_sum_cols = colnames(indicators_)[grep("below", colnames(indicators_))]
-  excess_sum_cols = gsub("above", "excess", above_sum_cols)
-  indicators_[, (excess_sum_cols) := indicators_[, ..above_sum_cols] - indicators_[, ..below_sum_cols]]
-}
-
-# get tail risk mesures
-indicators_mean      = tail_risk(dt, FUN = "mean", cols_prefix = "mean_")
-indicators_sd        = tail_risk(dt, FUN = "sd", cols_prefix = "sd_")
-indicators_sum       = tail_risk(dt, FUN = "sum", cols_prefix = "sum_")
-indicators_skewness  = tail_risk(dt, FUN = "skewness", cols_prefix = "skewness_")
-indicators_kurtosis  = tail_risk(dt, FUN = "kurtosis", cols_prefix = "kurtosis_")
-
-# merge indicators and spy
-indicators = Reduce(function(x, y) merge(x, y, by = "date", all.x = TRUE, all.y = FALSE),
-                    list(indicators_mean, indicators_sd, indicators_sum,
-                         indicators_skewness, indicators_kurtosis))
 
-# Inspect final table
-dim(indicators)
-excess_cols = colnames(indicators)[grepl("excess", colnames(indicators))]
-
-# Free memory
-rm(dt)
+# DATA --------------------------------------------------------------------
+# SPY data
+spy = fread("F:/predictors/minmax/20240228.csv")
+spy[, attr(date, "tz")]
+spy[, date := with_tz(date, tzone = "America/New_York")]
+spy[, attr(date, "tz")]
+spy = spy[symbol == "spy", .(date, close, returns)]
 gc()
 
-# merge spy and indicators
+# Import indicators
+indicators = fread("F:/predictors/minmax/indicators.csv")
+
+# Merge spy and indicators
 sysrisk = merge(indicators, spy, by = "date", all.x = TRUE, all.y = FALSE)
 sysrisk = na.omit(sysrisk, cols = "returns")
 
@@ -113,75 +68,121 @@ na.omit(data_plot)[, mean(returns), by = .(alpha)] |>
   ggplot(aes(x = alpha, y = V1)) +
   geom_bar(stat = "identity")
 
-# Prepare backtest data
-# cols = c("date", "close", "returns", excess_cols)
-cols = c("date", "close", "returns", excess_cols[grep("sd_", excess_cols)])
-# colnames(indicators)[grepl("sd_", colnames(indicators))]
+# Choose columns
+# 1) Choose subset of columns
+cols = c("date", "close", "returns", colnames(indicators)[grepl("sum_ex", colnames(indicators))])
 backtest_dt = sysrisk[, ..cols]
+# 2) choose all columns
+backtest_dt = copy(sysrisk)
+
+# Remove columns with many NA values
 cols_keep = colnames(backtest_dt)[sapply(backtest_dt, function(x) sum(is.na(x))/length(x) < 0.5)]
 backtest_dt = backtest_dt[, ..cols_keep]
+
+# Remove NA values
 backtest_dt = na.omit(backtest_dt)
-predictors = backtest_dt[, colnames(backtest_dt)[4:ncol(backtest_dt)]]
 
-# Optimization insample parameters
-params = backtest_dt[, ..predictors]
-params = params[, lapply(.SD, quantile, probs = seq(0, 1, 0.02), na.rm = TRUE)]
-params = melt(params)
-params = merge(data.frame(sma_width=c(1, 5, 15, 22)), params, by=NULL)
-params = unique(params)
+# Define predictors
+predictors = setdiff(colnames(backtest_dt), c("date", "close", "returns"))
+predictors_excess = predictors[grepl("excess", predictors)]
+predictors_sum = predictors[grepl("sum", predictors)]
+predictors_sd = predictors[grepl("sd", predictors)]
+predictors_skew = predictors[grepl("skew", predictors)]
+predictors_kurtosis = predictors[grepl("kurtosis", predictors)]
+
+
+# INSAMPLE OPTIMIZATION ---------------------------------------------------
+# backtest Rcpp
+Rcpp::cppFunction("
+  double backtest_cpp(NumericVector returns, NumericVector indicator, double threshold) {
+    int n = indicator.size();
+    NumericVector sides(n);
+
+    for(int i=0; i<n; i++){
+      if(i==0 || R_IsNA(indicator[i-1])) {
+        sides[i] = 1;
+      } else if (indicator[i-1] < threshold){
+        sides[i] = 0;
+      } else {
+        sides[i] = 1;
+      }
+    }
+
+    NumericVector returns_strategy = returns * sides;
+
+    double cum_returns{ 1 + returns_strategy[0]} ;
+    for(int i=1; i<n; i++){
+      cum_returns *= (1 + returns_strategy[i]);
+    }
+    cum_returns = cum_returns - 1;
+
+    return cum_returns;
+  }
+", rebuild = TRUE)
+Rcpp::cppFunction("
+  double backtest_above_threshold(NumericVector returns, NumericVector indicator, double threshold) {
+    int n = indicator.size();
+    NumericVector sides(n);
+
+    for(int i=0; i<n; i++){
+      if(i==0 || R_IsNA(indicator[i-1])) {
+        sides[i] = 1;
+      } else if (indicator[i-1] > threshold){
+        sides[i] = 0;
+      } else {
+        sides[i] = 1;
+      }
+    }
+
+    NumericVector returns_strategy = returns * sides;
+
+    double cum_returns{ 1 + returns_strategy[0]} ;
+    for(int i=1; i<n; i++){
+      cum_returns *= (1 + returns_strategy[i]);
+    }
+    cum_returns = cum_returns - 1;
+
+    return cum_returns;
+  }
+", rebuild = TRUE)
 
-# help vectors
-returns_    = backtest_dt[, returns]
-thresholds_ = params[, 3]
-vars        = as.vector(params[, 2])
-ns          = params[, 1]
-
-# backtest vectorized
-# library(Rcpp)
-# Rcpp::cppFunction("
-#   double backtest_cpp(NumericVector returns, NumericVector indicator, double threshold) {
-#     int n = indicator.size();
-#     NumericVector sides(n);
-#
-#     for(int i=0; i<n; i++){
-#       if(i==0 || R_IsNA(indicator[i-1])) {
-#         sides[i] = 1;
-#       } else if(indicator[i-1] < threshold){
-#         sides[i] = 0;
-#       } else {
-#         sides[i] = 1;
-#       }
-#     }
-#
-#     NumericVector returns_strategy = returns * sides;
-#
-#     double cum_returns{ 1 + returns_strategy[0]} ;
-#     for(int i=1; i<n; i++){
-#       cum_returns *= (1 + returns_strategy[i]);
-#     }
-#     cum_returns = cum_returns - 1;
-#
-#     return cum_returns;
-#   }
-# ", rebuild = TRUE)
 backtest_vectorized = function(returns, indicator, threshold, return_cumulative = TRUE) {
-  sides = ifelse(c(NA, head(indicator, -1)) > threshold, 0, 1)
+  # returns = returns_
+  # i = 1
+  # indicator = SMA(backtest_dt[, get(vars[i])], ns[i])
+  # threshold = thresholds_[i]
+  # return_cumulative = TRUE
+
+  # sides = ifelse(c(NA, head(indicator, -1)) > threshold, 0, 1)
+  sides = ifelse(shift(indicator) < threshold, 0, 1)
   sides[is.na(sides)] = 1
 
-  returns_strategy <- returns * sides
+  returns_strategy = returns * sides
+  # returns_strategy_1 = returns_strategy
 
   if (return_cumulative) {
+    # cum_returns = 1 + returns_strategy[1]
+    # for (i in 2:length(returns_strategy)) {
+    #   cum_returns = cum_returns * (1 + returns_strategy[i])
+    # }
+    # return(cum_returns - 1)
     return(PerformanceAnalytics::Return.cumulative(returns_strategy))
   } else {
     return(returns_strategy)
   }
 }
 backtest <- function(returns, indicator, threshold, return_cumulative = TRUE) {
+  # returns = returns_
+  # i = 1
+  # indicator = SMA(backtest_dt[, get(vars[i])], ns[i])
+  # threshold = thresholds_[i]
+  # return_cumulative = TRUE
+
   sides <- vector("integer", length(indicator))
   for (i in seq_along(sides)) {
     if (i %in% c(1) || is.na(indicator[i-1])) {
-      sides[i] <- NA
-    } else if (indicator[i-1] > threshold) {
+      sides[i] <- 1
+    } else if (indicator[i-1] < threshold) {
       sides[i] <- 0
     } else {
       sides[i] <- 1
@@ -189,6 +190,8 @@ backtest <- function(returns, indicator, threshold, return_cumulative = TRUE) {
   }
   sides <- ifelse(is.na(sides), 1, sides)
   returns_strategy <- returns * sides
+  # returns_strategy_2 = returns_strategy
+
   if (return_cumulative) {
     return(PerformanceAnalytics::Return.cumulative(returns_strategy))
   } else {
@@ -212,42 +215,295 @@ performance <- function(x) {
   return(Perf)
 }
 
+# Function to get parameters
+get_params = function(dt, predictors) {
+  # Optimization insample parameters
+  params = dt[, ..predictors]
+  params = params[, lapply(.SD, quantile, probs = seq(0, 1, 0.02), na.rm = TRUE)]
+  params = melt(params, variable.factor = FALSE)
+  param_sman = c(1, 5, 15, 22, 44, 66)
+
+  # Combine variables, thresholds and sma_n
+  params_expanded = params[rep(1:.N, each = length(param_sman))]
+  params_expanded[, new_col := rep(param_sman, times = nrow(params))]
+  params_expanded = unique(params_expanded)
+  setnames(params_expanded, c("variable", "thresholds", "sma_n"))
+
+  return(params_expanded)
+}
+
+# Parameters
+params_above_threshold = get_params(backtest_dt, predictors_sd)
+
+# help vectors
+returns_    = backtest_dt[, returns]
+vars        = params_expanded[, 1][[1]]
+thresholds_ = params_expanded[, 2][[1]]
+ns          = params_expanded[, 3][[1]]
+vars_above       = params_above_threshold[, 1][[1]]
+thresholds_above = params_above_threshold[, 2][[1]]
+ns_above         = params_above_threshold[, 3][[1]]
+
 # optimization loop
 system.time({
-  opt_results_l =
-    vapply(1:nrow(params), function(i)
-      backtest_cpp(returns_,
-                   SMA(sysrisk[, get(vars[i])], ns[i]),
-                   thresholds_[i]),
-      numeric(1))
+  opt_results = vapply(1:nrow(params_expanded), function(i) {
+    backtest_cpp(returns_, SMA(backtest_dt[, get(vars[i])], ns[i]), thresholds_[i])
+  }, numeric(1))
 })
-opt_results = cbind.data.frame(params, opt_results_l)
-opt_results = opt_results[order(opt_results$opt_results_l), ]
+opt_results_dt = as.data.table(
+  cbind.data.frame(params_expanded, cum_return = opt_results)
+)
+setnames(opt_results_dt, c("var", "threshold", "sma_n", "cum_return"))
+setorder(opt_results_dt, -cum_return)
+first(opt_results_dt, 10)
+
+# optimization loop for above threshold backtest
+system.time({
+  opt_results = vapply(1:nrow(params_expanded), function(i) {
+    backtest_above_threshold(returns_, SMA(backtest_dt[, get(vars[i])], ns[i]), thresholds_[i])
+  }, numeric(1))
+})
+opt_results_dt = as.data.table(
+  cbind.data.frame(params_expanded, cum_return = opt_results)
+)
+setnames(opt_results_dt, c("var", "threshold", "sma_n", "cum_return"))
+setorder(opt_results_dt, -cum_return)
+first(opt_results_dt, 10)
 
 # optimization loop vectorized
 system.time({
-  opt_results_vect_l =
-    vapply(1:nrow(params), function(i)
-      backtest_vectorized(returns_,
-                          SMA(backtest_dt[, get(vars[i])], ns[i]),
-                          thresholds_[i]),
+  opt_results_vect =
+    vapply(1:nrow(params_expanded), function(i)
+      backtest_vectorized(returns_, SMA(backtest_dt[, get(vars[i])], ns[i]), thresholds_[i]),
       numeric(1))
 })
-opt_results_vectorized = cbind.data.frame(params, opt_results_vect_l)
-opt_results_vectorized = opt_results_vectorized[order(opt_results_vectorized$opt_results_vect_l), ]
-
-# Same!
-tail(opt_results_vectorized, 40) # best results
-
-# inspect results
-strategy_returns <- backtest(returns_,
-                             SMA(backtest_dt[, sd_excess_p_999_halfyear ], 1),
-                             0.0001708726,
-                             FALSE)
+opt_results_vect_dt = as.data.table(
+  cbind.data.frame(params_expanded, cum_return = opt_results_vect)
+)
+setnames(opt_results_vect_dt, c("var", "threshold", "sma_n", "cum_return"))
+setorder(opt_results_vect_dt, -cum_return)
+first(opt_results_vect_dt, 10)
+
+# # optimization loop with backtest
+# system.time({
+#   opt_results_r =
+#     vapply(1:nrow(params_expanded), function(i)
+#       backtest(returns_, SMA(backtest_dt[, get(vars[i])], ns[i]), thresholds_[i]),
+#       numeric(1))
+# })
+# # user  system elapsed
+# # 1619.35    1.19 1622.57
+# opt_results_r_dt = cbind.data.frame(params_expanded, opt_results_r)
+# setnames(opt_results_r_dt, c("threshold", "sma_n", "var", "cum_return"))
+# setorder(opt_results_r_dt, -cum_return)
+# first(opt_results_r_dt, 10)
+
+# Compare above results; every check should print TRUE (opt_results_r comes from the commented-out R loop above)
+length(unique(c(length(opt_results_vect), length(opt_results_r), length(opt_results)))) == 1
+all(round(opt_results_vect, 2) == round(opt_results_r, 2))
+all(round(opt_results_vect, 2) == round(opt_results, 2))
+
+# Results across vars
+vars_results = opt_results_dt[, .(var_mean = mean(cum_return)), by = var]
+vars_results[, var_agg := gsub("_.*", "", var)]
+vars_results[, mean(var_mean), by = var_agg]
+
+# inspect best results
+best_strategy = opt_results_dt[1, ]
+strategy_returns = backtest(returns_,
+                            SMA(backtest_dt[, .SD, .SDcols = best_strategy$var],
+                                best_strategy$sma_n),
+                            best_strategy$threshold,
+                            FALSE)
 dt_xts = xts(cbind(returns_, strategy_returns), order.by = backtest_dt[, date])
 charts.PerformanceSummary(dt_xts)
-performance(dt_xts[, 1])
-performance(dt_xts[, 2])
+charts.PerformanceSummary(dt_xts["2020/"])
+charts.PerformanceSummary(dt_xts["2022/"])
+charts.PerformanceSummary(dt_xts["2023/"])
+
+# Results across lags
+dt_ = as.data.table(opt_results_dt)
+dt_[, .(mean = mean(cum_return),
+        median = median(cum_return)), by = sma_n]
+
+# Optimization for above threshold
+opt_results_above = vapply(1:nrow(params_above_threshold), function(i) {
+  backtest_cpp(returns_,
+               SMA(backtest_dt[, get(vars_above[i])], ns_above[i]),
+               thresholds_above[i])
+}, numeric(1))
+opt_results_above_dt = as.data.table(
+  cbind.data.frame(params_above_threshold, cum_return = opt_results_above)
+)
+setnames(opt_results_above_dt, c("var", "threshold", "sma_n", "cum_return"))
+setorder(opt_results_above_dt, -cum_return)
+first(opt_results_above_dt, 10)
+
+
+# RCPP VS R ---------------------------------------------------------------
+# Source backtest.cpp file
+sourceCpp("backtest.cpp")
+
+# Backtest function
+system.time({
+  x = backtest(
+    returns_,
+    SMA(backtest_dt[, .SD, .SDcols = best_strategy$var], best_strategy$sma_n),
+    best_strategy$threshold,
+    TRUE)
+})
+system.time({
+  y = backtest_sell_below_threshold(
+    returns_,
+    SMA(backtest_dt[, .SD, .SDcols = best_strategy$var], best_strategy$sma_n),
+    best_strategy$threshold)
+})
+all(round(x, 3) == round(y, 3))
+
+# SMA function
+x_= runif(100)
+sma_x = SMA(x_, 50)
+sma_y = calculate_sma(x_, 50)
+all(sma_x == sma_y, na.rm = TRUE)
+
+# Optimization function
+params = backtest_dt[, ..predictors]
+params = params[, lapply(.SD, quantile, probs = seq(0, 1, 0.3), na.rm = TRUE)]
+params = melt(params, variable.factor = FALSE)
+param_sman = c(1, 5, 15, 22, 44, 66)
+params_expanded = params[rep(1:.N, each = length(param_sman))]
+params_expanded[, new_col := rep(param_sman, times = nrow(params))]
+params_expanded = unique(params_expanded)
+setnames(params_expanded, c("variable", "thresholds", "sma_n"))
+returns_    = backtest_dt[, returns]
+vars        = params_expanded[, 1][[1]]
+thresholds_ = params_expanded[, 2][[1]]
+ns          = params_expanded[, 3][[1]]
+system.time({
+  x = vapply(1:nrow(params_expanded), function(i) {
+    backtest_cpp(returns_, SMA(backtest_dt[, get(vars[i])], ns[i]), thresholds_[i])
+  }, numeric(1))
+})
+# Optimization function cpp
+params_ = copy(params_expanded)
+setnames(params_, c("variable", "thresholds", "sma_n"))
+system.time({y = opt_with_sma(df = backtest_dt, params = params_)})
+length(x) == length(y)
+all(round(x, 3) == round(y, 3))
+
+# WFO approaches
+windows = 7 * 22
+system.time({
+  x = lapply(windows, function(w) {
+    bres = runner(
+      x = as.data.frame(backtest_dt),
+      f = function(x) {
+        ret = vapply(1:nrow(params_), function(i) {
+          backtest_vectorized(x$returns,
+                              SMA(x[, params_[i, variable]], params_[i, sma_n]),
+                              params_[i, thresholds])
+        }, numeric(1))
+        returns_strategies = cbind.data.frame(params_, ret)
+        returns_strategies[order(returns_strategies$ret, decreasing = TRUE), ]
+      },
+      k = w,
+      at = 154:250,
+      na_pad = TRUE,
+      simplify = FALSE
+    )
+  })
+})
+df_ = as.data.frame(backtest_dt[1:250])
+colnames(df_)[1] = "time"
+
+system.time({y = wfo_with_sma(df_, params_, windows, "rolling")})
+length(x[[1]])
+length(y)
+nrow(x[[1]][[1]])
+nrow(y[[1]])
+y = lapply(y, function(df_) {
+  df_[, 1] = as.POSIXct(df_[, 1])
+  df_
+})
+y = lapply(y, function(df_) {
+  df_[, 1] = with_tz(df_[, 1], tzone = "America/New_York")
+  df_
+})
+y = lapply(y, function(df_) {
+  cbind.data.frame(df_, params_)
+})
+y = rbindlist(y)
+setorder(y, Scalar, -Vector)
+head(x[[1]][[1]]); head(y)
+all(round(x[[1]][[1]][, "ret"], 2) == y[1:nrow(x[[1]][[1]]), round(Vector, 2)])
+
+
+# WALK FORWARD OPTIMIZATION -----------------------------------------------
+# Optimization params
+predictors_ = c(predictors_skew, predictors_kurtosis)
+params = backtest_dt[, ..predictors_]
+params = params[, lapply(.SD, quantile, probs = seq(0, 1, 0.02), na.rm = TRUE)]
+params = melt(params, variable.factor = FALSE)
+param_sman = c(1, 5, 15, 22, 44, 66)
+params_expanded = params[rep(1:.N, each = length(param_sman))]
+params_expanded[, new_col := rep(param_sman, times = nrow(params))]
+params_expanded = unique(params_expanded)
+setnames(params_expanded, c("variable", "thresholds", "sma_n"))
+
+# WFO utils
+clean_wfo = function(y) {
+  y = lapply(y, function(df_) {
+    df_[, 1] = as.POSIXct(df_[, 1])
+    df_[, 1] = with_tz(df_[, 1], tzone = "America/New_York")
+    cbind.data.frame(df_, params_expanded)
+  })
+  y = rbindlist(y)
+  return(y)
+}
+calcualte_and_save_wfo = function(window) {
+  # window = 7 * 22 * 6
+  wfo_results_ = wfo(df, params_expanded, window, "rolling")
+  wfo_results_ = clean_wfo(wfo_results_)
+  fwrite(wfo_results_, file.path(RESULTS, glue("wfo_results_{window}.csv")))
+  return(0)
+}
+
+# Walk forward optimization
+df = as.data.frame(backtest_dt)
+colnames(df)[1] = "time"
+calcualte_and_save_wfo(7 * 22 * 1)
+# calcualte_and_save_wfo(7 * 22 * 6)
+# calcualte_and_save_wfo(7 * 22 * 12)
+# calcualte_and_save_wfo(7 * 22 * 18) # never finished try before go home
+
+# Import results
+list.files(RESULTS)
+wfo_results = fread(file.path(RESULTS, "wfo_results_154.csv"))
+setnames(wfo_results, c("date", "return", "variable", "thresholds", "n"))
+setorder(wfo_results, date, return)
+
+# Keep only sd
+# wfo_results = wfo_results[variable %like% "sd_"]
+
+# Keep best
+wfo_results_best_n = wfo_results[, first(.SD, 50), by = date]
+wfo_results_best_n[, unique(variable)]
+
+# Merge results with backtest data, that is price data
+backtest_long = melt(backtest_dt, id.vars = c("date", "returns"))
+backtest_long = merge(backtest_long, wfo_results_best_n, by = c("date", "variable"),
+                      all.x = TRUE, all.y = FALSE)
+backtest_long = na.omit(backtest_long, cols = "return")
+backtest_long[, signal := value < thresholds]
+backtest_long[, signal_ensamble := sum(signal) > 20, by = date]
+backtest_long = unique(backtest_long[, .(date, returns, signal = signal_ensamble)])
+
+# Backtest
+backtest_xts = backtest_long[, .(date, benchmark = returns, signal = shift(signal))]
+backtest_xts[, strategy := benchmark * signal]
+backtest_xts = as.xts.data.table(na.omit(backtest_xts))
+charts.PerformanceSummary(backtest_xts)
+
 
 
 # GAUSSCOV PREDICTIONS ----------------------------------------------------
diff --git a/R/minmax_panel_indicators.R b/R/minmax_panel_indicators.R
new file mode 100644
index 0000000..384e1a1
--- /dev/null
+++ b/R/minmax_panel_indicators.R
@@ -0,0 +1,60 @@
+# TODO: I don't need all those libraries
+library(data.table)
+library(lubridate)
+library(ggplot2)
+library(moments)
+
+
+# DATA --------------------------------------------------------------------
+# Import prices and MinMax data
+list.files("F:/predictors/minmax")
+dt = fread("F:/predictors/minmax/20240228.csv")
+
+# check timezone
+dt[, attr(date, "tz")]
+dt[, date := with_tz(date, tzone = "America/New_York")]
+dt[, attr(date, "tz")]
+
+# Spy data
+spy = dt[symbol == "spy", .(date, close, returns)]
+
+# Extreme returns
+cols = colnames(dt)[grep("^p_9", colnames(dt))]
+cols_new_up = paste0("above_", cols)
+dt[, (cols_new_up) := lapply(.SD, function(x) ifelse(returns > x, returns - shift(x), 0)),
+   by = .(symbol), .SDcols = cols] # Shifted to remove look-ahead bias
+cols = colnames(dt)[grep("^p_0", colnames(dt))]
+cols_new_down = paste0("below_", cols)
+dt[, (cols_new_down) := lapply(.SD, function(x) ifelse(returns < x, abs(returns - shift(x)), 0)),
+   by = .(symbol), .SDcols = cols]
+
+
+# SYSTEMIC RISK -----------------------------------------------------------
+# help function to calculate tail risk measures from panel
+tail_risk = function(dt, FUN = mean, cols_prefix = "mean_") {
+  cols = colnames(dt)[grep("below_p|above_p", colnames(dt))]
+  indicators_ = dt[, lapply(.SD, function(x) f(x, na.rm = TRUE)),
+                   by = .(date), .SDcols = cols,
+                   env = list(f = FUN)]
+  colnames(indicators_) = c("date", paste0(cols_prefix, cols))
+  setorder(indicators_, date)
+  above_sum_cols = colnames(indicators_)[grep("above", colnames(indicators_))]
+  below_sum_cols = colnames(indicators_)[grep("below", colnames(indicators_))]
+  excess_sum_cols = gsub("above", "excess", above_sum_cols)
+  indicators_[, (excess_sum_cols) := indicators_[, ..above_sum_cols] - indicators_[, ..below_sum_cols]]
+}
+
+# Get tail risk measures
+indicators_mean      = tail_risk(dt, FUN = "mean", cols_prefix = "mean_")
+indicators_sd        = tail_risk(dt, FUN = "sd", cols_prefix = "sd_")
+indicators_sum       = tail_risk(dt, FUN = "sum", cols_prefix = "sum_")
+indicators_skewness  = tail_risk(dt, FUN = "skewness", cols_prefix = "skewness_")
+indicators_kurtosis  = tail_risk(dt, FUN = "kurtosis", cols_prefix = "kurtosis_")
+
+# Merge all indicator tables by date
+indicators = Reduce(function(x, y) merge(x, y, by = "date", all.x = TRUE, all.y = FALSE),
+                    list(indicators_mean, indicators_sd, indicators_sum,
+                         indicators_skewness, indicators_kurtosis))
+
+# Save indicators
+fwrite(indicators, "F:/predictors/minmax/indicators.csv")
diff --git a/R/quantroom_strategies.R b/R/quantroom_strategies.R
new file mode 100644
index 0000000..d3e18e7
--- /dev/null
+++ b/R/quantroom_strategies.R
@@ -0,0 +1,10 @@
+library(quantmod)
+library(data.table)
+library(PerformanceAnalytics)
+library(dplyr)
+library(lubridate)
+library(future.apply)
+library(timeDate)
+library(RQuantLib)
+
+
diff --git a/R/strategy_beneficial_ownership_.R b/R/strategy_beneficial_ownership_.R
index aa566d9..1e3b069 100644
--- a/R/strategy_beneficial_ownership_.R
+++ b/R/strategy_beneficial_ownership_.R
@@ -6,7 +6,33 @@ library(arrow)
 # Import benefits ownership data
 bo = read_parquet("F:/data/equity/us/fundamentals/beneficial.parquet")
 
+# Set data types
+bo[, filingDate := as.Date(filingDate)]
+bo[, percentOfClass := as.numeric(percentOfClass)]
+
+# Keep unique rows
+bo = unique(bo)
+
+# Sort
+setorder(bo, symbol, filingDate)
+
 #
 bo[symbol == "AAPL"]
 bo[symbol == "AAPL"][filingDate == "2024-02-14"]
 
+# Aggregate percent of class for every symbol and date
+bo_agg = bo[, .(symbol, filingDate, nameOfReportingPerson, percentOfClass)]
+bo_agg = unique(bo_agg)
+bo_agg = bo_agg[, .(total  = sum(percentOfClass)), by = .(symbol, filingDate)]
+
+# Test
+bo_agg[, all(total < 100)]
+bo_agg[total > 100]
+unique(bo[, .(symbol, filingDate, nameOfReportingPerson, percentOfClass)])[symbol == "RELL" & filingDate == "2013-06-10"]
+
+# Remove observations where total percent is 100 or greater
+bo_agg = bo_agg[total < 100]
+
+# Check
+plot(as.xts.data.table(bo_agg[symbol == "AAPL", .(filingDate, total)]))
+plot(as.xts.data.table(bo_agg[symbol == "V", .(filingDate, total)]))
diff --git a/R/strategy_etf_constituents.R b/R/strategy_etf_constituents.R
new file mode 100644
index 0000000..085ebab
--- /dev/null
+++ b/R/strategy_etf_constituents.R
@@ -0,0 +1,162 @@
+library(data.table)
+library(TTR)
+library(ggplot2)
+library(PerformanceAnalytics)
+
+
+# # SET UP ------------------------------------------------------------------
+# # global vars
+# PATH = "F:/data/equity/us"
+
+
+# PRICE DATA --------------------------------------------------------------
+# Import QC daily data
+prices = fread("F:/lean/data/stocks_daily.csv")
+setnames(prices, gsub(" ", "_", c(tolower(colnames(prices)))))
+
+# Remove duplicates
+prices = unique(prices, by = c("symbol", "date"))
+
+# Remove duplicates - the same series stored under different symbols (e.g. phun and phun.1)
+dups = prices[, .(symbol , n = .N),
+              by = .(date, open, high, low, close, volume, adj_close,
+                     symbol_first = substr(symbol, 1, 1))]
+dups = dups[n > 1]
+dups[, symbol_short := gsub("\\.\\d$", "", symbol)]
+symbols_remove = dups[, .(symbol, n = .N),
+                      by = .(date, open, high, low, close, volume, adj_close,
+                             symbol_short)]
+symbols_remove[n >= 2, unique(symbol)]
+symbols_remove = symbols_remove[n >= 2, unique(symbol)]
+symbols_remove = symbols_remove[grepl("\\.", symbols_remove)]
+prices = prices[symbol %notin% symbols_remove]
+
+# Adjust all columns
+prices[, adj_rate := adj_close / close]
+prices[, let(
+  open = open*adj_rate,
+  high = high*adj_rate,
+  low = low*adj_rate
+)]
+setnames(prices, "close", "close_raw")
+setnames(prices, "adj_close", "close")
+prices[, let(adj_rate = NULL)]
+setcolorder(prices, c("symbol", "date", "open", "high", "low", "close", "volume"))
+
+# Remove observations where open, high, low, close columns are below 1e-008
+# This step is optional; we need it if we will use the finfeatures package
+prices = prices[open > 1e-008 & high > 1e-008 & low > 1e-008 & close > 1e-008]
+
+# Remove missing values
+prices = na.omit(prices)
+
+# Keep only symbols with at least 1 year of data
+# This step is optional
+symbol_keep = prices[, .N, symbol][N >= 252, symbol]
+prices = prices[symbol %chin% symbol_keep]
+
+# keep 100 most liquid at every date
+# 200 by volume => cca 20 mil rows
+# 100 by volume => cca 13 mil rows
+# 50 by volume => cca 8 mil rows
+# prices[, dollar_volume := close * volume]
+# setorder(prices, date, -dollar_volume)
+# liquid_symbols = prices[, .(symbol = first(symbol, 100)), by = date]
+# liquid_symbols = liquid_symbols[, unique(symbol)]
+# sprintf("We keep %f percent of data",
+#         length(liquid_symbols) / prices[, length(unique(symbol))] * 100)
+# prices = prices[symbol %chin% liquid_symbols]
+# prices[, dollar_volume := NULL]
+
+# Sort
+setorder(prices, symbol, date)
+
+# free memory
+gc()
+
+
+# PREPARE PRICES DATA -----------------------------------------------------
+# calculate returns
+prices[, returns := close / shift(close) - 1, by = .(symbol)]
+
+
+# ETF DATA ----------------------------------------------------------------
+# Import ETF constituents data: one table per csv, tagged with its file name; "weight" is requested as numeric
+etf_files = list.files("F:/data/equity/us/etf_constituents",
+                       full.names = TRUE,
+                       pattern = "csv$")
+etf_constituents = lapply(etf_files, function(x) {
+  cbind(symbol = basename(x), fread(x, colClasses = list(numeric = "weight")))
+})
+etf_nrows = vapply(etf_constituents, function(x) nrow(x), FUN.VALUE = integer(1))
+etf_constituents = etf_constituents[etf_nrows > 2]
+index_remove = vapply(
+  etf_constituents,
+  function(x) x[, all(is.na(weight) | is.na(last_update))],
+  FUN.VALUE = logical(1))
+etf_constituents = etf_constituents[!index_remove]
+etf_constituents = lapply(etf_constituents, function(x) {
+  x[, let(
+    weight = as.numeric(weight),
+    market_value = as.numeric(market_value)
+  )]
+})
+etf_constituents = rbindlist(etf_constituents)
+etf_constituents[, symbol := gsub("\\.csv", "", symbol)]
+
+# Remove any missing values
+etf_constituents = na.omit(etf_constituents)
+
+# Check 1 ETF
+etf_constituents[, unique(symbol)]
+spy = etf_constituents[symbol == "spy"]
+
+
+# ANALYSE ONE SYMBOL ------------------------------------------------------
+# Pick symbol
+symbol_ = "V"
+
+# Aggregate by date
+etf_symbol = etf_constituents[ticker == symbol_]
+etf_symbol_dt = etf_symbol[, .(
+  shares_held = sum(shares_held),
+  weight = sum(weight),
+  market_value = sum(market_value)
+), by = date]
+setorder(etf_symbol_dt, date)
+
+# Plot vars (smoothed plots keep only observations below the 85th percentile before the 22-period SMA)
+plot(as.xts.data.table(etf_symbol_dt[, .(date, weight)]), type = "l")
+plot(as.xts.data.table(etf_symbol_dt[, .(date, market_value)]), type = "l")
+plot(as.xts.data.table(etf_symbol_dt[, .(date, shares_held / 1000)]), type = "l")
+plot(as.xts.data.table(etf_symbol_dt[weight < quantile(weight, probs = 0.85),
+                                     .(date, weight = SMA(weight, 22))]),
+     type = "l")
+plot(as.xts.data.table(etf_symbol_dt[market_value < quantile(market_value, probs = 0.85),
+                                     .(date, market_value = SMA(market_value, 22))]),
+     type = "l")
+plot(as.xts.data.table(etf_symbol_dt[shares_held < quantile(shares_held, probs = 0.85),
+                                     .(date, shares_held = SMA(shares_held / 1000, 22))]),
+     type = "l")
+
+
+# SMA cross strategy
+etf_symbol_dt[, sma_short := SMA(market_value, 50)]
+etf_symbol_dt[, sma_long := SMA(market_value, 200)]
+etf_symbol_dt[, signal := sma_short > sma_long]
+plot(as.xts.data.table(etf_symbol_dt[, .(date, sma_short, sma_long)]))
+backtest_dt = prices[symbol == tolower(symbol_)][etf_symbol_dt, on = "date"]
+ggplot(na.omit(backtest_dt), aes(x = date, y = close)) +
+  geom_line() +
+  geom_point(aes(color = signal)) +
+  theme_minimal()
+backtest_dt[, Return.cumulative(returns), by = signal]
+performance_dt = na.omit(backtest_dt)
+performance_dt[, strategy := shift(signal) * returns]
+performance_dt = as.xts.data.table(performance_dt[, .(date, strategy, benchmark = returns)])
+charts.PerformanceSummary(performance_dt)
+etf_symbol_dt[, let(sma_short = NULL, sma_long = NULL, signal = NULL)]
+
+#
+etf_symbol_dt
+
diff --git a/R/strategy_ipo_ath.R b/R/strategy_ipo_ath.R
new file mode 100644
index 0000000..5fd5f25
--- /dev/null
+++ b/R/strategy_ipo_ath.R
@@ -0,0 +1,205 @@
+library(data.table)
+library(fs)
+library(arrow)
+library(findata)
+library(glue)
+library(httr)
+library(roll)
+library(PerformanceAnalytics)
+
+
+# SET UP ------------------------------------------------------------------
+# global vars
+PATH = "F:/data/equity/us"
+
+
+# PRICE DATA --------------------------------------------------------------
+# Import QC daily data
+prices = fread("F:/lean/data/stocks_daily.csv")
+setnames(prices, gsub(" ", "_", c(tolower(colnames(prices)))))
+
+# Remove duplicates
+prices = unique(prices, by = c("symbol", "date"))
+
+# Remove duplicates - the same series stored under different symbols (e.g. phun and phun.1)
+dups = prices[, .(symbol , n = .N),
+              by = .(date, open, high, low, close, volume, adj_close,
+                     symbol_first = substr(symbol, 1, 1))]
+dups = dups[n > 1]
+dups[, symbol_short := gsub("\\.\\d$", "", symbol)]
+symbols_remove = dups[, .(symbol, n = .N),
+                      by = .(date, open, high, low, close, volume, adj_close,
+                             symbol_short)]
+symbols_remove[n >= 2, unique(symbol)]
+symbols_remove = symbols_remove[n >= 2, unique(symbol)]
+symbols_remove = symbols_remove[grepl("\\.", symbols_remove)]
+prices = prices[symbol %notin% symbols_remove]
+
+# Adjust all columns
+prices[, adj_rate := adj_close / close]
+prices[, let(
+  open = open*adj_rate,
+  high = high*adj_rate,
+  low = low*adj_rate
+)]
+setnames(prices, "close", "close_raw")
+setnames(prices, "adj_close", "close")
+prices[, let(adj_rate = NULL)]
+setcolorder(prices, c("symbol", "date", "open", "high", "low", "close", "volume"))
+
+# Remove missing values
+prices = na.omit(prices)
+
+# Sort
+setorder(prices, symbol, date)
+
+# Create returns
+prices[, returns := close / shift(close) - 1, by = symbol]
+
+# Remove missing values
+prices = na.omit(prices)
+
+# free memory
+gc()
+
+
+# SIGNAL ------------------------------------------------------------------
+# Keep only the first 90 observations from the beginning of the series for all symbols
+setorder(prices, symbol, date)
+prices_ipo = prices[, .SD[1:90], by = symbol]
+
+# Create signal that is equal to 1 if close at ATH
+prices_ipo[, close_max := roll_max(close, width = nrow(.SD), min_obs = 2), by = symbol]
+# 1)
+# prices_ipo[, ath := close >= shift(close_max)]
+# 2)
+prices_ipo[, ath := close >= shift(close_max) & (shift(close) > 20)]
+
+# Create strategy returns
+# 1)
+# prices_ipo[, strategy := (returns * shift(ath)), by = symbol]
+# 2)
+# prices_ipo[returns == 1]
+# prices_ipo[symbol == "adil" & date %between% c(as.IDate("2018-11-27"), as.IDate("2018-11-30"))]
+prices_ipo[, strategy := returns * shift(ath), by = symbol]
+
+# Check one symbol
+prices_ipo[symbol == "aapl", .(symbol, date, close, close_raw, returns, close_max, ath, strategy)]
+
+# Remove missing values
+prices_ipo = na.omit(prices_ipo)
+
+# keep only where ath (signal) is true
+prices_ipo = prices_ipo[, .SD[shift(ath) == TRUE], by = symbol]
+
+# Calculate portfolio return for every date
+portfolio = prices_ipo[, .(strategy, weight = 1 / length(strategy)), by = date]
+portfolio[, weight := ifelse(weight > 0.05, 0.05, weight)]
+setorder(portfolio, date)
+portfolio = portfolio[, .(strategy = sum(strategy * weight)), by = date]
+portfolio = portfolio[date > as.Date("2002-01-01")] # to match QC backtest
+
+# Equity curve
+portfolio_xts = as.xts.data.table(portfolio[, .(date, strategy)])
+charts.PerformanceSummary(na.omit(portfolio_xts))
+charts.PerformanceSummary(na.omit(tail(portfolio_xts, 1000)))
+
+# Test
+# portfolio[date %between% c(as.IDate("2006-06-13"), as.IDate("2006-06-19"))]
+# prices_ipo[date %between% c(as.IDate("2006-06-13"), as.IDate("2006-06-19"))]
+
+
+# ARCHIVE -----------------------------------------------------------------
+# # PROFILES DATA -----------------------------------------------------------
+# # Get profiles data from FMP cloud
+# api_key = Sys.getenv("APIKEY-FMPCLOUD")
+# url = glue("https://financialmodelingprep.com/api/v4/profile/all?apikey={api_key}")
+# file_ = "F:/profiles.csv"
+# GET(url, write_disk(file_, overwrite = TRUE))
+# profiles_raw = fread(file_)
+# profiles_raw[Symbol == "ZBAO"]
+# profiles = profiles_raw[, .(symbol = tolower(Symbol), cik, date_ipo = ipoDate,
+#                             country, exchange, exchangeShortName)]
+# exchanges_keep = c("NASDAQ", "NYSE", "AMEX", "ETF")
+# profiles_us = profiles[exchangeShortName %in% exchanges_keep]
+#
+# # NASDAQ IPO DATA ---------------------------------------------------------
+# # Get NASDAQ IPO data. Takes some time to webscrape this data
+# nasdaq = Nasdaq$new()
+# ipo_nasdaq = nasdaq$ipo_calendar()
+# ipo_nasdaq_priced = copy(ipo_nasdaq$priced)
+# ipo_nasdaq_priced[, let(
+#   proposedSharePrice = as.numeric(proposedSharePrice),
+#   sharesOffered = as.numeric(gsub(",", "", sharesOffered)),
+#   date = as.Date(pricedDate, format = "%m/%d/%Y"),
+#   dollarValueOfSharesOffered = as.numeric(gsub(",|\\$", "", dollarValueOfSharesOffered)),
+#   dealStatus = NULL,
+#   symbol = tolower(proposedTickerSymbol),
+#   proposedTickerSymbol = NULL,
+#   pricedDate = NULL
+# )]
+# ipo_nasdaq_filed = copy(ipo_nasdaq$filed)
+# ipo_nasdaq_filed[, let(
+#   date = as.Date(filedDate, format = "%m/%d/%Y"),
+#   dollarValueOfSharesOffered = as.numeric(gsub(",|\\$", "", dollarValueOfSharesOffered)),
+#   symbol = tolower(proposedTickerSymbol),
+#   proposedTickerSymbol = NULL,
+#   filedDate = NULL
+# )]
+# ipo_nasdaq_filed = ipo_nasdaq_filed[date > as.Date("2016-06-01")]
+# setorder(ipo_nasdaq_filed, date)
+# ipo_nasdaq_filed[date == as.Date("2024-06-03")]
+# tail(ipo_nasdaq_filed, 20)
+#
+#
+# # IPO DATA ----------------------------------------------------------------
+# # Get IPO data
+# ipo_dt = read_parquet(path(PATH, "fundamentals", "ipo.parquet"))
+#
+# # Remove columns
+# ipo_dt[, all(filingDate == effectivenessDate)]
+# ipo_dt[filingDate != as.Date(acceptedDate)]
+# # form is constant; url not needed; effectivenessDate is the same as filingDate
+# ipo_dt[, let(form = NULL, url = NULL, effectivenessDate = NULL, date = filingDate, filingDate = NULL)]
+# ipo_dt[, symbol := tolower(symbol)]
+# ipo_dt[, .N, by = symbol][order(N, decreasing = TRUE)]
+# ipo_dt[, date_accepted := as.Date(acceptedDate)]
+#
+# # Merge with NASDAQ IPO data
+# ipo_dt[symbol == "brth"]
+# ipo_nasdaq_filed[symbol == "brth"]
+#
+# ipo_nasdaq_filed[ipo_dt, on = c("symbol")][!is.na(dealID)]
+# ipo_nasdaq_filed[ipo_dt, on = c("symbol", "date")][!is.na(dealID)]
+# ipo_nasdaq_filed[ipo_dt[, .(symbol, date = date_accepted)], on = c("symbol", "date")][!is.na(dealID)]
+# ipo_nasdaq_filed[ipo_dt[, .(symbol, date = date_accepted)], on = c("symbol")][!is.na(dealID)]
+
+# # SIGNAL ------------------------------------------------------------------
+# # Sample IPO symbols
+# prices_ipo = prices[symbol %in% profiles_us[, unique(symbol)]]
+#
+# # Merge IPO data to prices
+# prices_ipo = profiles_us[, .(symbol, date_ipo)][prices_ipo, on = c("symbol")]
+#
+# # Keep only data after the IPO date
+# prices_ipo = prices_ipo[, .SD[date >= date_ipo], by = "symbol"]
+#
+# # Checks
+# prices_ipo[, all(date_ipo == min(date)), by = symbol][, sum(V1) / nrow(.SD) * 100]
+# prices_ipo[, all(min(date) - date_ipo < 7), by = symbol][, sum(V1) / nrow(.SD) * 100]
+#
+# # Date can't be too far from IPO date
+# symbols_keep = prices_ipo[, all(min(date) - date_ipo < 7), by = symbol][V1 == TRUE, symbol]
+# prices_ipo = prices_ipo[symbol %in% symbols_keep]
+#
+# # Check some symbols
+# prices_ipo[symbol == "zbao"]
+# prices[symbol == "zbao"]
+#
+# # Create signal: check if symbol is on all time high
+# prices_ipo[, let(
+#   close_max = roll_max(close),
+#   signal = close == close_max
+# )]
+# prices_ipo = prices_ipo[signal == TRUE]
+# prices_ipo[, let(signal = NULL)]
diff --git a/R/strategy_momentum_btest.R b/R/strategy_momentum_btest.R
index 327f2ab..ca9852f 100644
--- a/R/strategy_momentum_btest.R
+++ b/R/strategy_momentum_btest.R
@@ -22,7 +22,7 @@ trade_details = function(bt.results, prices)
 
 # IMPORT DATA -------------------------------------------------------------
 # import hour data
-dt = fread("F:/lean_root/data/all_stocks_hour.csv")
+dt = fread("F:/lean/data/stocks_hour.csv")
 col = c("time", "open", "high", "low", "close", "volume", "close_adj", "symbol")
 setnames(dt, col)
 
@@ -69,6 +69,7 @@ symbols_ = dt[, unique(symbol)]
 "time" %in% symbols_
 "open" %in% symbols_
 "low" %in% symbols_
+dt = dt[symbol %notin% c("open", "low")]
 
 
 # BACKTEST WITH BTEST -----------------------------------------------------
@@ -124,6 +125,7 @@ active <- data.frame(instrument = colnames(P),
 active[instrument == "hrmn", ]
 active$end = floor_time(active$end, unit = "month") %m-% months(1) - days(1)
 
+
 # crate all envs again
 P = P[paste0("/", max(active$end), " 00:00:00")]
 timestamp <- index(P)
diff --git a/R/strategy_seasonality_portfolio.R b/R/strategy_seasonality_portfolio.R
new file mode 100644
index 0000000..caba3c8
--- /dev/null
+++ b/R/strategy_seasonality_portfolio.R
@@ -0,0 +1,72 @@
+library(data.table)
+library(qlcal)
+library(lubridate)
+library(AzureStor)
+
+
+# Set calendar
+calendars
+setCalendar("UnitedStates/NYSE")
+
+# Import results
+file = list.files("D:/features", pattern = "week", full.names = TRUE)
+seasonality_results = readRDS(file)
+setDT(seasonality_results)
+class(seasonality_results)
+dim(seasonality_results)
+colnames(seasonality_results)
+seasonality_results[[3]]
+
+# Portfolio 1:
+# 1) keep min Pr for every symbol
+# 2) keep top 100 for every month
+portfolio1 = melt(seasonality_results, id.vars = "symbol", variable.name = "date", value.name = "value")
+portfolio1 = na.omit(portfolio1)
+portfolio1 = portfolio1[, rbindlist(value), by = .(date, symbol)]
+portfolio1 = portfolio1[rn != "(Intercept)"]
+portfolio1[, minp := min(`Pr(>|t|)`) == `Pr(>|t|)`, by = .(date, symbol)]
+portfolio1 = portfolio1[minp == TRUE]
+setorderv(portfolio1, c("date", "Pr(>|t|)"))
+portfolio1 = portfolio1[, head(.SD, 100), by = date]
+
+# Clean portfolios
+portfolio_prepare = function(portfolio) {
+  # Adds `dates_trading`: the rn-th day among the business days getBusinessDays() returns for each row's month.
+  # NOTE: `portfolio` is a data.table, so it is modified by reference; it is also returned.
+  # Strip the "day_of_month" prefix so `rn` holds only the day index.
+  portfolio[, rn := gsub("day_of_month", "", rn)]
+  day_index = portfolio[, as.integer(rn)]
+
+  # Drop the "month" prefix from `date` and append "01" (presumably yields yymm01 - TODO confirm).
+  date_ = portfolio[, as.Date(paste0(gsub("month", "", date), "01"), format = "%y%m%d")]
+  seq_dates = lapply(date_, function(x) getBusinessDays(x, x %m+% months(1) - 1))
+  # vapply always returns a numeric vector (mapply hands back a list for zero rows, which as.Date rejects).
+  dates = vapply(seq_along(seq_dates), function(i) as.numeric(seq_dates[[i]][day_index[i]]), numeric(1))
+  portfolio[, dates_trading := as.Date(dates, origin = "1970-01-01")]
+  portfolio
+}
+portfolio1 = portfolio_prepare(portfolio1)
+# portfolio2 = portfolio_prepare(portfolio2)
+# portfolio3 = portfolio_prepare(portfolio3)
+
+# save to Azure for backtesting
+save_qc = function(portfolio, file_name) {
+  # Upload `portfolio` to blob container "qc-backtest" as `file_name`: one row per trading date, values "|"-joined.
+  portfoliosqc = na.omit(portfolio[, .(dates_trading, symbol, rn, Value)])
+  setorder(portfoliosqc, dates_trading)
+  setnames(portfoliosqc, "dates_trading", "date")
+  portfoliosqc = portfoliosqc[, .(symbol = paste0(symbol, collapse = "|"),
+                                  rn     = paste0(rn, collapse = "|"),
+                                  value  = paste0(Value, collapse = "|")),
+                              by = date]
+  portfoliosqc[, date := as.character(date)]
+  portfoliosqc = na.omit(portfoliosqc)
+  blob_key = Sys.getenv("BLOB-KEY-SNP")  # the account key must never be committed to the repository
+  if (!nzchar(blob_key)) stop("Environment variable BLOB-KEY-SNP is not set.", call. = FALSE)
+  BLOBENDPOINT = storage_endpoint("https://snpmarketdata.blob.core.windows.net/", key = blob_key)
+  cont = storage_container(BLOBENDPOINT, "qc-backtest")
+  storage_write_csv(portfoliosqc, cont, file_name)
+}
+save_qc(portfolio1, "seasons-portfolio1.csv")
+# save_qc(portfolio2, "seasons-portfolio2.csv")
+# save_qc(portfolio3, "seasons-portfolio3.csv")
diff --git a/R/strategy_vse4ts.R b/R/strategy_vse4ts.R
new file mode 100644
index 0000000..4802778
--- /dev/null
+++ b/R/strategy_vse4ts.R
@@ -0,0 +1,209 @@
+library(data.table)
+library(vse4ts)
+library(duckdb)
+library(lubridate)
+library(ggplot2)
+library(PerformanceAnalytics)
+library(TTR)
+
+
+# SET UP ------------------------------------------------------------------
+# Globals
+QCDATA  = "F:/lean/data"
+DATA    = "F:/data"
+RESULTS = "F:/strategies/vse4ts"
+
+
+# DATA --------------------------------------------------------------------
+# Import hour data for specific symbols
+con = dbConnect(duckdb::duckdb())
+path_ = file.path(QCDATA, "stocks_hour.csv")
+symbols = c("spy", "aapl")
+symbols_string = paste(sprintf("'%s'", symbols), collapse=", ")
+query = sprintf("
+  SELECT *
+  FROM '%s'
+  WHERE Symbol IN (%s)
+", path_, symbols_string)
+ohlcvh = dbGetQuery(con, query)
+dbDisconnect(con, shutdown = TRUE)
+
+# Clean hour data
+setDT(ohlcvh)
+col = c("date", "open", "high", "low", "close", "volume", "close_adj", "symbol")
+setnames(ohlcvh, col)
+ohlcvh = unique(ohlcvh, by = c("symbol", "date"))
+unadjustd_cols = c("open", "high", "low")
+ohlcvh[, (unadjustd_cols) := lapply(.SD, function(x) (close_adj / close) * x),
+       .SDcols = unadjustd_cols]
+ohlcvh = na.omit(ohlcvh)
+setorder(ohlcvh, symbol, date)
+setnames(ohlcvh, c("close", "close_adj"), c("close_raw", "close"))
+ohlcvh = ohlcvh[open > 0.00003 & high > 0.00003 & low > 0.00003 & close > 0.00003]
+ohlcvh[, dollar_volume := close_raw * volume]
+ohlcvh[, date := force_tz(date, "America/New_York")]
+
+# Calculate returns
+ohlcvh[, returns := close / shift(close) - 1, by = .(symbol)]
+
+# Remove NA values
+ohlcvh = na.omit(ohlcvh)
+
+
+# ANALYSIS ----------------------------------------------------------------
+# Calculate indicators
+ohlcvh[, vse := frollapply(returns, 66 * 7, vse), by = symbol]
+ohlcvh[, wnoise := frollapply(returns, 66 * 7, function(x) Wnoise.test(x)[["p.value"]]),
+       by = symbol]
+ohlcvh[, slm := frollapply(returns, 66 * 7, function(x) SLmemory.test(x)[["p.value"]]),
+       by = symbol]
+
+# Summary of indicators
+sample_symbol = "spy"
+plot(ohlcvh[symbol == sample_symbol, vse])
+plot(ohlcvh[symbol == sample_symbol, wnoise])
+plot(ohlcvh[symbol == sample_symbol, slm])
+ohlcvh[, .(
+  min_vse = min(vse, na.rm = TRUE),
+  max_vse = max(vse, na.rm = TRUE),
+  mean_vse = mean(vse, na.rm = TRUE),
+  sd_vse = sd(vse, na.rm = TRUE)
+)]
+ohlcvh[, .(  # summary of the rolling Wnoise.test p-values
+  min_wnoise = min(wnoise, na.rm = TRUE),
+  max_wnoise = max(wnoise, na.rm = TRUE),
+  mean_wnoise = mean(wnoise, na.rm = TRUE),
+  sd_wnoise = sd(wnoise, na.rm = TRUE)
+)]
+ohlcvh[, .(  # summary of the rolling SLmemory.test p-values
+  min_slm = min(slm, na.rm = TRUE),
+  max_slm = max(slm, na.rm = TRUE),
+  mean_slm = mean(slm, na.rm = TRUE),
+  sd_slm = sd(slm, na.rm = TRUE)
+)]
+plot(ohlcvh[symbol == sample_symbol, SMA(vse, 7)])
+plot(ohlcvh[symbol == sample_symbol, SMA(wnoise, 7)])
+plot(ohlcvh[symbol == sample_symbol, SMA(slm, 7)])
+
+# Save to Azure blob to backtest in Quantconnect for one symbol (AzureStor is not attached in this script)
+symbol_ = "spy"
+qc_data = ohlcvh[symbol == symbol_, .(date, vse)]  # only the rows of symbol_ belong in vse_spy.csv
+qc_data[, date := as.character(date)]
+bl_endp_key = AzureStor::storage_endpoint(Sys.getenv("BLOB-ENDPOINT-SNP"), Sys.getenv("BLOB-KEY-SNP"))
+cont = AzureStor::storage_container(bl_endp_key, "qc-backtest")
+AzureStor::storage_write_csv(qc_data, cont, "vse_spy.csv", col_names = FALSE)
+
+# Make lagged copies (1 and 2 bars back, per symbol) of vse, wnoise and slm
+ohlcvh[, vse_lag_1 := shift(vse, 1, type = "lag"), by = .(symbol)]
+ohlcvh[, vse_lag_2 := shift(vse, 2, type = "lag"), by = .(symbol)]
+ohlcvh[, wnoise_lag_1 := shift(wnoise, 1, type = "lag"), by = .(symbol)]
+ohlcvh[, wnoise_lag_2 := shift(wnoise, 2, type = "lag"), by = .(symbol)]
+ohlcvh[, slm_lag_1 := shift(slm, 1, type = "lag"), by = .(symbol)]
+ohlcvh[, slm_lag_2 := shift(slm, 2, type = "lag"), by = .(symbol)]
+
+# Create target variable
+ohlcvh[, target_hour := shift(close, 1, type = "lead") / close - 1, by = .(symbol)]
+ohlcvh[, target_day := shift(close, 8, type = "lead") / close - 1, by = .(symbol)]
+ohlcvh[, target_week := shift(close, 35, type = "lead") / close - 1, by = .(symbol)]
+ohlcvh[, target_month := shift(close, 150, type = "lead") / close - 1, by = .(symbol)]
+
+# Sample data for one symbol
+symbol_ = "spy"
+dts = ohlcvh[symbol == symbol_]
+
+# Scatterplots of each indicator (vse, wnoise, slm) against each target horizon
+ggplot(na.omit(dts), aes(x = vse, y = target_hour)) +
+  geom_point() +
+  geom_smooth(method = "lm", se = FALSE)
+ggplot(na.omit(dts), aes(x = vse, y = target_day)) +
+  geom_point() +
+  geom_smooth(method = "lm", se = FALSE)
+ggplot(na.omit(dts), aes(x = vse, y = target_week)) +
+  geom_point() +
+  geom_smooth(method = "lm", se = FALSE)
+ggplot(na.omit(dts), aes(x = vse, y = target_month)) +
+  geom_point() +
+  geom_smooth(method = "lm", se = FALSE)
+ggplot(na.omit(dts), aes(x = wnoise, y = target_hour)) +
+  geom_point() +
+  geom_smooth(method = "lm", se = FALSE)
+ggplot(na.omit(dts), aes(x = wnoise, y = target_day)) +
+  geom_point() +
+  geom_smooth(method = "lm", se = FALSE)
+ggplot(na.omit(dts), aes(x = wnoise, y = target_week)) +
+  geom_point() +
+  geom_smooth(method = "lm", se = FALSE)
+ggplot(na.omit(dts), aes(x = wnoise, y = target_month)) +
+  geom_point() +
+  geom_smooth(method = "lm", se = FALSE)
+ggplot(na.omit(dts), aes(x = slm, y = target_hour)) +
+  geom_point() +
+  geom_smooth(method = "lm", se = FALSE)
+ggplot(na.omit(dts), aes(x = slm, y = target_day)) +
+  geom_point() +
+  geom_smooth(method = "lm", se = FALSE)
+ggplot(na.omit(dts), aes(x = slm, y = target_week)) +
+  geom_point() +
+  geom_smooth(method = "lm", se = FALSE)
+ggplot(na.omit(dts), aes(x = slm, y = target_month)) +
+  geom_point() +
+  geom_smooth(method = "lm", se = FALSE)
+
+# Mean target return (in percent) per equal-width bin of an indicator column
+bin_mean_returns = function(dt,                          # data.table; column `vse_bin` is (re)written by reference
+                            number_of_bins = 20,         # how many equal-width bins to cut `vse_var` into
+                            target_var = "target_week",  # name of the column averaged within each bin
+                            vse_var = "vse",             # name of the column that gets binned
+                            plot = TRUE) {               # TRUE: return a ggplot, otherwise the per-bin table
+  labels = paste0("vse_", seq_len(number_of_bins))
+  # Bin the column named by `vse_var`, so wnoise/slm calls are binned on their own values
+  dt[, vse_bin := cut(get(vse_var),
+                      breaks = number_of_bins,
+                      labels = labels,
+                      include.lowest = TRUE)]
+  dts_binds_results = dt[, .(mean_target = mean(get(target_var), na.rm = TRUE) * 100),
+                        by = .(vse_bin)]
+  dts_binds_results[, vse_bin_int := as.integer(gsub("vse_", "", vse_bin))]
+  setorder(dts_binds_results, vse_bin_int)
+
+  if (!isTRUE(plot)) {
+    return(dts_binds_results)
+  }
+  # Scatter of bin index against mean target, with a linear fit on top
+  ggplot(dts_binds_results, aes(x = vse_bin_int, y = mean_target)) +
+    geom_point() +
+    geom_smooth(method = "lm", se = FALSE)
+}
+
+# Sample plots
+bin_mean_returns(dts, 40, "target_hour")
+bin_mean_returns(dts, 40, "target_day")
+bin_mean_returns(dts, 40, "target_week")
+bin_mean_returns(dts, 40, "target_month")
+bin_mean_returns(dts, 40, "target_hour", vse_var = "wnoise")
+bin_mean_returns(dts, 40, "target_day", vse_var = "wnoise")
+bin_mean_returns(dts, 40, "target_week", vse_var = "wnoise")
+bin_mean_returns(dts, 40, "target_month", vse_var = "wnoise")
+bin_mean_returns(dts, 40, "target_hour", vse_var = "slm")
+bin_mean_returns(dts, 40, "target_day", vse_var = "slm")
+bin_mean_returns(dts, 40, "target_week", vse_var = "slm")
+bin_mean_returns(dts, 40, "target_month", vse_var = "slm")
+
+# Simple dirty backtest: hold the asset while slm sits in its 3 lowest bins (bins are cut on the full sample)
+number_of_bins = 40
+labels = paste0("vse_", 1:number_of_bins)
+dts[, vse_bin := cut(slm,
+                     breaks = number_of_bins,
+                     labels = labels,
+                     include.lowest = TRUE)]
+dts[, vse_bin_char := as.character(vse_bin)]
+dts[, unique(vse_bin_char)]
+dts[, .N, by = vse_bin_char]
+vse_bin_buy = paste0("vse_", 1:3)
+dts[, signal := as.numeric(vse_bin_char %in% vse_bin_buy)]
+# slm at bar t is computed from a window that already contains returns[t], so trade on the
+# previous bar's signal; multiplying by the same-bar return would look ahead.
+dts[, strategy := shift(signal, 1, type = "lag") * returns]
+r = as.xts.data.table(na.omit(dts[, .(date, strategy, benchmark = returns)]))
+Return.cumulative(r); SharpeRatio(r)
+charts.PerformanceSummary(r)
diff --git a/R/strategy_vse4ts_spy.R b/R/strategy_vse4ts_spy.R
new file mode 100644
index 0000000..70a4440
--- /dev/null
+++ b/R/strategy_vse4ts_spy.R
@@ -0,0 +1,151 @@
+library(data.table)
+library(arrow)
+library(lubridate)
+library(ggplot2)
+
+
+# PRICE DATA --------------------------------------------------------------
+# There are duplicates that are hard to identify because they have different symbols.
+# For example, symbols lmb and lmb.1 have the same data (same date and same OHLCV).
+# We should remove one of those symbols. We will remove the one with the number.
+# We use daily data here because using hour data directly consumes lots of RAM.
+get_symbols_to_remove = function(path = "F:/lean/data/stocks_daily.csv") {
+  # Returns the dotted symbols (e.g. "lmb.1") whose daily bars in `path` duplicate another symbol's bars.
+  prices = fread(path)
+  setnames(prices, gsub(" ", "_", tolower(colnames(prices))))
+  dups = prices[, .(symbol , n = .N),
+                by = .(date, open, high, low, close, volume, adj_close,
+                       symbol_first = substr(symbol, 1, 1))]
+  dups = dups[n > 1]  # identical bars shared by 2+ symbols that start with the same letter
+  # Of those, keep bars shared by symbols that are equal once a trailing ".<digit>" is dropped
+  dups[, symbol_short := gsub("\\.\\d$", "", symbol)]
+  symbols_remove = dups[, .(symbol, n = .N),
+                        by = .(date, open, high, low, close, volume, adj_close,
+                               symbol_short)]
+  symbols_remove = symbols_remove[n >= 2, unique(symbol)]
+  symbols_remove[grepl("\\.", symbols_remove)]  # only symbols containing a dot are returned for removal
+}
+symbols_remove = get_symbols_to_remove()
+
+# Import QC hourly data
+prices = fread("F:/lean/data/stocks_hour.csv")
+
+# Fix column names
+setnames(prices, gsub(" ", "_", c(tolower(colnames(prices)))))
+
+# Remove duplicates by symbol and date
+prices = unique(prices, by = c("symbol", "date"))
+
+# Remove duplicates we calculated above
+prices = prices[symbol %notin% symbols_remove]
+
+# Adjust all columns for splits and dividends
+prices[, adj_rate := adj_close / close]
+prices[, let(
+  open = open*adj_rate,
+  high = high*adj_rate,
+  low = low*adj_rate
+)]
+setnames(prices, "close", "close_raw")
+setnames(prices, "adj_close", "close")
+prices[, let(adj_rate = NULL)]
+setcolorder(prices, c("symbol", "date", "open", "high", "low", "close", "volume"))
+
+# Remove observations where open, high, low, close columns are below 1e-008
+prices = prices[open > 1e-008 & high > 1e-008 & low > 1e-008 & close > 1e-008]
+
+# Remove missing values
+prices = na.omit(prices)
+
+# Check timezone - QuantConnect data should be in the NY timezone, but R automatically
+# converts it to UTC
+prices[, attr(date, "tz")]
+prices[symbol == "aapl", unique(date)]
+prices[, date := force_tz(date, "America/New_York")]
+
+# Keep only symbol with at least 2 years of data
+symbol_keep = prices[, .N, symbol][N >= 2 * 252 * 7, symbol]
+prices = prices[symbol %chin% symbol_keep]
+
+# Sort
+setorder(prices, symbol, date)
+
+# Keep SPY data
+spy = prices[symbol == "spy"]
+
+# free memory
+gc()
+
+
+# FILTERING ---------------------------------------------------------------
+# Filter by monthly dollar volume
+# keep 100 most liquid at every date
+# 200 by volume => cca xx mil rows
+# 100 by volume => cca 13 mil rows
+# 50 by volume => cca xx mil rows
+prices[, dollar_volume := close * volume]
+setorder(prices, date, -dollar_volume)
+liquid_symbols = prices[, .(symbol = first(symbol, 100)),
+                        by = .(ym = yearmon(date))]
+liquid_symbols = liquid_symbols[, unique(symbol)]
+sprintf("We keep %f percent of data",
+        length(liquid_symbols) / prices[, length(unique(symbol))] * 100)
+prices = prices[symbol %chin% liquid_symbols]
+prices[, dollar_volume := NULL]
+
+# free memory
+gc()
+
+
+# MINMAX INDICATORS -------------------------------------------------------
+# Calculate returns
+setorder(prices, symbol, date)
+prices[, returns := close / shift(close) - 1, by = "symbol"]
+
+# Keep only columns we need
+prices = prices[, .(symbol, date, close, close_raw, volume, returns)]
+
+# free memory
+gc()
+
+# calculate rolling quantiles
+prices[, p_999_4year    := roll::roll_quantile(returns, 255*7*4, p = 0.999), by = .(symbol)]
+prices[, p_001_4year    := roll::roll_quantile(returns, 255*7*4, p = 0.001), by = .(symbol)]
+prices[, p_999_2year    := roll::roll_quantile(returns, 255*7*2, p = 0.999), by = .(symbol)]
+prices[, p_001_2year    := roll::roll_quantile(returns, 255*7*2, p = 0.001), by = .(symbol)]
+prices[, p_999_year     := roll::roll_quantile(returns, 255*7, p = 0.999), by = .(symbol)]
+prices[, p_001_year     := roll::roll_quantile(returns, 255*7, p = 0.001), by = .(symbol)]
+prices[, p_999_halfyear := roll::roll_quantile(returns, 255*4, p = 0.999), by = .(symbol)]
+prices[, p_001_halfyear := roll::roll_quantile(returns, 255*4, p = 0.001), by = .(symbol)]
+
+prices[, p_99_4year    := roll::roll_quantile(returns, 255*7*4, p = 0.99), by = .(symbol)]
+prices[, p_01_4year    := roll::roll_quantile(returns, 255*7*4, p = 0.01), by = .(symbol)]
+prices[, p_99_2year    := roll::roll_quantile(returns, 255*7*2, p = 0.99), by = .(symbol)]
+prices[, p_01_2year    := roll::roll_quantile(returns, 255*7*2, p = 0.01), by = .(symbol)]
+prices[, p_99_year     := roll::roll_quantile(returns, 255*7, p = 0.99), by = .(symbol)]
+prices[, p_01_year     := roll::roll_quantile(returns, 255*7, p = 0.01), by = .(symbol)]
+prices[, p_99_halfyear := roll::roll_quantile(returns, 255*4, p = 0.99), by = .(symbol)]
+prices[, p_01_halfyear := roll::roll_quantile(returns, 255*4, p = 0.01), by = .(symbol)]
+
+prices[, p_97_4year    := roll::roll_quantile(returns, 255*7*4, p = 0.97), by = .(symbol)]
+prices[, p_03_4year    := roll::roll_quantile(returns, 255*7*4, p = 0.03), by = .(symbol)]
+prices[, p_97_2year    := roll::roll_quantile(returns, 255*7*2, p = 0.97), by = .(symbol)]
+prices[, p_03_2year    := roll::roll_quantile(returns, 255*7*2, p = 0.03), by = .(symbol)]
+prices[, p_97_year     := roll::roll_quantile(returns, 255*7, p = 0.97), by = .(symbol)]
+prices[, p_03_year     := roll::roll_quantile(returns, 255*7, p = 0.03), by = .(symbol)]
+prices[, p_97_halfyear := roll::roll_quantile(returns, 255*4, p = 0.97), by = .(symbol)]
+prices[, p_03_halfyear := roll::roll_quantile(returns, 255*4, p = 0.03), by = .(symbol)]
+
+prices[, p_95_4year    := roll::roll_quantile(returns, 255*7*4, p = 0.95), by = .(symbol)]
+prices[, p_05_4year    := roll::roll_quantile(returns, 255*7*4, p = 0.05), by = .(symbol)]
+prices[, p_95_2year    := roll::roll_quantile(returns, 255*7*2, p = 0.95), by = .(symbol)]
+prices[, p_05_2year    := roll::roll_quantile(returns, 255*7*2, p = 0.05), by = .(symbol)]
+prices[, p_95_year     := roll::roll_quantile(returns, 255*7, p = 0.95), by = .(symbol)]
+prices[, p_05_year     := roll::roll_quantile(returns, 255*7, p = 0.05), by = .(symbol)]
+prices[, p_95_halfyear := roll::roll_quantile(returns, 255*4, p = 0.95), by = .(symbol)]
+prices[, p_05_halfyear := roll::roll_quantile(returns, 255*4, p = 0.05), by = .(symbol)]
+
+# save market data
+time_ = format(Sys.Date(), format = "%Y%m%d")
+file_name = file.path("F:/predictors/minmax", paste0(time_,".csv"))
+fwrite(prices, file_name)
diff --git a/backtest.cpp b/backtest.cpp
index 9e57eed..c00d274 100644
--- a/backtest.cpp
+++ b/backtest.cpp
@@ -20,6 +20,24 @@ double backtest_cpp_gpt(NumericVector returns, NumericVector indicator, double t
   return cum_returns - 1; // Adjust for initial value
 }
 
+// [[Rcpp::export]]
+double backtest_sell_below_threshold(NumericVector returns, NumericVector indicator, double threshold) {
+  // Cumulative return of a long-only strategy that is flat in period z whenever
+  // indicator[z-1] is non-NA and below `threshold`; period 0 is always invested.
+  int n = indicator.size();
+  if (returns.size() < n) {
+    stop("returns must be at least as long as indicator."); // operator[] is not bounds-checked
+  }
+
+  double cum_returns = 1.0; // Start cumulative returns at 1
+  for(int z = 0; z < n; ++z) {
+    // Decide from the previous period's indicator (look back one period)
+    bool flat = z > 0 && !NumericVector::is_na(indicator[z-1]) && indicator[z-1] < threshold;
+    cum_returns *= (1 + returns[z] * (flat ? 0.0 : 1.0));
+  }
+  return cum_returns - 1; // Adjust for initial value
+}
+
 DataFrame cbind_scalar_and_df(double x, DataFrame df) {
   // Get the number of rows in the DataFrame
   int n = df.nrows();
@@ -109,6 +127,54 @@ NumericVector opt(DataFrame df, DataFrame params) {
   return results;
 }
 
+// [[Rcpp::export]]
+NumericVector calculate_sma(NumericVector x, int n) {
+  // Trailing n-period simple moving average of x. Element i is NA while fewer than n
+  // values are available or while the window x[i-n+1 .. i] contains an NA.
+  if (n <= 0) stop("n must be a positive integer.");
+  int size = x.size();
+  NumericVector sma(size);
+  double sum = 0.0;  // running sum of the non-NA values inside the window
+  int n_na = 0;      // NAs inside the window; counted apart so one NA cannot poison `sum` for good
+
+  for(int i = 0; i < size; i++) {
+    if (NumericVector::is_na(x[i])) ++n_na; else sum += x[i];
+    if (i >= n) {
+      if (NumericVector::is_na(x[i - n])) --n_na; else sum -= x[i - n];
+    }
+    sma[i] = (i >= n - 1 && n_na == 0) ? sum / n : NA_REAL;
+  }
+
+  return sma;
+}
+
+// [[Rcpp::export]]
+NumericVector opt_with_sma(DataFrame df, DataFrame params) {
+
+  // Inputs: df needs "returns" plus every column named in params["variable"];
+  // params needs columns "variable", "thresholds" and "sma_n" (one backtest per row).
+  const int n_params = params.nrow();
+  NumericVector returns = df["returns"];
+  CharacterVector indicators = params["variable"];
+  NumericVector thresholds = params["thresholds"];
+  NumericVector sma_n = params["sma_n"];
+
+  // One backtest_sell_below_threshold() result per parameter row
+  NumericVector results(n_params);
+  for(int row = 0; row < n_params; ++row) {
+    // Look up this row's indicator column by name
+    String column_name = indicators[row];
+    NumericVector raw_indicator = df[column_name];
+    // Smooth it with an SMA of the requested length (stored as double, truncated to int)
+    const int sma_length = sma_n[row];
+    NumericVector smoothed = calculate_sma(raw_indicator, sma_length);
+
+    results[row] = backtest_sell_below_threshold(returns, smoothed, thresholds[row]);
+  }
+
+  return results;
+}
+
 // [[Rcpp::export]]
 List wfo(DataFrame df,
          DataFrame params,
@@ -160,3 +226,57 @@ List wfo(DataFrame df,
 
   return results;
 }
+
+// [[Rcpp::export]]
+List wfo_with_sma(DataFrame df,
+                  DataFrame params,
+                  int window,
+                  std::string window_type = "rolling") {
+  // Walk-forward run of opt_with_sma(): one result per window end.
+  //
+  // df          - must contain a numeric "time" column plus whatever opt_with_sma() reads.
+  // params      - parameter grid, passed unchanged to opt_with_sma().
+  // window      - number of rows in the first window (and in every window when rolling).
+  // window_type - "expanding" always starts the slice at row 0; any other value is
+  //               treated as "rolling" and starts the slice at row i.
+  //
+  // Returns a list of length n - window + 1; element i is
+  // bind_scalar_and_vector_to_df(time[i + window - 1], <opt_with_sma result>).
+  // Stops with an error when window < 1 or window > nrow(df).
+
+  // Define help variables
+  const int n = df.nrow();
+  NumericVector time = df["time"];
+
+  // Check if window size is valid relative to data size
+  if (window < 1) {
+    // window == 0 would request a slice ending at row -1 and read time[-1]
+    stop("Window size must be at least 1.");
+  }
+  if (window > n) {
+    stop("Window size exceeds available data.");
+  }
+
+  // Both window types produce one result per admissible window end
+  const bool expanding = (window_type == "expanding");
+  const int n_windows = n - window + 1;
+  List results(n_windows);
+
+  // Loop over data
+  for(int i = 0; i < n_windows; ++i) {
+
+    // Last row of the current window; always < n because i < n - window + 1,
+    // so no additional range check is required here.
+    const int last = i + window - 1;
+    const int first = expanding ? 0 : i;
+
+    DataFrame df_window = sliceDataFrameColumnWise(df, first, last);
+    NumericVector sr = opt_with_sma(df_window, params);
+
+    // Bind time and results
+    results[i] = bind_scalar_and_vector_to_df(time[last], sr);
+  }
+
+  return results;
+}
+
diff --git a/commits_2024_04_alphar.txt b/commits_2024_04_alphar.txt
index 46576c4..a64485a 100644
Binary files a/commits_2024_04_alphar.txt and b/commits_2024_04_alphar.txt differ
diff --git a/commits_2024_05_alphar.txt b/commits_2024_05_alphar.txt
new file mode 100644
index 0000000..ba6a7d8
Binary files /dev/null and b/commits_2024_05_alphar.txt differ