From e6a77518d6e498cb6ecfabc245703fec67a69689 Mon Sep 17 00:00:00 2001 From: marcvanduyn Date: Thu, 14 Nov 2024 22:10:28 +0100 Subject: [PATCH] Fix flake8 issues --- .gitignore | 1 + .../deployment/azure/azure_functions.py | 26 +- .../domain/__init__.py | 5 - .../domain/graphs.py | 418 ------------------ .../domain/models/date_range.py | 27 +- .../domain/services/market_data_sources.py | 80 ++-- .../domain/utils/polars.py | 25 +- .../indicators/advanced.py | 19 +- .../indicators/trend.py | 203 ++++++--- .../models/market_data_sources/ccxt.py | 62 +-- .../models/market_data_sources/pandas.py | 22 +- 11 files changed, 301 insertions(+), 587 deletions(-) delete mode 100644 investing_algorithm_framework/domain/graphs.py diff --git a/.gitignore b/.gitignore index 19a92065..f80c3c6e 100644 --- a/.gitignore +++ b/.gitignore @@ -148,3 +148,4 @@ bumpversion.egg-info/ **/backtest_data/* */backtest_reports/ **/backtest_reports/* +.vscode/ \ No newline at end of file diff --git a/investing_algorithm_framework/deployment/azure/azure_functions.py b/investing_algorithm_framework/deployment/azure/azure_functions.py index c98b2cdd..a02a49f7 100644 --- a/investing_algorithm_framework/deployment/azure/azure_functions.py +++ b/investing_algorithm_framework/deployment/azure/azure_functions.py @@ -4,16 +4,18 @@ from azure.mgmt.resource import ResourceManagementClient from azure.mgmt.storage import StorageManagementClient from azure.mgmt.web import WebSiteManagementClient -from zipfile import ZipFile import shutil + def deploy_to_azure_functions(azure_credentials_json, azure_function_path): """ This function deploys a Python function app to Azure Functions. Parameters: - - azure_credentials_json (str): Path to the Azure credentials JSON file. - - azure_function_path (str): Path to the Python function app directory. + - azure_credentials_json (str): Path to the Azure credentials + JSON file. + - azure_function_path (str): Path to the Python function + app directory. 
    Returns:
        None
@@ -28,7 +30,7 @@ def deploy_to_azure_functions(azure_credentials_json, azure_function_path):
     LOCATION = "eastus"
     STORAGE_ACCOUNT_NAME = "mystorageaccount123"
     FUNCTION_APP_NAME = "my-python-function-app"
     APP_SERVICE_PLAN_NAME = "myAppServicePlan"

     # Authenticate using DefaultAzureCredential
     credential = DefaultAzureCredential()
@@ -56,13 +57,22 @@ def deploy_to_azure_functions(azure_credentials_json, azure_function_path):
     # Create Function App (with a Consumption Plan)
     site_config = {
         "location": LOCATION,
-        "server_farm_id": f"/subscriptions/{SUBSCRIPTION_ID}/resourceGroups/{RESOURCE_GROUP_NAME}/providers/Microsoft.Web/serverfarms/{APP_SERVICE_PLAN_NAME}",
+        "server_farm_id": f"/subscriptions/{SUBSCRIPTION_ID}" +
+        "/resourceGroups" +
+        f"/{RESOURCE_GROUP_NAME}/providers/Microsoft.Web/" +
+        f"serverfarms/{APP_SERVICE_PLAN_NAME}",
         "reserved": True,  # This is necessary for Linux-based function apps
         "site_config": {
             "app_settings": [
-                {"name": "FUNCTIONS_WORKER_RUNTIME", "value": "python"},
-                {"name": "AzureWebJobsStorage",
-                 "value": f"DefaultEndpointsProtocol=https;AccountName={STORAGE_ACCOUNT_NAME};AccountKey="},
+                {
+                    "name": "FUNCTIONS_WORKER_RUNTIME", "value": "python"
+                },
+                {
+                    "name": "AzureWebJobsStorage",
+                    "value": "DefaultEndpointsProtocol=https;" + \
+                        f"AccountName={STORAGE_ACCOUNT_NAME}" + \
+                        ";AccountKey=<account_key>",
+                }
             ]
         },
         "kind": "functionapp",
diff --git a/investing_algorithm_framework/domain/__init__.py b/investing_algorithm_framework/domain/__init__.py
index c8dbb8e6..68b294b0 100644
--- a/investing_algorithm_framework/domain/__init__.py
+++ b/investing_algorithm_framework/domain/__init__.py
@@ -30,7 +30,6 @@ load_backtest_report, convert_polars_to_pandas, \
     csv_to_list, StoppableThread, pretty_print_backtest_reports_evaluation, \
     pretty_print_backtest, load_csv_into_dict, load_backtest_reports
-from .graphs import create_prices_graph, create_ema_graph, create_rsi_graph, create_line_graph
 from .metrics import get_price_efficiency_ratio

 __all__ = [
@@ -116,11 +115,7 @@
     "RoundingService",
     "BacktestDateRange",
     "load_backtest_report",
-    "create_prices_graph",
-    "create_ema_graph",
-    "create_rsi_graph",
     "get_price_efficiency_ratio",
-    "create_line_graph",
     "convert_polars_to_pandas",
     "DateRange"
 ]
diff --git a/investing_algorithm_framework/domain/graphs.py b/investing_algorithm_framework/domain/graphs.py
deleted file mode 100644
index b68a805a..00000000
--- a/investing_algorithm_framework/domain/graphs.py
+++ /dev/null
@@ -1,418 +0,0 @@
-import pandas as pd
-import plotly.graph_objs as go
-
-
-def create_rsi_graph(data: pd.DataFrame):
-    """
-    Create a graph for the RSI metric.
-    :param data: DataFrame with a 'RSI' column and a Datetime index
-    :return: Plotly graph object
-    """
-
-    # Check if the index is of type datetime
-    if not isinstance(data.index, pd.DatetimeIndex):
-        raise ValueError("The index of the data should be of type datetime")
-
-    # Check if the 'RSI' column exists
-    if 'RSI' not in data.columns:
-        raise ValueError("The data should have a 'RSI' column")
-
-    return go.Scatter(
-        x=data.index,
-        y=data['RSI'],
-        mode='lines',
-        line=dict(color="green", width=1),
-        name="RSI"
-    )
-
-
-def create_prices_graph(
-    data: pd.DataFrame,
-    data_key="Close",
-    graph_name="Price",
-    color="blue",
-    line_width=1
-):
-    """
-    Create a graph for the close prices. By default, the key is set to 'Close'.
- - Args: - data (pd.DataFrame): The data to plot - data_key (str): The key to use for the prices - graph_name (str): The name of the graph - color (str): The color of the graph - line_width (int): The width of the line - - Returns: - go.Scatter: The Plotly graph object - """ - - # Check if the index is of type datetime - if not isinstance(data.index, pd.DatetimeIndex): - raise ValueError("The index of the data should be of type datetime") - - # Check if the 'Close' column exists - if data_key not in data.columns: - raise ValueError("The data should have a 'Close' column") - - return go.Scatter( - x=data.index, - y=data[data_key], - mode='lines', - line=dict(color=color, width=line_width), - name=graph_name - ) - -def create_line_graph( - data: pd.DataFrame, - data_key="Close", - graph_name="Line", - color="blue", - line_width=1 -): - """ - Create a graph for the close prices. By default, the key is set to 'Close'. - - Args: - data (pd.DataFrame): The data to plot - data_key (str): The key to use for the prices - graph_name (str): The name of the graph - color (str): The color of the graph - line_width (int): The width of the line - - Returns: - go.Scatter: The Plotly graph object - """ - - # Check if the index is of type datetime - if not isinstance(data.index, pd.DatetimeIndex): - raise ValueError("The index of the data should be of type datetime") - - # Check if the data key column exists - if data_key not in data.columns: - raise ValueError(f"The data should have a '{data_key}' column") - - return go.Scatter( - x=data.index, - y=data[data_key], - mode='lines', - line=dict(color=color, width=line_width), - name=graph_name - ) - -def create_adx_graph(data: pd.DataFrame): - """ - Create a graph for the ADX metric. - :param data: DataFrame with a 'ADX' column and a Datetime index - :return: Plotly graph object - """ - - # Check if the index is of type datetime - if not isinstance(data.index, pd.DatetimeIndex): - raise ValueError("The index of the data should be of type datetime") - - # Check if the 'ADX' column exists - if 'ADX' not in data.columns: - raise ValueError("The data should have a 'ADX' column") - - return go.Scatter( - x=data.index, - y=data['ADX'], - mode='lines', - line=dict(color="green", width=1), - name="ADX" - ) - - -def create_di_plus_graph(data: pd.DataFrame): - """ - Create a graph for the DI+ metric. - :param data: DataFrame with a '+DI' column and a Datetime index - :return: Plotly graph object - """ - - # Check if the index is of type datetime - if not isinstance(data.index, pd.DatetimeIndex): - raise ValueError("The index of the data should be of type datetime") - - # Check if the '+DI' column exists - if '+DI' not in data.columns: - raise ValueError("The data should have a '+DI' column") - - return go.Scatter( - x=data.index, - y=data['+DI'], - mode='lines', - line=dict(color="orange", width=1), - name="+DI" - ) - - -def create_di_minus_graph(data: pd.DataFrame): - """ - Create a graph for the DI- metric. 
- :param data: DataFrame with a '-DI' column and a Datetime index - :return: Plotly graph object - """ - - # Check if the index is of type datetime - if not isinstance(data.index, pd.DatetimeIndex): - raise ValueError("The index of the data should be of type datetime") - - # Check if the '-DI' column exists - if '-DI' not in data.columns: - raise ValueError("The data should have a '-DI' column") - - return go.Scatter( - x=data.index, - y=data['-DI'], - mode='lines', - line=dict(color="purple", width=1), - name="-DI" - ) - - -def create_di_plus_di_minus_crossover_graph(data: pd.DataFrame): - """ - Create a graph for the DI- and DI+ crossover. - """ - - # Check if the index is of type datetime - if not isinstance(data.index, pd.DatetimeIndex): - raise ValueError("The index of the data should be of type datetime") - - # Check if the '-DI' and '+DI' columns exist - if '-DI' not in data.columns or '+DI' not in data.columns: - raise ValueError("The data should have a '-DI' and '+DI' column") - - # Get all crossover indexes - crossover_index = data[(data['+DI'] < data['-DI']) & - (data['+DI'].shift(1) > data['-DI'].shift(1))].index - - # Use .loc to get the corresponding 'Close' values - crossover_close_values = data.loc[crossover_index, '+DI'] - - return go.Scatter( - x=crossover_index, - y=crossover_close_values, - mode='markers', - marker=dict(symbol='circle', size=10, color='blue'), - name='DI- DI+ Crossover' - ) - - -def create_ema_graph(data: pd.DataFrame, key, color="blue"): - # Check if the index is of type datetime - if not isinstance(data.index, pd.DatetimeIndex): - raise ValueError("The index of the data should be of type datetime") - - # Check if the key columns exist - if key not in data.columns: - raise ValueError(f"The data should have a {key} column") - - return go.Scatter( - x=data.index, - y=data[key], - mode='lines', - line=dict(color=color, width=1), - name=key - ) - - -def create_crossover_graph(data: pd.DataFrame, key_one, key_two, color="blue"): - # Check if the index is of type datetime - if not isinstance(data.index, pd.DatetimeIndex): - raise ValueError("The index of the data should be of type datetime") - - # Check if the key columns exist - if key_one not in data.columns or key_two not in data.columns: - raise ValueError(f"The data should have a {key_one} " - f"and {key_two} column") - - # Get all crossover indexes - crossover_index = data[ - (data[key_one] <= data[key_two]) & - (data[key_one].shift(1) >= data[key_two].shift(1)) - ].index - - # Use .loc to get the corresponding 'Close' values - crossover_close_values = data.loc[crossover_index, key_one] - - return go.Scatter( - x=crossover_index, - y=crossover_close_values, - mode='markers', - marker=dict(symbol='circle', size=10, color=color), - name=f'{key_one} {key_two} Crossover' - ) - - -def create_peaks_chart(data: pd.DataFrame, key="Close", order=5): - - # Check if the index is of type datetime - if not isinstance(data.index, pd.DatetimeIndex): - raise ValueError("The index of the data should be of type datetime") - - keys = [f'{key}_highs', f'{key}_lows'] - - for key_column in keys: - if key_column not in data.columns: - raise ValueError(f"The data should have a '{key_column}' column") - - # Get all peak indexes - hh_close_index = data[data[f'{key}_highs'] == 1].index - lh_close_index = data[data[f'{key}_highs'] == -1].index - ll_close_index = data[data[f'{key}_lows'] == 1].index - hl_close_index = data[data[f'{key}_lows'] == -1].index - - # Use .loc to get the corresponding 'Close' values if - # the index is 
in the DataFrame - hh_close_values = data.loc[hh_close_index, key] - lh_close_values = data.loc[lh_close_index, key] - ll_close_values = data.loc[ll_close_index, key] - hl_close_values = data.loc[hl_close_index, key] - - # Add higher highs - higher_high_graph = go.Scatter( - x=hh_close_index, - # x=dates[hh_close_index - order].values, - y=hh_close_values, - mode='markers', - marker=dict(symbol='triangle-up', size=10, color='blue'), - name='Higher High Confirmation' - ) - - # Add lower highs - lower_high_graph = go.Scatter( - x=lh_close_index, - y=lh_close_values, - mode='markers', - marker=dict(symbol='triangle-down', size=10, color='red'), - name='Lower High Confirmation' - ) - - # Add lower lows - lower_lows_graph = go.Scatter( - x=ll_close_index, - y=ll_close_values, - mode='markers', - marker=dict(symbol='triangle-down', size=10, color='green'), - name='Lower Lows Confirmation' - ) - - # Add higher lows - higher_lows = go.Scatter( - x=hl_close_index, - y=hl_close_values, - mode='markers', - marker=dict(symbol='triangle-up', size=10, color='purple'), - name='Higher Lows Confirmation' - ) - - return higher_high_graph, lower_high_graph, lower_lows_graph, higher_lows - - -def create_bullish_divergence_chart( - data: pd.DataFrame, key_one, key_two, color='red' -): - """ - A bullish divergence occurs when the "_lows" makes - a new low but the "_lows" makes a higher low. - - For example, if the RSI makes a new low but the close price - makes a higher low, then we have a bullish divergence. - """ - divergence_index = data[(data[f'{key_one}_lows'] == -1) - & (data[f'{key_two}_lows'] == 1)].index - divergence_close_values = data.loc[divergence_index, 'Close'] - - return go.Scatter( - x=divergence_index, - y=divergence_close_values, - mode='markers', - marker=dict(symbol='circle', size=10, color=color), - name='Bullish Divergence' - ) - - -def create_bearish_divergence_chart( - data: pd.DataFrame, key_one, key_two, color='red' -): - """ - A bearish divergence occurs when the "_highs" makes a - new high but the "_highs" makes a lower high. - - For example, if the RSI makes a new high but the close price makes - a lower high, then we have a bearish divergence. 
- """ - - # Add divergence charts - divergence_index = data[(data[f'{key_one}_highs'] == -1) - & (data[f'{key_two}_highs'] == 1)].index - divergence_close_values = data.loc[divergence_index, 'Close'] - - return go.Scatter( - x=divergence_index, - y=divergence_close_values, - mode='markers', - marker=dict(symbol='circle', size=10, color=color), - name='Bearish Divergence' - ) - - -def create_entry_graph(data: pd.DataFrame): - # Iterate over each row in the DataFrame and check if there is a - # bullish divergence between the RSI and the close price - # and if there is a crossover between the DI+ and DI- for - # the last 12 hours (6 candles) - # Get all crossover indexes - crossover_index = data[(data['+DI'] <= data['-DI']) & - (data['+DI'].shift(1) >= data['-DI'].shift(1))]\ - .index - data['di_crossover'] = 0 - data.loc[crossover_index, 'di_crossover'] = 1 - - entry_indexes = [] - - for row in data.itertuples(): - - if row.di_crossover == 1: - match = False - # Check if there was a bullish divergence between - # the RSI and the close price in the last 2 days - rsi_window = data.loc[ - row.Index - pd.Timedelta(days=2):row.Index, - 'RSI_lows' - ] - close_window = data.loc[ - row.Index - pd.Timedelta(days=2):row.Index, - 'Close_lows' - ] - - # Go over each row and check if there is a bullish - # divergence between the RSI and the close price - for rsi_row, close_row in zip(rsi_window, close_window): - - if rsi_row == -1 and close_row == 1: - entry_indexes.append(row.Index) - match = True - break - - if not match: - # Check if the RSI had decreased - rsi_window = data.loc[ - row.Index - pd.Timedelta(days=1):row.Index, 'RSI' - ] - rsi_diff = rsi_window.diff().mean() - - if rsi_diff < -2: - entry_indexes.append(row.Index) - - entry_close_values = data.loc[entry_indexes, 'Close'] - return go.Scatter( - x=entry_indexes, - y=entry_close_values, - mode='markers', - marker=dict(symbol='circle', size=10, color='green'), - name='Entry Signal' - ) diff --git a/investing_algorithm_framework/domain/models/date_range.py b/investing_algorithm_framework/domain/models/date_range.py index c0c8fe25..a737f58c 100644 --- a/investing_algorithm_framework/domain/models/date_range.py +++ b/investing_algorithm_framework/domain/models/date_range.py @@ -1,13 +1,22 @@ from datetime import datetime from typing import Union + class DateRange: """ - DateRange class. This class is used to define a date range and the name of the range. - Also, it can be used to store trading metadata such as classification of the trend (Up or Down). + DateRange class. This class is used to define a date range and the name of + the range. Also, it can be used to store trading metadata such as + classification of the trend (Up or Down). 
""" - def __init__(self, start_date: datetime, end_date: datetime, name: str, up_trend: bool = False, down_trend: bool = False): + def __init__( + self, + start_date: datetime, + end_date: datetime, + name: str, + up_trend: bool = False, + down_trend: bool = False + ): self.start_date = start_date self.end_date = end_date self.name = name @@ -21,7 +30,7 @@ def up_trend(self) -> Union[bool, None]: return True else: return None - + @up_trend.setter def up_trend(self, value: bool): self._up_trend = value @@ -33,13 +42,15 @@ def down_trend(self) -> Union[bool, None]: return True else: return None - + @down_trend.setter def down_trend(self, value: bool): self._down_trend = value - + def __str__(self): return f"DateRange({self.start_date}, {self.end_date}, {self.name})" - + def __repr__(self): - return f"DateRange(Name: {self.name} Start date: {self.start_date} End date: {self.end_date})" + return f"DateRange(Name: {self.name} " + \ + f"Start date: {self.start_date} " + \ + f"End date: {self.end_date})" diff --git a/investing_algorithm_framework/domain/services/market_data_sources.py b/investing_algorithm_framework/domain/services/market_data_sources.py index 5a7a0002..d400e08e 100644 --- a/investing_algorithm_framework/domain/services/market_data_sources.py +++ b/investing_algorithm_framework/domain/services/market_data_sources.py @@ -160,7 +160,7 @@ def __init__( identifier, market, symbol, - storage_path = None + storage_path=None ): self._identifier = identifier self._market = market @@ -200,11 +200,11 @@ def symbol(self): def get_symbol(self): return self.symbol - + @property def storage_path(self): return self._storage_path - + def get_storage_path(self): return self.storage_path @@ -236,29 +236,32 @@ def market_credential_service(self, value): @staticmethod def get_file_name_symbol(file_path): """ - Static function that extracts the symbol from a give data filepath, given that the - data file path is in the format - {DATA_TYPE}_{TARGET_SYMBOL}_{TRADING_SYMBOL}_{MARKET}_{time_frame}_{START_DATETIME}_{END_DATETIME}.csv + Static function that extracts the symbol from a give data filepath, + given that the data file path is in the format + {DATA_TYPE}_{TARGET_SYMBOL}_{TRADING_SYMBOL}_{MARKET}_ + {time_frame}_{START_DATETIME}_{END_DATETIME}.csv Parameters: - file_path: str - the given file path of the data storage file + file_path: str - the given file path of + the data storage file Returns: string representing the symbol """ parts = file_path.split("_") - + if len(parts) < 6: return None - + return "".join([parts[1], '/', parts[2]]) - + @staticmethod def get_file_name_time_frame(file_path): """ - Static function that extracts the time_frame from a give data filepath, given that the - data file path is in the format - {DATA_TYPE}_{TARGET_SYMBOL}_{TRADING_SYMBOL}_{MARKET}_{time_frame}_{START_DATETIME}_{END_DATETIME}.csv + Static function that extracts the time_frame from a give data filepath, + given that the data file path is in the format + {DATA_TYPE}_{TARGET_SYMBOL}_{TRADING_SYMBOL}_{MARKET}_ + {time_frame}_{START_DATETIME}_{END_DATETIME}.csv Parameters: file_path: str - the given file path of the data storage file @@ -267,18 +270,19 @@ def get_file_name_time_frame(file_path): string representing the time_frame """ parts = file_path.split("_") - + if len(parts) < 6: return None - + return TimeFrame.from_string(parts[4]) - + @staticmethod def get_file_name_market(file_path): """ - Static function that extracts the time_frame from a give data filepath, given that the - data file path is in 
-        {DATA_TYPE}_{TARGET_SYMBOL}_{TRADING_SYMBOL}_{MARKET}_{time_frame}_{START_DATETIME}_{END_DATETIME}.csv
+        Static function that extracts the market from a given data filepath,
+        given that the data file path is in the format
+        {DATA_TYPE}_{TARGET_SYMBOL}_{TRADING_SYMBOL}_{MARKET}
+        _{time_frame}_{START_DATETIME}_{END_DATETIME}.csv

         Parameters:
             file_path: str - the given file path of the data storage file

@@ -287,18 +291,19 @@ def get_file_name_market(file_path):
             string representing the market
         """
         parts = file_path.split("_")
-    
+
         if len(parts) < 6:
             return None
-    
-        return TimeFrame.from_string(parts[3])
-    
+
+        return parts[3]
+
     @staticmethod
     def get_file_name_start_datetime(file_path):
         """
-        Static function that extracts the time_frame from a give data filepath, given that the
-        data file path is in the format
-        {DATA_TYPE}_{TARGET_SYMBOL}_{TRADING_SYMBOL}_{MARKET}_{time_frame}_{START_DATETIME}_{END_DATETIME}.csv
+        Static function that extracts the start datetime from a given data
+        filepath, given that the data file path is in the format
+        {DATA_TYPE}_{TARGET_SYMBOL}_{TRADING_SYMBOL}_{MARKET}_
+        {time_frame}_{START_DATETIME}_{END_DATETIME}.csv

         Parameters:
             file_path: str - the given file path of the data storage file

@@ -307,18 +312,20 @@ def get_file_name_start_datetime(file_path):
             string representing the start datetime
         """
         parts = file_path.split("_")
-    
+
         if len(parts) < 6:
             return None
-    
-        return TimeFrame.from_string(parts[5])
-    
+
+        return parts[5]
+
     @staticmethod
     def get_file_name_end_datetime(file_path):
         """
-        Static function that extracts the time_frame from a give data filepath, given that the
-        data file path is in the format
-        {DATA_TYPE}_{TARGET_SYMBOL}_{TRADING_SYMBOL}_{MARKET}_{time_frame}_{START_DATETIME}_{END_DATETIME}.csv
+        Static function that extracts the end datetime
+        from a given data filepath, given that the data file
+        path is in the format
+        {DATA_TYPE}_{TARGET_SYMBOL}_{TRADING_SYMBOL}_{MARKET}_
+        {time_frame}_{START_DATETIME}_{END_DATETIME}.csv

         Parameters:
             file_path: str - the given file path of the data storage file

@@ -327,12 +334,12 @@ def get_file_name_end_datetime(file_path):
             string representing the end datetime
         """
         parts = file_path.split("_")
-    
+
         if len(parts) < 6:
             return None
-    
-        return TimeFrame.from_string(parts[6])
-    
+
+        # Drop the trailing .csv extension so only the datetime remains
+        return os.path.splitext(parts[6])[0]
+
     @staticmethod
     def create_storage_file_path(
         storage_path,
@@ -362,11 +369,12 @@ def create_storage_file_path(
         target_symbol, trading_symbol = symbol.split('/')
         path = os.path.join(
             storage_path,
-            f"{data_type}_{target_symbol}_{trading_symbol}_{market}_{time_frame}_{start_datetime}_{end_datetime}.csv"
+            f"{data_type}_{target_symbol}_{trading_symbol}_{market}_" +
+            f"{time_frame}_{start_datetime}_{end_datetime}.csv"
         )
-        print(path)
         return path

+
 class OHLCVMarketDataSource(MarketDataSource, ABC):
     """
     Abstract class for ohlcv market data sources.
diff --git a/investing_algorithm_framework/domain/utils/polars.py b/investing_algorithm_framework/domain/utils/polars.py
index 5d446b4d..3358dd25 100644
--- a/investing_algorithm_framework/domain/utils/polars.py
+++ b/investing_algorithm_framework/domain/utils/polars.py
@@ -1,18 +1,27 @@
 from pandas import to_datetime
 from polars import DataFrame as PolarsDataFrame

-def convert_polars_to_pandas(data: PolarsDataFrame, remove_duplicates = True, add_index=True, datetime_column_name = "Datetime"):
+
+def convert_polars_to_pandas(
+    data: PolarsDataFrame,
+    remove_duplicates=True,
+    add_index=True,
+    datetime_column_name="Datetime"
+):
     """
-    Function to convert polars dataframe to pandas dataframe. 
+    Function to convert a polars dataframe to a pandas dataframe.

     Parameters:
-        data:Polars Dataframe - The original polars dataframe
-        remove_duplicates: Boolean - If set to true, all duplicate dates will be removed from the dataframe
-        add_index: Boolean - If set to true, an index will be added to the dataframe
-        datetime_column_name: String - the column name that has the datetime object. By default this is set to column name Datetime
+        data: Polars DataFrame - The original polars dataframe
+        remove_duplicates: Boolean - If set to true, all duplicate
+            dates will be removed from the dataframe
+        add_index: Boolean - If set to true, an index will
+            be added to the dataframe
+        datetime_column_name: String - the column name that has the
+            datetime object. By default this is set to column name Datetime.
             This is only used if add_index is set to True

-    Returns: 
+    Returns:
         Pandas DataFrame that has been converted from a Polars DataFrame
     """
     data = data.to_pandas().copy()
@@ -29,4 +38,4 @@
         # Remove duplicate dates
         data = data[~data.index.duplicated(keep='first')]

-    return data
\ No newline at end of file
+    return data
diff --git a/investing_algorithm_framework/indicators/advanced.py b/investing_algorithm_framework/indicators/advanced.py
index 01683626..7b03adb5 100644
--- a/investing_algorithm_framework/indicators/advanced.py
+++ b/investing_algorithm_framework/indicators/advanced.py
@@ -1,17 +1,17 @@
 import importlib.util

 if importlib.util.find_spec("scipy") is None \
-    or importlib.util.find_spec("tulipy") is None \
-    or importlib.util.find_spec("numpy") is None \
-    or importlib.util.find_spec("collections") is None:
+        or importlib.util.find_spec("tulipy") is None \
+        or importlib.util.find_spec("numpy") is None \
+        or importlib.util.find_spec("collections") is None:
     raise ImportError("You have not installed the indicators package")

 from scipy.signal import argrelextrema
 from collections import deque
-import tulipy as tp
 import numpy as np

+
 def get_higher_lows(data: np.array, order=5, K=2):
     '''
     Finds consecutive higher lows in price pattern.
@@ -41,6 +41,7 @@

     return extrema

+
 def get_lower_highs(data: np.array, order=5, K=2):
     '''
     Finds consecutive lower highs in price pattern.
@@ -72,6 +73,7 @@

     return extrema

+
 def get_higher_highs(data: np.array, order=5, K=None):
     '''
     Finds consecutive higher highs in price pattern.
@@ -83,11 +85,11 @@
     # Get highs
     high_idx = argrelextrema(data, np.greater_equal, order=order)[0]
     highs = data[high_idx]
-    
+
     # Ensure consecutive highs are higher than previous highs
     extrema = []
     ex_deque = deque(maxlen=K)
-    
+
     for i, idx in enumerate(high_idx):

         if i == 0:
@@ -105,6 +107,7 @@
     idx = idx[np.where(idx < len(data))]
     return idx

+
 def get_lower_lows(data: np.array, order=5, K=2):
     '''
     Finds consecutive lower lows in price pattern.
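Illustrative usage of the extrema helpers above (a minimal sketch, not part of the patch; it assumes the helpers follow the classic scipy argrelextrema recipe they are built on and return a list of deques, each holding the indices of K consecutive confirmed lows, and that they are importable from the indicators package):

    import numpy as np
    # Import path assumed for illustration:
    from investing_algorithm_framework.indicators import get_higher_lows

    # Toy closing prices whose dips rise from 8.0 up to 9.5.
    closes = np.array([10.0, 8.0, 11.0, 8.5, 12.0, 9.0, 13.0, 9.5, 14.0])

    # order=1 confirms a low against one neighbour on each side;
    # K=2 requires two consecutive rising lows per confirmed run.
    for run in get_higher_lows(closes, order=1, K=2):
        print("higher-low run at indices:", list(run))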
@@ -151,6 +154,7 @@

     return extrema

+
 def get_higher_high_index(data: np.array, order=5, K=2):
     # extrema = get_higher_highs(data, order, K)
     # idx = np.array([i[-1] + order for i in extrema])
@@ -163,16 +167,19 @@ def get_lower_highs_index(data: np.array, order=5, K=2):
     idx = np.array([i[-1] + order for i in extrema])
     return idx[np.where(idx < len(data))]

+
 def get_lower_lows_index(data: np.array, order=5, K=2):
     extrema = get_lower_lows(data, order, K)
     idx = np.array([i[-1] + order for i in extrema])
     return idx[np.where(idx < len(data))]

+
 def get_higher_lows_index(data: np.array, order=5, K=2):
     extrema = get_higher_lows(data, order, K)
     idx = np.array([i[-1] + order for i in extrema])
     return idx[np.where(idx < len(data))]

+
 def get_peaks(data, key, order=5, k=None):
     """
     Get peaks for the given key in the data DataFrame.
diff --git a/investing_algorithm_framework/indicators/trend.py b/investing_algorithm_framework/indicators/trend.py
index 835dd4ed..51ccf716 100644
--- a/investing_algorithm_framework/indicators/trend.py
+++ b/investing_algorithm_framework/indicators/trend.py
@@ -4,22 +4,29 @@
 from datetime import timedelta
 import tulipy as tp
 import numpy as np
-from investing_algorithm_framework.domain import OperationalException, DateRange
+from investing_algorithm_framework.domain import OperationalException, \
+    DateRange

 if importlib.util.find_spec("scipy") is None \
-    or importlib.util.find_spec("tulipy") is None \
-    or importlib.util.find_spec("numpy") is None \
-    or importlib.util.find_spec("collections") is None:
-    raise ImportError("You have not installed the the indicators package")
+        or importlib.util.find_spec("tulipy") is None \
+        or importlib.util.find_spec("numpy") is None \
+        or importlib.util.find_spec("collections") is None:
+    raise ImportError(
+        "You have not installed the indicators package"
+    )

 """
-This module contains functions for trend analysis. Trend analysis is the process of analyzing the direction
-of the price of an asset.
-or the direction of the market as a whole. This is done by analyzing the moving averages of the price data or
+This module contains functions for trend analysis. Trend analysis is
+the process of analyzing the direction of the price of an asset,
+or the direction of the market as a whole. This is done by analyzing the
+moving averages of the price data or
 indicators such as the RSI.
 """

-def is_uptrend(data: Union[pd.DataFrame, pd.Series], fast_key="SMA_50", slow_key="SMA_200") -> bool:
+
+def is_uptrend(
+    data: Union[pd.DataFrame, pd.Series], fast_key="SMA_50", slow_key="SMA_200"
+) -> bool:
     """
     Check if the price data is in an upturn.

@@ -27,67 +34,74 @@
         data: pd.DataFrame or pd.Series - The input pandas DataFrame or Series.
         fast_key: str - The key for the fast moving average.
         slow_key: str - The key for the slow moving average.
-    
+
     Returns:
-        - Boolean indicating if the price data is in an upturn. 
+        - Boolean indicating if the price data is in an upturn.
""" if not isinstance(data, pd.Series) and not isinstance(data, pd.DataFrame): - raise OperationalException("Provided data must be of type pandas series or pandas dataframe") - + raise OperationalException( + "Provided data must be of type pandas series or pandas dataframe" + ) + if isinstance(data, pd.Series): # Check if the data keys are present in the data if fast_key not in data.index or slow_key not in data.index: raise OperationalException("Data keys not present in the data.") - + return data[fast_key] > data[slow_key] - + # Check if the data keys are present in the data if fast_key not in data.columns or slow_key not in data.columns: raise OperationalException("Data keys not present in the data.") - + # Check if the index of the data is a datetime index if not isinstance(data.index, pd.DatetimeIndex): raise OperationalException("Data index must be a datetime index.") - + # Check if the data is not empty if len(data) == 0: return False - + return data[fast_key].iloc[-1] > data[slow_key].iloc[-1] -def is_downtrend(data: Union[pd.DataFrame, pd.Series], fast_key="SMA_50", slow_key="SMA_200") -> bool: +def is_downtrend( + data: Union[pd.DataFrame, pd.Series], fast_key="SMA_50", slow_key="SMA_200" +) -> bool: """ Check if the price data is in a downturn. """ if not isinstance(data, pd.Series) and not isinstance(data, pd.DataFrame): - raise OperationalException("Provided data must be of type pandas series or pandas dataframe") - + raise OperationalException( + "Provided data must be of type pandas series or pandas dataframe" + ) + if isinstance(data, pd.Series): # Check if the data keys are present in the data if fast_key not in data.index or slow_key not in data.index: raise OperationalException("Data keys not present in the data.") - + return data[fast_key] < data[slow_key] # Check if the data keys are present in the data if fast_key not in data.columns or slow_key not in data.columns: raise ValueError("Data keys not present in the data.") - + # Check if the index of the data is a datetime index if not isinstance(data.index, pd.DatetimeIndex): raise ValueError("Data index must be a datetime index.") - + # Check if the data is not empty if len(data) == 0: return False - + return data[fast_key].iloc[-1] < data[slow_key].iloc[-1] + def is_crossover(data, key1, key2, strict=True) -> bool: """ Check if the given keys have crossed over. @@ -104,14 +118,15 @@ def is_crossover(data, key1, key2, strict=True) -> bool: if len(data) < 2: return False - + if strict: return data[key1].iloc[-1] > data[key2].iloc[-1] \ and data[key1].iloc[-2] < data[key2].iloc[-2] - + return data[key1].iloc[-1] >= data[key2].iloc[-1] \ and data[key1].iloc[-2] <= data[key2].iloc[-2] + def is_crossunder(data, key1, key2, strict=True) -> bool: """ Check if the given keys have crossed under. @@ -121,21 +136,24 @@ def is_crossunder(data, key1, key2, strict=True) -> bool: - key1: str - The first key to compare. - key2: str - The second key to compare. - strict: bool - Whether to check for a strict crossover. - + Returns: - Boolean indicating if the keys have crossed under. 
""" if len(data) < 2: return False - + if strict: return data[key1].iloc[-1] < data[key2].iloc[-1] \ and data[key1].iloc[-2] > data[key2].iloc[-2] - + return data[key1].iloc[-1] <= data[key2].iloc[-1] \ and data[key1].iloc[-2] >= data[key2].iloc[-2] -def has_crossed_upward(data: pd.DataFrame, key, threshold, strict=True) -> bool: + +def has_crossed_upward( + data: pd.DataFrame, key, threshold, strict=True +) -> bool: """ Check if the given key has crossed upward. @@ -146,9 +164,10 @@ def has_crossed_upward(data: pd.DataFrame, key, threshold, strict=True) -> bool: - strict: bool - Whether to check for a strict crossover. Returns: - - Boolean indicating if the key has crossed upward through the threshold within the given data frame. + - Boolean indicating if the key has crossed upward through the + threshold within the given data frame. """ - + # Ensure the key exists in the DataFrame if key not in data.columns: raise KeyError(f"Key '{key}' not found in DataFrame") @@ -161,25 +180,34 @@ def has_crossed_upward(data: pd.DataFrame, key, threshold, strict=True) -> bool: below_threshold = data[key] <= threshold above_threshold = data[key] >= threshold - # Check if there is any point where a value is below the threshold followed by a value above the threshold - crossed_upward = (below_threshold.shift(1, fill_value=False) & above_threshold).any() + # Check if there is any point where a value is below the threshold + # followed by a value above the threshold + crossed_upward = ( + below_threshold.shift(1, fill_value=False) & above_threshold + ).any() return crossed_upward + def get_up_and_downtrends(data: pd.DataFrame) -> List[DateRange]: """ Function to get the up and down trends of a pandas dataframe. Params: - data: pd.Dataframe - instance of pandas Dateframe containing OHLCV data. - + data: pd.Dataframe - instance of pandas Dateframe + containing OHLCV data. + Returns: - List of date ranges that with up_trend and down_trend flags specified. + List of date ranges that with up_trend and down_trend + flags specified. """ # Check if the data is larger then 200 data points if len(data) < 200: - raise OperationalException("The data must be larger than 200 data points to determine up and down trends.") - + raise OperationalException( + "The data must be larger than 200 data " + + "points to determine up and down trends." 
+        )

     copy = data.copy()
     copy = get_sma(copy, source_column_name="Close", period=50)
     copy = get_sma(copy, source_column_name="Close", period=200)
@@ -195,16 +223,28 @@ def get_up_and_downtrends(data: pd.DataFrame) -> List[DateRange]:
         selected_rows = selection.iloc[:idx]

         # Check if last row is null for the SMA_50 and SMA_200
-        if pd.isnull(selected_rows["SMA_Close_50"].iloc[-1]) or pd.isnull(selected_rows["SMA_Close_200"].iloc[-1]):
+        if pd.isnull(selected_rows["SMA_Close_50"].iloc[-1]) \
+                or pd.isnull(selected_rows["SMA_Close_200"].iloc[-1]):
             continue

-        if is_uptrend(selected_rows, fast_key="SMA_Close_50", slow_key="SMA_Close_200"):
-
+        if is_uptrend(
+            selected_rows, fast_key="SMA_Close_50", slow_key="SMA_Close_200"
+        ):
+
             if current_trend != 'Up':

                 if current_trend is not None:
-                    end_date = selection.loc[row.Index - timedelta(days=1)].name
-                    date_ranges.append(DateRange(start_date=start_date_range, end_date=end_date, name=current_trend, down_trend=True))
+                    end_date = selection.loc[
+                        row.Index - timedelta(days=1)
+                    ].name
+                    date_ranges.append(
+                        DateRange(
+                            start_date=start_date_range,
+                            end_date=end_date,
+                            name=current_trend,
+                            down_trend=True
+                        )
+                    )
                 start_date_range = row.Index
                 current_trend = 'Up'
             else:
@@ -215,8 +255,17 @@ def get_up_and_downtrends(data: pd.DataFrame) -> List[DateRange]:
             if current_trend != 'Down':

                 if current_trend is not None:
-                    end_date = selection.loc[row.Index - timedelta(days=1)].name
-                    date_ranges.append(DateRange(start_date=start_date_range, end_date=end_date, name=current_trend, up_trend=True))
+                    end_date = selection.loc[
+                        row.Index - timedelta(days=1)
+                    ].name
+                    date_ranges.append(
+                        DateRange(
+                            start_date=start_date_range,
+                            end_date=end_date,
+                            name=current_trend,
+                            up_trend=True
+                        )
+                    )
                 start_date_range = row.Index
                 current_trend = 'Down'
             else:
@@ -227,36 +276,68 @@ def get_up_and_downtrends(data: pd.DataFrame) -> List[DateRange]:
     end_date = selection.index[-1]

     if current_trend == 'Up':
-        date_ranges.append(DateRange(start_date=start_date_range, end_date=end_date, name=current_trend, up_trend=True))
+        date_ranges.append(
+            DateRange(
+                start_date=start_date_range,
+                end_date=end_date,
+                name=current_trend,
+                up_trend=True
+            )
+        )
     else:
-        date_ranges.append(DateRange(start_date=start_date_range, end_date=end_date, name=current_trend, down_trend=True))
+        date_ranges.append(
+            DateRange(
+                start_date=start_date_range,
+                end_date=end_date,
+                name=current_trend,
+                down_trend=True
+            )
+        )

     return date_ranges
-    
-def get_sma(data: pd.DataFrame, period=50, source_column_name="Close", result_column_name = None):
+
+
+def get_sma(
+    data: pd.DataFrame,
+    period=50,
+    source_column_name="Close",
+    result_column_name=None
+):
     """
     Function to add a Simple Moving Average (SMA) to a pandas dataframe.

     Params:
         data: pd.DataFrame - instance of pandas DataFrame.
-        period: int - the number of data points the SMA needs to take into account.
-        source_column_name: str - the source_column_name that will be used to calculate the SMA.
-        result_column_name: (option) str - if set, this will be used as column in the
-        dataframe where the result will be written to. If not set the result column is
+        period: int - the number of data points the SMA needs
+            to take into account.
+        source_column_name: str - the source_column_name that
+            will be used to calculate the SMA.
+        result_column_name: (optional) str - if set, this
+            will be used as column in the
+            dataframe where the result will be written to. If
+            not set the result column is
             named 'SMA_{key}_{period}'.
-    
+
     Returns:
-        Pandas dataframe with SMA column added, named 'sma_{key}_{period}' or name
+        Pandas dataframe with SMA column added,
+        named 'SMA_{key}_{period}' or named
         according to the result_column_name
     """
     # Check if the period is larger than the data
     if period > len(data):
-        raise OperationalException(f"The period {period} is larger than the data.")
-
-    sma_values = tp.sma(data[source_column_name].to_numpy(), period=period)
+        raise OperationalException(
+            f"The period {period} is larger than the data."
+        )
+
+    sma_values = tp.sma(
+        data[source_column_name].to_numpy(),
+        period=period
+    )

     # Pad the initial rows, for which no SMA exists yet, with None values
-    sma_values = np.concatenate((np.full(period - 1, None), sma_values))
+    sma_values = np.concatenate(
+        (np.full(period - 1, None), sma_values)
+    )

     if result_column_name:
         data[result_column_name] = sma_values
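Illustrative usage of the trend helpers above (a minimal sketch, not part of the patch; it assumes get_sma names its result column 'SMA_{source_column_name}_{period}', the convention get_up_and_downtrends relies on, and that the helpers are importable from the indicators package):

    import numpy as np
    import pandas as pd
    # Import path assumed for illustration:
    from investing_algorithm_framework.indicators import get_sma, is_uptrend

    # 250 days of steadily rising toy closes, so SMA_50 ends above SMA_200.
    index = pd.date_range("2024-01-01", periods=250, freq="D")
    data = pd.DataFrame({"Close": np.arange(1.0, 251.0)}, index=index)

    data = get_sma(data, period=50, source_column_name="Close")
    data = get_sma(data, period=200, source_column_name="Close")

    # Prints True: the fast average sits above the slow one on the last row.
    print(is_uptrend(data, fast_key="SMA_Close_50", slow_key="SMA_Close_200"))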
diff --git a/investing_algorithm_framework/infrastructure/models/market_data_sources/ccxt.py b/investing_algorithm_framework/infrastructure/models/market_data_sources/ccxt.py
index c80b70ac..9f11f862 100644
--- a/investing_algorithm_framework/infrastructure/models/market_data_sources/ccxt.py
+++ b/investing_algorithm_framework/infrastructure/models/market_data_sources/ccxt.py
@@ -288,7 +288,8 @@ def prepare_data(
         When downloading the data it will use the ccxt library.
         """
-        total_minutes = TimeFrame.from_string(self.time_frame).amount_of_minutes
+        total_minutes = TimeFrame.from_string(self.time_frame)\
+            .amount_of_minutes
         self.backtest_data_start_date = \
             backtest_start_date - timedelta(minutes=total_minutes)
         self.backtest_data_end_date = backtest_end_date
@@ -427,14 +428,20 @@ def get_data(self, **kwargs):
         This implementation uses the CCXTMarketService to get the OHLCV data.

         Parameters:
-            window_size: int (optional) - the total amount of candle sticks that need to be returned
-            start_date: datetime (optional) - the start date of the data. The first candle stick should close to this date.
-            end_date: datetime (optional) - the end date of the data. The last candle stick should close to this date.
-            storage_path: string (optional) - the storage path specifies the directory where the data is written to or read from.
-            If set the data provider will write all its downloaded data to this location. Also, it will check if the
-            data already exists at the storage location. If this is the case it will return this.
+            window_size: int (optional) - the total amount of candle
+                sticks that need to be returned
+            start_date: datetime (optional) - the start date of the data. The
+                first candle stick should close near this date.
+            end_date: datetime (optional) - the end date of the data. The last
+                candle stick should close near this date.
+            storage_path: string (optional) - the storage path specifies the
+                directory where the data is written to or read from.
+                If set the data provider will write all its downloaded data
+                to this location. Also, it will check if the
+                data already exists at the storage location. If this is the
+                case it will return this.
+
+        Returns:
             polars.DataFrame with the OHLCV data
         """
         market_service = CCXTMarketService(
@@ -484,12 +491,12 @@ def get_data(self, **kwargs):
             raise OperationalException(
                 "start_date should be a datetime object"
             )
-    
+
         if "storage_path" in kwargs:
             storage_path = kwargs["storage_path"]
         else:
             storage_path = self.get_storage_path()
-    
+
         data = None

         if storage_path is not None:
@@ -524,7 +531,7 @@ def get_data(self, **kwargs):
                 to_timestamp=end_date,
                 market=self.market
             )
-    
+
         return data

     def to_backtest_market_data_source(self) -> BacktestMarketDataSource:
@@ -549,12 +556,13 @@ def _get_data_from_storage(
         Function to get data from the storage path:

         Parameters:
-            storage_path: string - the storage path where the data should be in.
+            storage_path: string - the storage path where the
+                data should be in.

         Return:
             Polars dataframe.
         """
-    
+
         if not os.path.isdir(storage_path):
             return None

@@ -563,7 +571,7 @@
         for filename in os.listdir(storage_path):
             path = os.path.join(storage_path, filename)
-    
-            if os.path.isfile(path) or path.split('.')[-1] != ".csv":
+
+            # Skip anything that is not a regular .csv file; note that
+            # str.split drops the dot, so the suffix compares against "csv"
+            if not os.path.isfile(path) or path.split('.')[-1] != "csv":
                 continue

             file_name_symbol = self.get_file_name_symbol(path)
             file_name_market = self.get_file_name_market(path)
             file_name_time_frame = self.get_file_name_time_frame(path)
             file_name_start_date = self.get_file_name_start_date(path)
             file_name_end_date = self.get_file_name_end_date(path)

             if file_name_symbol == symbol \
-                and file_name_market == market \
-                and file_name_time_frame == time_frame \
-                and file_name_start_date >= from_timestamp \
-                and file_name_end_date <= to_timestamp:
+                    and file_name_market == market \
+                    and file_name_time_frame == time_frame \
+                    and file_name_start_date >= from_timestamp \
+                    and file_name_end_date <= to_timestamp:
                 return polars.read_csv(path)
-    
+
         return None

     def write_data_to_storage(
@@ -596,8 +604,10 @@
         Function to write data to the storage path:

         Parameters:
-            data: polars.DataFrame - the data that should be written to the storage path.
-            storage_path: string - the storage path where the data should be written to.
+            data: polars.DataFrame - the data that should be written to the
+                storage path.
+            storage_path: string - the storage path where the data should
+                be written to.
             symbol: string - the symbol of the data.
             time_frame: string - the time_frame of the data.
             from_timestamp: datetime - the start date of the data.
@@ -607,10 +617,10 @@
         Return:
             None
         """
-    
+
         if not os.path.isdir(storage_path):
             os.mkdir(storage_path)
-    
+
         file_path = self.create_storage_file_path(
             storage_path=storage_path,
             symbol=symbol,
@@ -622,7 +632,7 @@
         if os.path.isfile(file_path):
             return
-    
+
         else:
             try:
                 with open(file_path, 'w') as _:
@@ -632,7 +642,7 @@
                 raise OperationalException(
                     f"Could not create data file {file_path}"
                 )
-    
+
         data.write_csv(file_path)

diff --git a/investing_algorithm_framework/infrastructure/models/market_data_sources/pandas.py b/investing_algorithm_framework/infrastructure/models/market_data_sources/pandas.py
index 8c645ee9..caf2a57c 100644
--- a/investing_algorithm_framework/infrastructure/models/market_data_sources/pandas.py
+++ b/investing_algorithm_framework/infrastructure/models/market_data_sources/pandas.py
@@ -56,7 +56,7 @@ def _validate_dataframe_with_ohlcv_structure(self):
             )

-        if not self.dataframe.columns == \
-            ["Datetime", "Open", "High", "Low", "Close", "Volume"]:
+        if list(self.dataframe.columns) != \
+                ["Datetime", "Open", "High", "Low", "Close", "Volume"]:
             raise OperationalException(
                 "Provided dataframe does not have all required columns. "
" "Your pandas dataframe should have the following columns: " @@ -150,17 +150,17 @@ def get_data(self, **kwargs): ) # Slice the pandas dataframe object - return self.dataframe[ "Datetime" >= start_date] - + return self.dataframe["Datetime" >= start_date] def to_backtest_market_data_source(self) -> BacktestMarketDataSource: - return CCXTOHLCVBacktestMarketDataSource( - identifier=self.identifier, - market=self.market, - symbol=self.symbol, - time_frame=self.time_frame, - window_size=self.window_size - ) + # return CCXTOHLCVBacktestMarketDataSource( + # identifier=self.identifier, + # market=self.market, + # symbol=self.symbol, + # time_frame=self.time_frame, + # window_size=self.window_size + # ) + return None def _validate_dataframe_with_ohlcv_structure(self): """ @@ -174,7 +174,7 @@ def _validate_dataframe_with_ohlcv_structure(self): ) if not self.dataframe.columns == \ - ["Datetime", "Open", "High", "Low", "Close", "Volume"]: + ["Datetime", "Open", "High", "Low", "Close", "Volume"]: raise OperationalException( "Provided dataframe does not have all required columns. " "Your pandas dataframe should have the following columns: "