diff --git a/stream_viz/feature_drift/f_drift_detector.py b/stream_viz/feature_drift/f_drift_detector.py
index babe6f1..ad24b36 100644
--- a/stream_viz/feature_drift/f_drift_detector.py
+++ b/stream_viz/feature_drift/f_drift_detector.py
@@ -26,15 +26,19 @@ class FeatureDriftDetector(DriftDetector):
Size of the gap between segments when computing gradual drift (default is 50).
p_val_threshold : float, optional
P-value threshold for gradual drift detection (default is 0.0001).
+ psi_threshold : float, optional
+ PSI threshold above which a categorical feature is flagged as drifting (default is 0.12).
"""
def __init__(
self,
features_list: List[str],
+ categorical_features: List[str],
window_size: int = 300,
ks_test_pval: float = 0.001,
gap_size: int = 50,
p_val_threshold: float = 0.0001,
+ psi_threshold: float = 0.12,
) -> None:
self._drift_records: List[Dict[str, str]] = []
self._valid_keys: set[str] = get_fd_drift_type_keys()
@@ -45,8 +49,10 @@ def __init__(
self._moving_avg: pd.DataFrame = pd.DataFrame(columns=features_list)
self.p_val: float = ks_test_pval
self.p_val_grad: float = p_val_threshold
+ self.psi_threshold: float = psi_threshold
self._drift_tp_df: pd.DataFrame = pd.DataFrame(columns=features_list)
self._feature_data_df: pd.DataFrame = pd.DataFrame(columns=features_list)
+ self.categorical_features: List[str] = categorical_features
def update(self, x_i: Dict[str, float], y_i: int, tpt: int) -> None:
"""
@@ -78,9 +84,14 @@ def detect_drift(self, tpt: int) -> None:
"""
window_df = pd.DataFrame(self._window)
for feature in window_df.columns:
- drift_detected, drift_type = self._detect_drift_using_ks(
- window_df[feature].values
- )
+ if feature in self.categorical_features:
+ drift_detected, drift_type = self._detect_drift_using_psi(
+ window_df[feature].values
+ )
+ else:
+ drift_detected, drift_type = self._detect_drift_using_ks(
+ window_df[feature].values
+ )
if drift_detected:
self._drift_tp_df.loc[tpt, feature] = drift_type
@@ -125,6 +136,44 @@ def _detect_drift_using_ks(
return False, None
+    def _detect_drift_using_psi(
+        self, window_data: np.ndarray
+    ) -> Tuple[bool, Optional[str]]:
+        """Classify drift in one feature window using the PSI.
+
+        If the PSI of the two window halves exceeds ``psi_threshold``, a mean shift
+        larger than the window's standard deviation is "sudden_drift" and any other
+        positive shift is "linear_drift". Failing that, the halves with the central
+        gap removed are compared; a PSI above the threshold is "gradual_drift".
+        Returns ``(drift_detected, drift_type)``, or ``(False, None)`` for no drift.
+        """
+        mid, half_gap = self.window_size // 2, self.gap_size // 2
+        first_half, second_half = window_data[:mid], window_data[mid:]
+        if self.calculate_psi(first_half, second_half) > self.psi_threshold:
+            mean_diff = np.mean(second_half) - np.mean(first_half)
+            if np.abs(mean_diff) > np.std(window_data):
+                return True, "sudden_drift"
+            if mean_diff > 0:
+                return True, "linear_drift"
+        # Computed only on fall-through, so early returns skip the second PSI pass.
+        grad_parts = window_data[: mid - half_gap], window_data[mid + half_gap :]
+        grad_hit = self.calculate_psi(*grad_parts) > self.psi_threshold
+        return (True, "gradual_drift") if grad_hit else (False, None)
+
+    def calculate_psi(self, expected, actual, buckets=10):
+        """Return the Population Stability Index of ``actual`` vs ``expected``.
+
+        Values are binned into ``buckets`` equal-width bins over [0, 1]; empty bins
+        are floored to 0.01 so the log ratio stays finite. An empty sample gives
+        0.0 (no evidence of drift) instead of dividing by a zero length.
+        """
+        if len(expected) == 0 or len(actual) == 0:
+            return 0.0
+        exp_pct = np.histogram(expected, bins=buckets, range=(0, 1))[0] / len(expected)
+        act_pct = np.histogram(actual, bins=buckets, range=(0, 1))[0] / len(actual)
+        exp_pct, act_pct = (np.where(p == 0, 0.01, p) for p in (exp_pct, act_pct))
+        return np.sum((act_pct - exp_pct) * np.log(act_pct / exp_pct))
+
def plot(self, feature_name: str, window_size: Optional[int] = None) -> None:
"""
Plot the feature values over time, highlighting detected drift points.
@@ -228,40 +277,35 @@ def drift_records(self, drift_record: FeatureDriftType) -> None:
normal.read_csv_data(_NORMAL_DATA_PATH)
normal.encode_data()
+ # Create a mapping of original to encoded column names
+ encoded_categorical_cols = normal.X_encoded_data.columns[
+ normal.X_encoded_data.columns.str.startswith("c")
+ ]
+ original_to_encoded_categorical_cols = {
+ original: encoded
+ for original, encoded in zip(
+ normal.original_categorical_cols, encoded_categorical_cols
+ )
+ }
+
# As the KS test is only for numerical features
X_numerical = normal.X_encoded_data[normal.original_numerical_cols]
- # X_categorical = normal.X_encoded_data[normal.original_categorical_cols]
- dt_streamer = DataStreamer(
- fd_detector_obj=FeatureDriftDetector(X_numerical.columns)
+ X_categorical = normal.X_encoded_data[encoded_categorical_cols]
+ all_features = X_numerical.columns.tolist() + X_categorical.columns.tolist()
+ fd_detector = FeatureDriftDetector(
+ features_list=all_features,
+ categorical_features=encoded_categorical_cols.tolist(),
)
- dt_streamer.stream_data(X_df=X_numerical, y_df=normal.y_encoded_data)
+ dt_streamer = DataStreamer(fd_detector_obj=fd_detector)
+ dt_streamer.stream_data(X_df=normal.X_encoded_data, y_df=normal.y_encoded_data)
+
+ # Plot feature drift for a numerical feature
dt_streamer.fd_detector_obj.plot(feature_name=X_numerical.columns[0])
- # ----- Test: Feature Drift Detection for numerical variables on Dummy drift data -----
- # features_list = ["n_feature_1", "n_feature_2"]
- # drift_detector = FeatureDriftDetector(
- # features_list=features_list, window_size=100, ks_test_pval=0.001
- # )
- #
- # # Generate data for 3 distributions for each feature
- # random_state = np.random.RandomState(seed=42)
- # dist_a_f1 = random_state.normal(0.8, 0.05, 1000)
- # dist_b_f1 = random_state.normal(0.4, 0.02, 1000)
- # dist_c_f1 = random_state.normal(0.6, 0.1, 1000)
- #
- # dist_a_f2 = random_state.normal(0.3, 0.04, 1000)
- # dist_b_f2 = random_state.normal(0.7, 0.03, 1000)
- # dist_c_f2 = random_state.normal(0.5, 0.05, 1000)
- #
- # # Concatenate data to simulate a data stream with 2 drifts for each feature
- # stream_f1 = np.concatenate((dist_a_f1, dist_b_f1, dist_c_f1))
- # stream_f2 = np.concatenate((dist_a_f2, dist_b_f2, dist_c_f2))
- #
- # # Simulate streaming data update
- # for i, (val_f1, val_f2) in enumerate(zip(stream_f1, stream_f2)):
- # x_i = {"n_feature_1": val_f1, "n_feature_2": val_f2}
- # drift_detector.update(x_i, 1, i)
- #
- # drift_detector._drift_tp_df.head()
- # drift_detector._moving_avg_df.head()
+ # Plot feature drift for a categorical feature
+ dt_streamer.fd_detector_obj.plot(feature_name=X_categorical.columns[0])
+
+ # dt = FeatureDriftDetector(fd_detector_obj=normal)
+ # dt.plot("n0")
+ # dt.plot("c5_b")
diff --git a/stream_viz/tutorial/UserGuide.ipynb b/stream_viz/tutorial/UserGuide.ipynb
index 41a2466..633d4ca 100644
--- a/stream_viz/tutorial/UserGuide.ipynb
+++ b/stream_viz/tutorial/UserGuide.ipynb
@@ -6,8 +6,8 @@
"id": "cccfcee9-3f1b-4036-a67e-e71ea4d8fc27",
"metadata": {
"ExecuteTime": {
- "end_time": "2024-07-26T15:05:23.577620Z",
- "start_time": "2024-07-26T15:05:23.573711Z"
+ "end_time": "2024-07-29T18:50:14.438404Z",
+ "start_time": "2024-07-29T18:50:14.425996Z"
}
},
"outputs": [
@@ -15,7 +15,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Current Directory: G:\\github-aditya0by0\\stream-viz\\stream_viz\n"
+ "Current Directory: /Users/shreeyacy/GitHub/stream-viz-1/stream_viz\n"
]
}
],
@@ -35,8 +35,8 @@
"id": "b8eb7a87-0ea0-44ca-9a29-b26885cda470",
"metadata": {
"ExecuteTime": {
- "end_time": "2024-07-26T15:05:26.497041Z",
- "start_time": "2024-07-26T15:05:26.488055Z"
+ "end_time": "2024-07-29T18:50:17.289843Z",
+ "start_time": "2024-07-29T18:50:17.273457Z"
}
},
"outputs": [
@@ -44,7 +44,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Current Directory: G:\\github-aditya0by0\\stream-viz\n"
+ "Current Directory: /Users/shreeyacy/GitHub/stream-viz-1\n"
]
}
],
@@ -116,129 +116,15 @@
"id": "21e4215b-5dc5-4773-8f3d-f78ac3c4fbc9",
"metadata": {
"ExecuteTime": {
- "end_time": "2024-07-26T15:06:14.381562Z",
- "start_time": "2024-07-26T15:06:14.329113Z"
+ "end_time": "2024-07-29T18:50:20.741166Z",
+ "start_time": "2024-07-29T18:50:20.686728Z"
}
},
"outputs": [
{
"data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " c5_b \n",
- " c6_b \n",
- " c7_b \n",
- " c8_b \n",
- " c9_b \n",
- " n0 \n",
- " n1 \n",
- " n2 \n",
- " n3 \n",
- " n4 \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 1 \n",
- " 0 \n",
- " 0 \n",
- " 0.528245 \n",
- " 0.598345 \n",
- " 0.558432 \n",
- " 0.482846 \n",
- " 0.612024 \n",
- " \n",
- " \n",
- " 1 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 1 \n",
- " 1 \n",
- " 0.662432 \n",
- " 0.423329 \n",
- " 0.487623 \n",
- " 0.454495 \n",
- " 0.452664 \n",
- " \n",
- " \n",
- " 2 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 1 \n",
- " 1 \n",
- " 0.562990 \n",
- " 0.576429 \n",
- " 0.545916 \n",
- " 0.370166 \n",
- " 0.543403 \n",
- " \n",
- " \n",
- " 3 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 1 \n",
- " 1 \n",
- " 0.475311 \n",
- " 0.566046 \n",
- " 0.539992 \n",
- " 0.421434 \n",
- " 0.544852 \n",
- " \n",
- " \n",
- " 4 \n",
- " 1 \n",
- " 0 \n",
- " 0 \n",
- " 1 \n",
- " 0 \n",
- " 0.370579 \n",
- " 0.554642 \n",
- " 0.536804 \n",
- " 0.223743 \n",
- " 0.392332 \n",
- " \n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " c5_b c6_b c7_b c8_b c9_b n0 n1 n2 n3 \\\n",
- "0 0 0 1 0 0 0.528245 0.598345 0.558432 0.482846 \n",
- "1 0 0 0 1 1 0.662432 0.423329 0.487623 0.454495 \n",
- "2 0 0 0 1 1 0.562990 0.576429 0.545916 0.370166 \n",
- "3 0 0 0 1 1 0.475311 0.566046 0.539992 0.421434 \n",
- "4 1 0 0 1 0 0.370579 0.554642 0.536804 0.223743 \n",
- "\n",
- " n4 \n",
- "0 0.612024 \n",
- "1 0.452664 \n",
- "2 0.543403 \n",
- "3 0.544852 \n",
- "4 0.392332 "
- ]
+ "text/plain": " c5_b c6_b c7_b c8_b c9_b n0 n1 n2 n3 \\\n0 0 0 1 0 0 0.528245 0.598345 0.558432 0.482846 \n1 0 0 0 1 1 0.662432 0.423329 0.487623 0.454495 \n2 0 0 0 1 1 0.562990 0.576429 0.545916 0.370166 \n3 0 0 0 1 1 0.475311 0.566046 0.539992 0.421434 \n4 1 0 0 1 0 0.370579 0.554642 0.536804 0.223743 \n\n n4 \n0 0.612024 \n1 0.452664 \n2 0.543403 \n3 0.544852 \n4 0.392332 ",
+ "text/html": "\n\n
\n \n \n \n c5_b \n c6_b \n c7_b \n c8_b \n c9_b \n n0 \n n1 \n n2 \n n3 \n n4 \n \n \n \n \n 0 \n 0 \n 0 \n 1 \n 0 \n 0 \n 0.528245 \n 0.598345 \n 0.558432 \n 0.482846 \n 0.612024 \n \n \n 1 \n 0 \n 0 \n 0 \n 1 \n 1 \n 0.662432 \n 0.423329 \n 0.487623 \n 0.454495 \n 0.452664 \n \n \n 2 \n 0 \n 0 \n 0 \n 1 \n 1 \n 0.562990 \n 0.576429 \n 0.545916 \n 0.370166 \n 0.543403 \n \n \n 3 \n 0 \n 0 \n 0 \n 1 \n 1 \n 0.475311 \n 0.566046 \n 0.539992 \n 0.421434 \n 0.544852 \n \n \n 4 \n 1 \n 0 \n 0 \n 1 \n 0 \n 0.370579 \n 0.554642 \n 0.536804 \n 0.223743 \n 0.392332 \n \n \n
\n
"
},
"execution_count": 4,
"metadata": {},
@@ -263,129 +149,15 @@
"id": "b4e79879-9239-45db-8267-81561a3effac",
"metadata": {
"ExecuteTime": {
- "end_time": "2024-07-26T15:06:36.950639Z",
- "start_time": "2024-07-26T15:06:36.743080Z"
+ "end_time": "2024-07-29T18:50:23.196805Z",
+ "start_time": "2024-07-29T18:50:22.980401Z"
}
},
"outputs": [
{
"data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " c5_b \n",
- " c6_b \n",
- " c7_b \n",
- " c8_b \n",
- " c9_b \n",
- " n0 \n",
- " n1 \n",
- " n2 \n",
- " n3 \n",
- " n4 \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 0 \n",
- " 0.0 \n",
- " 0.0 \n",
- " 1.0 \n",
- " 0.0 \n",
- " 0.0 \n",
- " 0.530356 \n",
- " 0.598345 \n",
- " 0.519161 \n",
- " 0.478557 \n",
- " 0.620371 \n",
- " \n",
- " \n",
- " 1 \n",
- " 0.0 \n",
- " 0.0 \n",
- " 0.0 \n",
- " 1.0 \n",
- " 1.0 \n",
- " 0.672618 \n",
- " 0.423329 \n",
- " 0.442055 \n",
- " 0.449888 \n",
- " 0.458838 \n",
- " \n",
- " \n",
- " 2 \n",
- " 0.0 \n",
- " 0.0 \n",
- " 0.0 \n",
- " 1.0 \n",
- " 1.0 \n",
- " 0.567192 \n",
- " 0.576429 \n",
- " 0.505532 \n",
- " 0.364614 \n",
- " 0.550814 \n",
- " \n",
- " \n",
- " 3 \n",
- " 0.0 \n",
- " 0.0 \n",
- " 0.0 \n",
- " 1.0 \n",
- " 1.0 \n",
- " 0.474236 \n",
- " 0.566046 \n",
- " 0.499081 \n",
- " 0.416457 \n",
- " 0.552283 \n",
- " \n",
- " \n",
- " 4 \n",
- " 1.0 \n",
- " 0.0 \n",
- " 0.0 \n",
- " 1.0 \n",
- " 0.0 \n",
- " 0.363202 \n",
- " 0.554642 \n",
- " 0.495610 \n",
- " 0.216550 \n",
- " 0.397683 \n",
- " \n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " c5_b c6_b c7_b c8_b c9_b n0 n1 n2 n3 \\\n",
- "0 0.0 0.0 1.0 0.0 0.0 0.530356 0.598345 0.519161 0.478557 \n",
- "1 0.0 0.0 0.0 1.0 1.0 0.672618 0.423329 0.442055 0.449888 \n",
- "2 0.0 0.0 0.0 1.0 1.0 0.567192 0.576429 0.505532 0.364614 \n",
- "3 0.0 0.0 0.0 1.0 1.0 0.474236 0.566046 0.499081 0.416457 \n",
- "4 1.0 0.0 0.0 1.0 0.0 0.363202 0.554642 0.495610 0.216550 \n",
- "\n",
- " n4 \n",
- "0 0.620371 \n",
- "1 0.458838 \n",
- "2 0.550814 \n",
- "3 0.552283 \n",
- "4 0.397683 "
- ]
+ "text/plain": " c5_b c6_b c7_b c8_b c9_b n0 n1 n2 n3 \\\n0 0.0 0.0 1.0 0.0 0.0 0.530356 0.598345 0.519161 0.478557 \n1 0.0 0.0 0.0 1.0 1.0 0.672618 0.423329 0.442055 0.449888 \n2 0.0 0.0 0.0 1.0 1.0 0.567192 0.576429 0.505532 0.364614 \n3 0.0 0.0 0.0 1.0 1.0 0.474236 0.566046 0.499081 0.416457 \n4 1.0 0.0 0.0 1.0 0.0 0.363202 0.554642 0.495610 0.216550 \n\n n4 \n0 0.620371 \n1 0.458838 \n2 0.550814 \n3 0.552283 \n4 0.397683 ",
+ "text/html": "\n\n
\n \n \n \n c5_b \n c6_b \n c7_b \n c8_b \n c9_b \n n0 \n n1 \n n2 \n n3 \n n4 \n \n \n \n \n 0 \n 0.0 \n 0.0 \n 1.0 \n 0.0 \n 0.0 \n 0.530356 \n 0.598345 \n 0.519161 \n 0.478557 \n 0.620371 \n \n \n 1 \n 0.0 \n 0.0 \n 0.0 \n 1.0 \n 1.0 \n 0.672618 \n 0.423329 \n 0.442055 \n 0.449888 \n 0.458838 \n \n \n 2 \n 0.0 \n 0.0 \n 0.0 \n 1.0 \n 1.0 \n 0.567192 \n 0.576429 \n 0.505532 \n 0.364614 \n 0.550814 \n \n \n 3 \n 0.0 \n 0.0 \n 0.0 \n 1.0 \n 1.0 \n 0.474236 \n 0.566046 \n 0.499081 \n 0.416457 \n 0.552283 \n \n \n 4 \n 1.0 \n 0.0 \n 0.0 \n 1.0 \n 0.0 \n 0.363202 \n 0.554642 \n 0.495610 \n 0.216550 \n 0.397683 \n \n \n
\n
"
},
"execution_count": 5,
"metadata": {},
@@ -749,7 +521,7 @@
"source": [
"### FeatureDriftDetector Class\n",
"\n",
- "The `FeatureDriftDetector` class is designed to monitor and detect feature drift in streaming data using the Kolmogorov-Smirnov (KS) test. This class can handle numerical features and provides functionality to identify sudden, linear, and gradual drifts in the data. Drift detection is crucial for maintaining the reliability and accuracy of machine learning models, especially in dynamic environments where data distributions can change over time.\n",
+ "The `FeatureDriftDetector` class is designed to monitor and detect feature drift in streaming data using the Kolmogorov-Smirnov (KS) test and Population Stability Index Test (PSI test). This class can handle numerical features and categorical features, and provides functionality to identify sudden, linear, and gradual drifts in the data. Drift detection is crucial for maintaining the reliability and accuracy of machine learning models, especially in dynamic environments where data distributions can change over time.\n",
"\n",
"#### Parameters\n",
"\n",
@@ -768,21 +540,29 @@
"- **p_val_threshold** (`float`, optional): \n",
" The p-value threshold for gradual drift detection. The default value is 0.0001. This threshold is used to determine the significance of gradual changes in the data, providing an additional layer of drift detection sensitivity.\n",
"\n",
+ "- **psi_threshold** (`float`, optional):\n",
+ " The PSI threshold for the Population Stability Index. The default value is 0.12. This threshold determines how large a distribution shift must be before it counts as drift, ensuring that only meaningful changes are flagged.\n",
+ "\n",
"#### Drift Detection Methodology\n",
"\n",
- "Drift detection in the `FeatureDriftDetector` is performed using the Kolmogorov-Smirnov (KS) test, which compares the distributions of feature values within a sliding window of data. The following steps outline the drift detection process:\n",
+ "Drift detection in the `FeatureDriftDetector` is performed using the Kolmogorov-Smirnov (KS) test for numerical features and the Population Stability Index (PSI) for categorical features; each compares the distributions of feature values within a sliding window of data. The following steps outline the drift detection process:\n",
"\n",
"1. **Window Data Segmentation**:\n",
" - The feature values within the current window are split into two halves: the first half and the second half.\n",
" - Additionally, for detecting gradual drift, the data is further segmented into overlapping parts by introducing a gap between segments. Specifically, the first half of the data ends before the gap, and the second half of the data starts after the gap. This method ensures that gradual changes are not masked by immediate changes around the midpoint of the window.\n",
"\n",
"2. **Kolmogorov-Smirnov Test**:\n",
- " - The KS test is applied to compare the distributions of the first and second halves.\n",
+ " - The KS test is applied for numerical features to compare the distributions of the first and second halves.\n",
" - If the p-value from the KS test is below the specified threshold (`ks_test_pval`), it indicates a significant difference between the two halves, suggesting potential drift.\n",
+ "\n",
+ "3. **Population Stability Index Test**:\n",
+ " - The PSI test is applied for categorical features to compare the distributions of the first and second halves.\n",
+ " - If the PSI value is above the specified threshold (`psi_threshold`), it indicates a significant difference between the two halves, suggesting potential drift.\n",
" \n",
"3. **Gradual Drift Detection**:\n",
- " - Another KS test is conducted on the overlapping parts of the data to detect gradual drift. The overlapping parts are defined by excluding the gap in the middle of the window.\n",
+ " - Another KS test and PSI test are conducted on the overlapping parts of the data to detect gradual drift. The overlapping parts are defined by excluding the gap in the middle of the window.\n",
" - If the p-value from this test is below the gradual drift threshold (`p_val_threshold`), it suggests gradual changes in the data distribution.\n",
+ " - Similarly, if the psi value from this test is above the psi threshold, it indicates gradual drift in data distribution.\n",
"\n",
"4. **Drift Type Identification**:\n",
" - If drift is detected, the type of drift is determined based on the mean difference between the halves:\n",
@@ -829,12 +609,69 @@
]
},
{
- "cell_type": "markdown",
- "id": "f5a0e9f6-1660-40db-bc3a-1c6f0893f3c2",
- "metadata": {},
+ "cell_type": "code",
+ "execution_count": 6,
+ "outputs": [],
"source": [
- "---"
- ]
+ "from stream_viz.data_streamer import DataStreamer\n",
+ "from stream_viz.feature_drift.f_drift_detector import FeatureDriftDetector\n",
+ "\n",
+ "# Feature drift detector for both numerical (KS test) and categorical (PSI) columns\n",
+ "# Create a mapping of original to encoded column names\n",
+ "encoded_categorical_cols = normal_encoder.X_encoded_data.columns[\n",
+ " normal_encoder.X_encoded_data.columns.str.startswith(\"c\")\n",
+ "]\n",
+ "original_to_encoded_categorical_cols = {\n",
+ " original: encoded\n",
+ " for original, encoded in zip(\n",
+ " normal_encoder.original_categorical_cols, encoded_categorical_cols\n",
+ " )\n",
+ "}\n",
+ "\n",
+ "X_numerical = normal_encoder.X_encoded_data[normal_encoder.original_numerical_cols]\n",
+ "X_categorical = normal_encoder.X_encoded_data[encoded_categorical_cols]\n",
+ "all_features = X_numerical.columns.tolist() + X_categorical.columns.tolist()\n",
+ "fd_detector = FeatureDriftDetector(\n",
+ " features_list=all_features, categorical_features=encoded_categorical_cols.tolist()\n",
+ ")\n",
+ "dt_streamer = DataStreamer(fd_detector_obj=fd_detector)\n",
+ "dt_streamer.stream_data(\n",
+ " X_df=normal_encoder.X_encoded_data, y_df=normal_encoder.y_encoded_data\n",
+ ")"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "ExecuteTime": {
+ "end_time": "2024-07-29T18:51:08.913157Z",
+ "start_time": "2024-07-29T18:50:31.390314Z"
+ }
+ },
+ "id": "24eb4aecc0546bc4"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "outputs": [
+ {
+ "data": {
+ "text/plain": "",
+ "image/png": ""
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "dt_streamer.fd_detector_obj.plot(feature_name=\"c5_b\")"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "ExecuteTime": {
+ "end_time": "2024-07-29T18:51:14.188688Z",
+ "start_time": "2024-07-29T18:51:13.158617Z"
+ }
+ },
+ "id": "d6babfd255f88f3a"
},
{
"cell_type": "markdown",
@@ -936,10 +773,7 @@
"cell_type": "markdown",
"id": "66fbcf044689c97c",
"metadata": {
- "collapsed": false,
- "jupyter": {
- "outputs_hidden": false
- }
+ "collapsed": false
},
"source": [
"### Stream Graphs \n",
@@ -957,10 +791,7 @@
"end_time": "2024-07-26T15:08:06.514803Z",
"start_time": "2024-07-26T15:08:06.358078Z"
},
- "collapsed": false,
- "jupyter": {
- "outputs_hidden": false
- }
+ "collapsed": false
},
"outputs": [
{
@@ -1082,10 +913,7 @@
"end_time": "2024-07-26T15:08:49.978417Z",
"start_time": "2024-07-26T15:08:49.847105Z"
},
- "collapsed": false,
- "jupyter": {
- "outputs_hidden": false
- }
+ "collapsed": false
},
"outputs": [
{
@@ -1111,10 +939,7 @@
"cell_type": "markdown",
"id": "f3ccba5ae2b49026",
"metadata": {
- "collapsed": false,
- "jupyter": {
- "outputs_hidden": false
- }
+ "collapsed": false
},
"source": [
"### Scatter Plot\n",
@@ -1133,10 +958,7 @@
"end_time": "2024-07-26T15:11:04.958626Z",
"start_time": "2024-07-26T15:11:04.796875Z"
},
- "collapsed": false,
- "jupyter": {
- "outputs_hidden": false
- }
+ "collapsed": false
},
"outputs": [
{
@@ -1166,10 +988,7 @@
"end_time": "2024-07-26T15:11:09.053475Z",
"start_time": "2024-07-26T15:11:08.455548Z"
},
- "collapsed": false,
- "jupyter": {
- "outputs_hidden": false
- }
+ "collapsed": false
},
"outputs": [
{