Skip to content

Commit

Permalink
With precommit checks
Browse files Browse the repository at this point in the history
  • Loading branch information
shreeya-cy committed Jul 26, 2024
1 parent 96cbad7 commit a116e08
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 44 deletions.
87 changes: 61 additions & 26 deletions stream_viz/data_missingness/missingness.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,18 +241,22 @@ def plot(self, start, end, features):
def _add_interactive_plot(self):
    # Delegates entirely to the parent class's interactive-plot wiring;
    # this override adds no behavior of its own.
    # NOTE(review): enclosing class is outside this diff view — presumably
    # kept as an explicit extension point; confirm against the full file.
    super()._add_interactive_plot()

class StackedBarGraph(Plotter):
    """Stacked bar chart of per-chunk value/missing counts for one feature.

    NOTE(review): this span of the scraped diff contained both the pre- and
    post-format (black) versions of these lines interleaved; only the
    canonical post-commit version is kept here.
    """

    def __init__(
        self,
        missing_encoder_obj: MissingDataEncoder,
    ):
        # Encoder that supplies the encoded data frame containing NaNs
        # where values are missing.
        self._missing_encoder = missing_encoder_obj
        self._data_df = self._missing_encoder.X_encoded_data
def plot(self,feature,chunk_size):

def plot(self, feature, chunk_size):
df = self._data_df
num_chunks = len(df) // chunk_size
chunk_ranges = [(i * chunk_size, (i + 1) * chunk_size) for i in range(num_chunks)]
chunk_ranges = [
(i * chunk_size, (i + 1) * chunk_size) for i in range(num_chunks)
]

counts = []
for start, end in chunk_ranges:
Expand All @@ -262,28 +266,33 @@ def plot(self,feature,chunk_size):
count_nan = chunk[feature].isna().sum()
counts.append([count_0, count_1, count_nan])

counts_df = pd.DataFrame(counts, columns=['A', 'B', 'Missing'], index=range(num_chunks))
counts_df = pd.DataFrame(
counts, columns=["A", "B", "Missing"], index=range(num_chunks)
)

counts_df.plot(kind='bar', stacked=True, figsize=(12, 8))
plt.xlabel('Time period')
plt.ylabel('Count')
plt.title(f'Stacked Bar Graph of {feature} for each time period of {chunk_size} Instances')
plt.legend(title=feature, loc='upper right')
counts_df.plot(kind="bar", stacked=True, figsize=(12, 8))
plt.xlabel("Time period")
plt.ylabel("Count")
plt.title(
f"Stacked Bar Graph of {feature} for each time period of {chunk_size} Instances"
)
plt.legend(title=feature, loc="upper right")
plt.show()


class ScatterPlotter(Plotter):
    """Scatter plots contrasting observed entries with missing entries.

    Holds two encoders: one over the complete ("normal") data and one over
    the same data with missingness, so missing positions can be located via
    NaNs and plotted against the true values.

    NOTE(review): the scraped diff duplicated the ``__init__`` parameter
    lines (pre- and post-format versions); deduplicated to the canonical
    post-commit form here.
    """

    def __init__(
        self,
        normal_encoder_obj: NormalDataEncoder,
        missing_encoder_obj: MissingDataEncoder,
    ):
        self._normal_encoder = normal_encoder_obj
        self._missing_encoder = missing_encoder_obj

    def plot(self):
        """Dummy implementation of the abstract method 'plot'."""
        pass

def plot_numerical(self, feature):
normal_df = self._normal_encoder.X_encoded_data
missing_df = self._missing_encoder.X_encoded_data
Expand All @@ -292,9 +301,23 @@ def plot_numerical(self, feature):
missing_attr = missing_df[feature]
missing_mask = np.isnan(missing_attr)
plt.figure(figsize=(10, 6))
plt.scatter(np.arange(len(attr)), attr, color='blue', label='Not missing', alpha=0.5, s=20)
plt.scatter(np.where(missing_mask)[0], attr[missing_mask], color='red', label='Missing', alpha=0.5, s=20)
plt.xlabel('Time Points')
plt.scatter(
np.arange(len(attr)),
attr,
color="blue",
label="Not missing",
alpha=0.5,
s=20,
)
plt.scatter(
np.where(missing_mask)[0],
attr[missing_mask],
color="red",
label="Missing",
alpha=0.5,
s=20,
)
plt.xlabel("Time Points")
plt.ylabel(feature)
plt.xticks(np.arange(0, 14000, 1000))
plt.legend()
Expand All @@ -315,11 +338,23 @@ def plot_categorical(self, feature):

missing_mask = np.isnan(selected_missing_df)
plt.figure(figsize=(10, 6))
plt.scatter(np.arange(len(selected_normal_df)), selected_normal_df, color='blue', label='Not missing',
alpha=0.5, s=20)
plt.scatter(np.where(missing_mask)[0], selected_normal_df[missing_mask], color='red', label='Missing',
alpha=0.5, s=20)
plt.xlabel('Time Points')
plt.scatter(
np.arange(len(selected_normal_df)),
selected_normal_df,
color="blue",
label="Not missing",
alpha=0.5,
s=20,
)
plt.scatter(
np.where(missing_mask)[0],
selected_normal_df[missing_mask],
color="red",
label="Missing",
alpha=0.5,
s=20,
)
plt.xlabel("Time Points")
plt.ylabel(feature)
plt.xticks(np.arange(0, 110, 10))
plt.legend()
Expand Down Expand Up @@ -354,10 +389,10 @@ def plot_categorical(self, feature):
# mar_hm.plot(start_tpt=200, end_tpt=500, significance_level=0.05)

# ------------ Test Run : For StackedBarGraph -----------------
# bargraph = StackedBarGraph(missing_encoder_obj=missing)
# bargraph.plot('c5_b', 1000)

# ------------ Test Run : For ScatterPlotter -----------------
# Smoke-test plots against the pre-built `normal` / `missing` encoders
# defined earlier in this script section.
scatter = ScatterPlotter(normal_encoder_obj=normal, missing_encoder_obj=missing)
scatter.plot_numerical("n0")
scatter.plot_categorical("c5_b")
14 changes: 8 additions & 6 deletions stream_viz/tutorial/UserGuide.ipynb

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

36 changes: 24 additions & 12 deletions stream_viz/velocity/velocity_charts.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import itertools
from typing import Iterable, List, Union

import matplotlib.ticker as mticker
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import matplotlib.ticker as mticker

from stream_viz.base import Velocity
from stream_viz.data_encoders.cfpdss_data_encoder import CfpdssDataEncoder

Expand Down Expand Up @@ -293,6 +294,8 @@ def plot(self, features: Union[str, Iterable[str]], *args, **kwargs) -> None:
"Features parameter should be either a string for a categorical feature or "
"an iterable for numerical features."
)


class StreamGraph(Velocity):

def __init__(self, data_obj: CfpdssDataEncoder) -> None:
Expand All @@ -306,16 +309,18 @@ def __init__(self, data_obj: CfpdssDataEncoder) -> None:
"""
self._data_obj: CfpdssDataEncoder = data_obj

def get_timepoint(self, window_size):
    """Return chunk start times 0, window_size, 2*window_size, ... below 13000.

    Parameters
    ----------
    window_size : int
        Spacing between consecutive time points (chunk size).

    Returns
    -------
    list[int]
        Start index of each window over the stream.

    NOTE(review): the 13000 upper bound is hard-coded — presumably the
    fixed length of the cfpdss stream; confirm against the encoder's data.
    """
    return list(range(0, 13000, window_size))

def count_categories_in_chunks(self,column, chunk_size=50):
def count_categories_in_chunks(self, column, chunk_size=50):
# Initialize an empty dictionary to hold the counts for each category
category_counts = {}

# Calculate the number of chunks
num_chunks = len(column) // chunk_size + (1 if len(column) % chunk_size != 0 else 0)
num_chunks = len(column) // chunk_size + (
1 if len(column) % chunk_size != 0 else 0
)

for i in range(num_chunks):
# Get the start and end indices for the current chunk
Expand All @@ -337,7 +342,9 @@ def count_categories_in_chunks(self,column, chunk_size=50):
# Ensure all categories have a list of the correct length
for category in category_counts:
if len(category_counts[category]) < num_chunks:
category_counts[category].extend([0] * (num_chunks - len(category_counts[category])))
category_counts[category].extend(
[0] * (num_chunks - len(category_counts[category]))
)

return category_counts

Expand All @@ -347,12 +354,17 @@ def plot(self, feature):
result = self.count_categories_in_chunks(self._data_obj.X_encoded_data[feature])
fig, ax = plt.subplots()
fig.set_size_inches(10, 6)
ax.stackplot(time_points, result.values(),
labels=result.keys(), alpha=0.8, baseline='wiggle')
ax.legend(loc='upper right', reverse=True)
ax.set_title(f'Velocity of {feature}')
ax.set_xlabel('Time')
ax.set_ylabel('Height')
ax.stackplot(
time_points,
result.values(),
labels=result.keys(),
alpha=0.8,
baseline="wiggle",
)
ax.legend(loc="upper right", reverse=True)
ax.set_title(f"Velocity of {feature}")
ax.set_xlabel("Time")
ax.set_ylabel("Height")
ax.xaxis.set_major_locator(mticker.MultipleLocator(1000))
plt.show()

Expand Down Expand Up @@ -386,4 +398,4 @@ def plot(self, feature):
# numerical_features = ["n0", "n1"]
# roll_mean_obj.plot_velocity(missing.X_encoded_data, numerical_features, window_size=10)
# Smoke-test the stream graph against the pre-built `normal` encoder
# defined earlier in this script section.
stream_graph = StreamGraph(normal)
stream_graph.plot("c5_b")

0 comments on commit a116e08

Please sign in to comment.