Type and style fixes
HardNorth committed Oct 23, 2024
1 parent 34774d6 commit 37981ad
Showing 3 changed files with 30 additions and 22 deletions.
app/commons/similarity_calculator.py (2 changes: 1 addition & 1 deletion)

@@ -36,7 +36,7 @@ def __init__(self, config: dict[str, Any], similarity_model: WeightedSimilarityC
         }
         self.artificial_columns = ["namespaces_stacktrace"]
 
-    def find_similarity(self, all_results: list[tuple[dict, dict]], fields: list[str]) -> None:
+    def find_similarity(self, all_results: list[tuple[dict[str, Any], dict[str, Any]]], fields: list[str]) -> None:
         for field in fields:
             if field in self.similarity_dict:
                 continue
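For reference, each pair in the annotated `list[tuple[dict[str, Any], dict[str, Any]]]` couples a log with the raw Elasticsearch-style response it produced. A minimal sketch of that shape with hypothetical values; the key layout follows the `res["hits"]["hits"]` and `r["_source"]` accesses visible in the next file:

```python
from typing import Any

# Hypothetical (log, search response) pair; keys mirror the response
# structure these methods index into: res["hits"]["hits"], r["_source"].
log: dict[str, Any] = {"_id": "log-1", "_source": {"message": "NullPointerException in setUp"}}
search_result: dict[str, Any] = {
    "hits": {
        "hits": [
            {"_id": "101", "_score": 12.3, "_source": {"message": "NullPointerException in setUp"}},
        ]
    }
}
all_results: list[tuple[dict[str, Any], dict[str, Any]]] = [(log, search_result)]
```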
app/machine_learning/boosting_featurizer.py (46 changes: 27 additions & 19 deletions)
@@ -34,10 +34,11 @@ class BoostingFeaturizer:
     feature_ids: list[int]
     feature_functions: dict[int, tuple[Callable, dict[str, Any], list[int]]]
     previously_gathered_features: dict[int, list[list[float]]]
+    raw_results: list[tuple[dict[str, Any], dict[str, Any]]]
     all_results: list[tuple[dict[str, Any], list[dict[str, Any]]]]
     total_normalized_score: float
 
-    def __init__(self, all_results: list[tuple[dict[str, Any], list[dict[str, Any]]]], config: dict[str, Any],
+    def __init__(self, results: list[tuple[dict[str, Any], dict[str, Any]]], config: dict[str, Any],
                  feature_ids: str | list[int],
                  weighted_log_similarity_calculator: Optional[WeightedSimilarityCalculator] = None) -> None:
         self.config = config
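A hypothetical construction under the new signature. The import path follows this repository's layout, and the config keys are the ones `__init__` checks in the next hunk, all disabled here so the sketch stays independent of the similarity calculator:

```python
from typing import Any

from app.machine_learning.boosting_featurizer import BoostingFeaturizer

# Hypothetical call site; `results` is the flat (log, raw response) form the
# renamed parameter now names. Parts of __init__ hidden on this page may
# expect further config keys, so treat this as a sketch, not a guarantee.
results: list[tuple[dict[str, Any], dict[str, Any]]] = [
    ({"_id": "log-1"}, {"hits": {"hits": []}}),
]
config: dict[str, Any] = {
    "filter_by_all_logs_should_be_similar": False,
    "filter_by_test_case_hash": False,
    "calculate_similarities": False,  # skip the similarity pass in this sketch
}
featurizer = BoostingFeaturizer(results, config, feature_ids=[0])
```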
@@ -105,28 +106,28 @@ def __init__(self, all_results: list[tuple[dict[str, Any], list[dict[str, Any]]]
         }
 
         fields_to_calc_similarity = self.find_columns_to_find_similarities_for()
-        all_results = self._perform_additional_text_processing(all_results)
+        processed_results = self._perform_additional_text_processing(results)
 
         if "filter_min_should_match" in self.config and len(self.config["filter_min_should_match"]) > 0:
             self.similarity_calculator.find_similarity(
-                all_results, self.config["filter_min_should_match"] + ["merged_small_logs"])
+                processed_results, self.config["filter_min_should_match"] + ["merged_small_logs"])
             for field in self.config["filter_min_should_match"]:
-                all_results = self.filter_by_min_should_match(all_results, field=field)
+                processed_results = self.filter_by_min_should_match(processed_results, field=field)
         if "filter_min_should_match_any" in self.config and len(self.config["filter_min_should_match_any"]) > 0:
             self.similarity_calculator.find_similarity(
-                all_results, self.config["filter_min_should_match_any"] + ["merged_small_logs"])
-            all_results = self.filter_by_min_should_match_any(
-                all_results, fields=self.config["filter_min_should_match_any"])
-        self.test_item_log_stats = self._calculate_stats_by_test_item_ids(all_results)
+                processed_results, self.config["filter_min_should_match_any"] + ["merged_small_logs"])
+            processed_results = self.filter_by_min_should_match_any(
+                processed_results, fields=self.config["filter_min_should_match_any"])
+        self.test_item_log_stats = self._calculate_stats_by_test_item_ids(processed_results)
         if "filter_by_all_logs_should_be_similar" in self.config:
             if self.config["filter_by_all_logs_should_be_similar"]:
-                all_results = self.filter_by_all_logs_should_be_similar(all_results)
+                processed_results = self.filter_by_all_logs_should_be_similar(processed_results)
         if "filter_by_test_case_hash" in self.config and self.config["filter_by_test_case_hash"]:
-            all_results = self.filter_by_test_case_hash(all_results)
+            processed_results = self.filter_by_test_case_hash(processed_results)
         if "calculate_similarities" not in self.config or self.config["calculate_similarities"]:
-            self.similarity_calculator.find_similarity(all_results, fields_to_calc_similarity)
-        self.raw_results = all_results
-        self.all_results = self.normalize_results(all_results)
+            self.similarity_calculator.find_similarity(processed_results, fields_to_calc_similarity)
+        self.raw_results = processed_results
+        self.all_results = self.normalize_results(processed_results)
         self.scores_by_type = None
         self.defect_type_predict_model = None
         self.used_model_info = set()
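One subtlety in the block above: the final guard makes similarity calculation default-on, skipped only when the config explicitly disables it. A self-contained restatement of just that condition (the helper name is ours, not the repository's):

```python
from typing import Any

def should_calculate_similarities(config: dict[str, Any]) -> bool:
    # Same condition as the final guard in __init__ above:
    # default-on, explicit opt-out only.
    return "calculate_similarities" not in config or config["calculate_similarities"]

assert should_calculate_similarities({})
assert should_calculate_similarities({"calculate_similarities": True})
assert not should_calculate_similarities({"calculate_similarities": False})
```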
@@ -202,7 +203,7 @@ def _calculate_test_item_logs_similar_percent(self) -> dict[str, float]:
         return sim_logs_num_scores
 
     @staticmethod
-    def _perform_additional_text_processing(all_results):
+    def _perform_additional_text_processing(all_results: list[tuple[dict[str, Any], dict[str, Any]]]):
         for log, res in all_results:
             for r in res["hits"]["hits"]:
                 if "found_tests_and_methods" in r["_source"]:
@@ -283,7 +284,9 @@ def is_text_of_particular_defect_type(self, label_type: str) -> dict[str, int]:
             issue_type_stats[issue_type] = int(label_type == rel_item_issue_type.lower()[:2])
         return issue_type_stats
 
-    def filter_by_all_logs_should_be_similar(self, all_results):
+    def filter_by_all_logs_should_be_similar(
+            self, all_results: list[tuple[dict[str, Any], dict[str, Any]]]
+    ) -> list[tuple[dict[str, Any], dict[str, Any]]]:
         new_results = []
         for log, res in all_results:
             new_elastic_res = []
@@ -295,7 +298,8 @@ def filter_by_all_logs_should_be_similar(self, all_results):
         return new_results
 
     @staticmethod
-    def filter_by_test_case_hash(all_results):
+    def filter_by_test_case_hash(
+            all_results: list[tuple[dict[str, Any], dict[str, Any]]]) -> list[tuple[dict[str, Any], dict[str, Any]]]:
         new_results = []
         for log, res in all_results:
             test_case_hash_dict = {}
@@ -435,7 +439,7 @@ def is_only_merged_small_logs(self) -> dict[str, int]:
             similarity_percent_by_type[issue_type] = int(sim_obj["both_empty"])
         return similarity_percent_by_type
 
-    def filter_by_min_should_match(self, all_results, field="message"):
+    def filter_by_min_should_match(self, all_results: list[tuple[dict[str, Any], dict[str, Any]]], field="message"):
         new_results = []
         for log, res in all_results:
             new_elastic_res = []
@@ -451,7 +455,9 @@ def filter_by_min_should_match(self, all_results, field="message"):
             new_results.append((log, {"hits": {"hits": new_elastic_res}}))
         return new_results
 
-    def filter_by_min_should_match_any(self, all_results, fields: list[str]):
+    def filter_by_min_should_match_any(
+            self, all_results: list[tuple[dict[str, Any], dict[str, Any]]], fields: list[str]
+    ) -> list[tuple[dict[str, Any], dict[str, Any]]]:
         if not fields:
             return all_results
         new_results = []
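The context lines above show the shape all of these `filter_by_*` methods share: rebuild each response with only the hits that pass a check, preserving the `(log, response)` pairing. A condensed sketch, with a stand-in predicate where each real method applies its own criterion:

```python
from typing import Any, Callable

def filter_hits(
    all_results: list[tuple[dict[str, Any], dict[str, Any]]],
    keep: Callable[[dict[str, Any]], bool],  # stand-in for each method's own check
) -> list[tuple[dict[str, Any], dict[str, Any]]]:
    # Rebuild each response with only the hits that pass the check,
    # keeping the (log, response) pairing intact, as the methods above do.
    new_results = []
    for log, res in all_results:
        new_elastic_res = [r for r in res["hits"]["hits"] if keep(r)]
        new_results.append((log, {"hits": {"hits": new_elastic_res}}))
    return new_results

# Example: drop hits scoring below a hypothetical threshold.
filtered = filter_hits(
    [({"_id": "log-1"}, {"hits": {"hits": [{"_id": "101", "_score": 0.2}]}})],
    keep=lambda r: r["_score"] >= 0.5,
)
assert filtered == [({"_id": "log-1"}, {"hits": {"hits": []}})]
```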
@@ -576,7 +582,9 @@ def _calculate_percent_count_items_and_mean(self, return_val_name: str = 'mean_s
             issue_scores['cnt_items_percent'] /= cnt_items_glob
         return {item: results[return_val_name] for item, results in cnt_items_by_issue_type.items()}
 
-    def normalize_results(self, all_elastic_results) -> list[tuple[dict[str, Any], list[dict[str, Any]]]]:
+    def normalize_results(
+            self, all_elastic_results: list[tuple[dict[str, Any], dict[str, Any]]]
+    ) -> list[tuple[dict[str, Any], list[dict[str, Any]]]]:
         all_results = []
         max_score = 0
         self.total_normalized_score = 0.0
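The new parameter annotation makes the direction of `normalize_results` explicit: flat `(log, response)` pairs in, `(log, hits list)` pairs out, matching the `all_results` attribute annotation. A shape-only illustration; the real method also tracks `max_score` and `total_normalized_score`, which this sketch omits:

```python
from typing import Any

raw: list[tuple[dict[str, Any], dict[str, Any]]] = [
    ({"_id": "log-1"}, {"hits": {"hits": [{"_id": "101", "_score": 2.0}]}}),
]
# Shape-only stand-in for normalize_results: unwrap each response to its hits.
normalized: list[tuple[dict[str, Any], list[dict[str, Any]]]] = [
    (log, res["hits"]["hits"]) for log, res in raw
]
assert normalized[0][1][0]["_id"] == "101"
```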
app/machine_learning/suggest_boosting_featurizer.py (4 changes: 2 additions & 2 deletions)

@@ -21,11 +21,11 @@
 
 class SuggestBoostingFeaturizer(boosting_featurizer.BoostingFeaturizer):
 
-    def __init__(self, all_results: list[tuple[dict[str, Any], list[dict[str, Any]]]], config,
+    def __init__(self, results: list[tuple[dict[str, Any], dict[str, Any]]], config,
                  feature_ids: str | list[int],
                  weighted_log_similarity_calculator: WeightedSimilarityCalculator = None) -> None:
         super().__init__(
-            all_results, config, feature_ids, weighted_log_similarity_calculator=weighted_log_similarity_calculator)
+            results, config, feature_ids, weighted_log_similarity_calculator=weighted_log_similarity_calculator)
 
     def _calculate_percent_issue_types(self) -> dict[str, float]:
         scores_by_issue_type = self.find_most_relevant_by_type()
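The subclass change is pure pass-through: `SuggestBoostingFeaturizer` now accepts the same flat `results` pairs and forwards them to `BoostingFeaturizer` unchanged. A matching hypothetical call, under the same caveats as the parent sketch above:

```python
from typing import Any

from app.machine_learning.suggest_boosting_featurizer import SuggestBoostingFeaturizer

# Hypothetical; hidden parts of __init__ may expect additional config keys.
results: list[tuple[dict[str, Any], dict[str, Any]]] = [
    ({"_id": "log-1"}, {"hits": {"hits": []}}),
]
featurizer = SuggestBoostingFeaturizer(
    results, config={"calculate_similarities": False}, feature_ids=[0])
```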