diff --git a/.flake8 b/.flake8 index a298d113..89747507 100644 --- a/.flake8 +++ b/.flake8 @@ -1,3 +1,4 @@ [flake8] +ignore = E741, W503 exclude = .git,venv,env,fixtures max-line-length = 119 diff --git a/.github/workflows/build-dev-image.yml b/.github/workflows/build-dev-image.yml index 9dbee6bb..64996573 100644 --- a/.github/workflows/build-dev-image.yml +++ b/.github/workflows/build-dev-image.yml @@ -14,7 +14,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Create variables id: vars diff --git a/.github/workflows/build-feature-image.yaml b/.github/workflows/build-feature-image.yaml index 757cf2eb..936361a8 100644 --- a/.github/workflows/build-feature-image.yaml +++ b/.github/workflows/build-feature-image.yaml @@ -13,7 +13,7 @@ jobs: if: (!startsWith(github.head_ref, 'rc/') || !startsWith(github.head_ref, 'hotfix/') || !startsWith(github.head_ref, 'master') || !startsWith(github.head_ref, 'main')) steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Create variables id: vars diff --git a/.github/workflows/build-rc-image.yaml b/.github/workflows/build-rc-image.yaml index 38ac4e19..8a1c44f3 100644 --- a/.github/workflows/build-rc-image.yaml +++ b/.github/workflows/build-rc-image.yaml @@ -13,7 +13,7 @@ jobs: environment: rc steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Create variables id: vars diff --git a/.github/workflows/dockerhub-release.yaml b/.github/workflows/dockerhub-release.yaml index 1bc718d1..240c4669 100644 --- a/.github/workflows/dockerhub-release.yaml +++ b/.github/workflows/dockerhub-release.yaml @@ -20,7 +20,7 @@ jobs: if: github.event.pull_request.base.ref == 'master' || github.event.pull_request.base.ref == 'main' steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Configure AWS credentials uses: aws-actions/configure-aws-credentials@v2 diff --git a/.github/workflows/sync-jira-versions.yml b/.github/workflows/sync-jira-versions.yml new file mode 100644 index 00000000..f31c5d45 --- /dev/null +++ b/.github/workflows/sync-jira-versions.yml @@ -0,0 +1,13 @@ +name: Add GitHub release version to Jira issues + +on: + pull_request: + types: [opened, synchronize, reopened] + +jobs: + call-jira-sync: + name: Call Jira versions update + uses: reportportal/.github/.github/workflows/update-jira-versions.yaml@main + with: + jira-server: ${{ vars.JIRA_SERVER }} + secrets: inherit \ No newline at end of file diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 2ed08e2f..e7fe356e 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -18,14 +18,15 @@ on: [ push, pull_request ] jobs: build: runs-on: ubuntu-latest + timeout-minutes: 20 steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: - python-version: '3.10' + python-version: '3.11' - name: Install dependencies run: | @@ -41,7 +42,8 @@ jobs: run: pytest --cov-config=.coveragerc --cov --cov-report=xml test/ -s -vv - name: Upload coverage to Codecov - uses: codecov/codecov-action@v3 + uses: codecov/codecov-action@v4 with: + token: ${{ secrets.CODECOV_TOKEN }} files: coverage.xml flags: unittests diff --git a/Dockerfile b/Dockerfile index b657babf..89f9006d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,6 @@ -FROM --platform=${BUILDPLATFORM} bitnami/python:3.10.14 AS test -RUN 
apt-get update && apt-get install -y build-essential \ - && rm -rf /var/lib/apt/lists/* \ +FROM registry.access.redhat.com/ubi8/python-311:latest AS test +USER root +RUN dnf -y upgrade \ && python -m venv /venv \ && mkdir /build ENV VIRTUAL_ENV=/venv @@ -13,10 +13,12 @@ RUN "${VIRTUAL_ENV}/bin/pip" install --upgrade pip \ RUN "${VIRTUAL_ENV}/bin/pip" install --no-cache-dir -r requirements-dev.txt RUN make test-all - -FROM --platform=${BUILDPLATFORM} bitnami/python:3.10.14 AS builder -RUN apt-get update && apt-get install -y build-essential libpcre3 libpcre3-dev \ - && rm -rf /var/lib/apt/lists/* \ +FROM registry.access.redhat.com/ubi8/python-311:latest AS builder +USER root +RUN dnf -y upgrade && dnf -y install pcre-devel \ + && dnf -y remove emacs-filesystem libjpeg-turbo libtiff libpng wget \ + && dnf -y autoremove \ + && dnf clean all \ && python -m venv /venv \ && mkdir /build ENV VIRTUAL_ENV=/venv @@ -24,6 +26,7 @@ ENV PATH="${VIRTUAL_ENV}/bin:${PATH}" WORKDIR /build COPY ./ ./ RUN "${VIRTUAL_ENV}/bin/pip" install --upgrade pip \ + && "${VIRTUAL_ENV}/bin/pip" install --upgrade setuptools \ && LIBRARY_PATH=/lib:/usr/lib /bin/sh -c "${VIRTUAL_ENV}/bin/pip install --no-cache-dir -r requirements.txt" \ && "${VIRTUAL_ENV}/bin/python3" -m nltk.downloader -d /usr/share/nltk_data stopwords ARG APP_VERSION="" @@ -35,15 +38,18 @@ RUN mkdir /backend \ && cp -r /build/app /backend/ \ && cp -r /build/res /backend/ - -FROM --platform=${BUILDPLATFORM} bitnami/python:3.10.14 +FROM registry.access.redhat.com/ubi8/python-311:latest +USER root WORKDIR /backend/ COPY --from=builder /backend ./ COPY --from=builder /venv /venv COPY --from=builder /usr/share/nltk_data /usr/share/nltk_data/ -RUN apt-get update && apt-get -y upgrade \ - && apt-get install -y libxml2 libgomp1 curl libpcre3 libpcre3-dev \ - && rm -rf /var/lib/apt/lists/* \ +RUN dnf -y upgrade && dnf -y install pcre-devel \ + && dnf -y remove emacs-filesystem libjpeg-turbo libtiff libpng wget \ + && dnf -y autoremove \ + && dnf clean all \ + && pip install --upgrade pip \ + && pip install --upgrade setuptools \ && mkdir -p -m 0700 /backend/storage \ && groupadd uwsgi && useradd -g uwsgi uwsgi \ && chown -R uwsgi: /usr/share/nltk_data \ diff --git a/README.md b/README.md index a7879996..0bd52a0e 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![Tests](https://github.com/reportportal/service-auto-analyzer/actions/workflows/tests.yml/badge.svg)](https://github.com/reportportal/service-auto-analyzer/actions/workflows/tests.yml) [![codecov](https://codecov.io/github/reportportal/service-auto-analyzer/branch/master/graph/badge.svg?token=Y3llbuAYLr)](https://codecov.io/github/reportportal/service-auto-analyzer) -[![Join Slack chat!](https://slack.epmrpp.reportportal.io/badge.svg)](https://slack.epmrpp.reportportal.io/) +[![Join Slack chat!](https://img.shields.io/badge/slack-join-brightgreen.svg)](https://slack.epmrpp.reportportal.io/) [![stackoverflow](https://img.shields.io/badge/reportportal-stackoverflow-orange.svg?style=flat)](http://stackoverflow.com/questions/tagged/reportportal) [![Build with Love](https://img.shields.io/badge/build%20with-❤%EF%B8%8F%E2%80%8D-lightgrey.svg)](http://reportportal.io?style=flat) @@ -58,9 +58,9 @@ | PATTERN_LABEL_MIN_PERCENT | float | 0.9 | the value of minimum percent of the same issue type for pattern to be suggested as a pattern with a label | | PATTERN_LABEL_MIN_COUNT | integer | 5 | the value of minimum count of pattern occurrence to be suggested as a pattern with a label | | PATTERN_MIN_COUNT | 
integer | 10 | the value of minimum count of pattern occurrence to be suggested as a pattern without a label | -| MAX_LOGS_FOR_DEFECT_TYPE_MODEL | integer | 10000 | the value of maximum count of logs per defect type to add into defect type model training. Default value is chosen in cosideration of having space for analyzer_train docker image setuo of 1GB, if you can give more GB you can linearly allow more logs to be considered. | -| PROB_CUSTOM_MODEL_SUGGESTIONS | float | 0.7 | the probability of custom retrained model to be used for running when suggestions are requested. The maximum value is 0.8, because we want at least 20% of requests to process with a global model not to overfit for project too much. The bigger the value of this env varibale the more often custom retrained model will be used. | -| PROB_CUSTOM_MODEL_AUTO_ANALYSIS | float | 0.5 | the probability of custom retrained model to be used for running when auto-analysis is performed. The maximum value is 1.0. The bigger the value of this env varibale the more often custom retrained model will be used. | +| MAX_LOGS_FOR_DEFECT_TYPE_MODEL | integer | 10000 | the value of maximum count of logs per defect type to add into defect type model training. Default value is chosen in consideration of having space for the analyzer_train docker image setup of 1GB; if you can give more GB, you can linearly allow more logs to be considered. | +| PROB_CUSTOM_MODEL_SUGGESTIONS | float | 0.7 | the probability of custom retrained model to be used for running when suggestions are requested. The maximum value is 0.8, because we want at least 20% of requests to process with a global model not to overfit for project too much. The bigger the value of this env variable, the more often the custom retrained model will be used. | +| PROB_CUSTOM_MODEL_AUTO_ANALYSIS | float | 0.5 | the probability of custom retrained model to be used for running when auto-analysis is performed. The maximum value is 1.0. The bigger the value of this env variable, the more often the custom retrained model will be used. | | MAX_SUGGESTIONS_NUMBER | integer | 3 | the maximum number of suggestions shown in the ML suggestions area in the defect type editor. | ## Instructions for analyzer setup without Docker diff --git a/app/amqp/amqp.py b/app/amqp/amqp.py index ee8a34ff..acc77050 100644 --- a/app/amqp/amqp.py +++ b/app/amqp/amqp.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License.
-import logging import os + import pika +from app.commons import logging from app.utils import text_processing logger = logging.getLogger("analyzerApp.amqp") @@ -23,13 +24,16 @@ class AmqpClient: """AmqpClient handles communication with rabbitmq""" - def __init__(self, amqpUrl): - self.connection = AmqpClient.create_ampq_connection(amqpUrl) + + connection: pika.BlockingConnection + + def __init__(self, amqp_url): + self.connection = AmqpClient.create_ampq_connection(amqp_url) @staticmethod - def create_ampq_connection(amqpUrl): + def create_ampq_connection(amqp_url): """Creates AMQP client""" - amqp_full_url = amqpUrl.rstrip("\\").rstrip("/") + "?heartbeat=600" + amqp_full_url = amqp_url.rstrip("\\").rstrip("/") + "?heartbeat=600" logger.info("Try connect to %s" % text_processing.remove_credentials_from_url(amqp_full_url)) return pika.BlockingConnection(pika.connection.URLParameters(amqp_full_url)) @@ -37,20 +41,19 @@ def create_ampq_connection(amqpUrl): def bind_queue(channel, name, exchange_name): """AmqpClient binds a queue with an exchange for rabbitmq""" try: - result = channel.queue_declare(queue=name, durable=False, - exclusive=False, auto_delete=True, + result = channel.queue_declare(queue=name, durable=False, exclusive=False, auto_delete=True, arguments=None) - except Exception as err: - logger.error("Failed to open a channel pid(%d)", os.getpid()) - logger.error(err) + except Exception as exc: + logger.error(f'Failed to declare a queue "{name}" pid({os.getpid()})') + logger.exception(exc) os.kill(os.getpid(), 9) return False logger.info("Queue '%s' has been declared pid(%d)", result.method.queue, os.getpid()) try: channel.queue_bind(exchange=exchange_name, queue=result.method.queue, routing_key=name) - except Exception as err: - logger.error("Failed to open a channel pid(%d)", os.getpid()) - logger.error(err) + except Exception as exc: + logger.error(f'Failed to bind a queue "{name}" pid({os.getpid()})') + logger.exception(exc) os.kill(os.getpid(), 9) return True @@ -59,16 +62,16 @@ def consume_queue(channel, queue, auto_ack, exclusive, msg_callback): """AmqpClient shows how to handle a message from the queue""" try: channel.basic_qos(prefetch_count=1, prefetch_size=0) - except Exception as err: + except Exception as exc: logger.error("Failed to configure Qos pid(%d)", os.getpid()) - logger.error(err) + logger.exception(exc) os.kill(os.getpid(), 9) try: channel.basic_consume(queue=queue, auto_ack=auto_ack, exclusive=exclusive, on_message_callback=msg_callback) - except Exception as err: + except Exception as exc: logger.error("Failed to register a consumer pid(%d)", os.getpid()) - logger.error(err) + logger.exception(exc) os.kill(os.getpid(), 9) def receive(self, exchange_name, queue, auto_ack, exclusive, msg_callback): @@ -79,18 +82,15 @@ def receive(self, exchange_name, queue, auto_ack, exclusive, msg_callback): AmqpClient.consume_queue(channel, queue, auto_ack, exclusive, msg_callback) logger.info("started consuming pid(%d) on the queue %s", os.getpid(), queue) channel.start_consuming() - except Exception as err: + except Exception as exc: logger.error("Failed to consume messages pid(%d) in queue %s", os.getpid(), queue) - logger.error(err) + logger.exception(exc) os.kill(os.getpid(), 9) - def send_to_inner_queue(self, exchange_name, queue, data): + def send_to_inner_queue(self, exchange_name: str, queue: str, data: str) -> None: try: channel = self.connection.channel() - channel.basic_publish( - exchange=exchange_name, - routing_key=queue, - body=data) - except Exception as 
err: + channel.basic_publish(exchange=exchange_name, routing_key=queue, body=bytes(data, 'utf-8')) + except Exception as exc: logger.error("Failed to publish messages in queue %s", queue) - logger.error(err) + logger.exception(exc) diff --git a/app/amqp/amqp_handler.py b/app/amqp/amqp_handler.py index 514d99bb..322c238e 100644 --- a/app/amqp/amqp_handler.py +++ b/app/amqp/amqp_handler.py @@ -12,134 +12,156 @@ # See the License for the specific language governing permissions and # limitations under the License. -import logging import json -import pika -from app.commons import launch_objects +import uuid +from typing import Callable, Any, Optional + +from pika.adapters.blocking_connection import BlockingChannel +from pika.spec import Basic, BasicProperties + +from app.commons import logging +from app.commons.model import launch_objects, ml logger = logging.getLogger("analyzerApp.amqpHandler") -def prepare_launches(launches): +def prepare_launches(launches: list) -> list[launch_objects.Launch]: """Function for deserializing array of launches""" return [launch_objects.Launch(**launch) for launch in launches] -def prepare_suggest_info_list(suggest_info_list): +def prepare_suggest_info_list(suggest_info_list: list) -> list[launch_objects.SuggestAnalysisResult]: """Function for deserializing array of suggest info results""" return [launch_objects.SuggestAnalysisResult(**res) for res in suggest_info_list] -def prepare_search_logs(search_data): +def prepare_search_logs(search_data: dict) -> launch_objects.SearchLogs: """Function for deserializing search logs object""" return launch_objects.SearchLogs(**search_data) -def prepare_launch_info(launch_info): +def prepare_launch_info(launch_info: dict) -> launch_objects.LaunchInfoForClustering: """Function for deserializing search logs object""" return launch_objects.LaunchInfoForClustering(**launch_info) -def prepare_clean_index(clean_index): +def prepare_clean_index(clean_index: dict) -> launch_objects.CleanIndex: """Function for deserializing clean index object""" return launch_objects.CleanIndex(**clean_index) -def prepare_delete_index(body): +def prepare_delete_index(body: Any) -> int: """Function for deserializing index id object""" return int(body) -def prepare_test_item_info(test_item_info): +def prepare_test_item_info(test_item_info: Any) -> launch_objects.TestItemInfo: """Function for deserializing test item info for suggestions""" return launch_objects.TestItemInfo(**test_item_info) -def prepare_search_response_data(response): +def prepare_train_info(train_info: dict) -> ml.TrainInfo: + """Function for deserializing train info object""" + return ml.TrainInfo(**train_info) + + +def prepare_search_response_data(response: list | dict) -> str: """Function for serializing response from search request""" return json.dumps(response) -def prepare_analyze_response_data(response): +def prepare_analyze_response_data(response: list) -> str: """Function for serializing response from analyze request""" return json.dumps([resp.dict() for resp in response]) -def prepare_index_response_data(response): +def prepare_index_response_data(response: Any) -> str: """Function for serializing response from index request and other objects, which are pydantic objects""" return response.json() -def output_result(response): +def output_result(response: Any) -> str: """Function for serializing int object""" return str(response) -def handle_amqp_request(channel, method, props, body, - request_handler, prepare_data_func=prepare_launches, - 
prepare_response_data=prepare_search_response_data, - publish_result=True): - """Function for handling amqp reuqest: index, search and analyze""" - logger.debug("Started processing %s method %s props", method, props) - logger.debug("Started processing data %s", body) +def __get_correlation_id() -> str: + return str(uuid.uuid4()) + + +def handle_amqp_request(channel: BlockingChannel, method: Basic.Deliver, props: BasicProperties, body: bytes, + request_handler: Callable[[Any], Any], + prepare_data_func: Callable[[Any], Any] = prepare_launches, + prepare_response_data: Callable[[Any], str] = prepare_search_response_data, + publish_result: bool = True) -> None: + """Function for handling amqp request: index, search and analyze.""" + logging.new_correlation_id() + logger.debug(f'Started message processing:\n--Method: {method}\n' + f'--Properties: {props}\n--Body: {body}') try: - launches = json.loads(body, strict=False) - except Exception as err: - logger.error("Failed to load json from body") - logger.error(err) - return False + message = json.loads(body, strict=False) + except Exception as exc: + logger.error('Failed to parse message body to JSON') + logger.exception(exc) + return try: - launches = prepare_data_func(launches) - except Exception as err: - logger.error("Failed to transform body into objects") - logger.error(err) - return False + message = prepare_data_func(message) + except Exception as exc: + logger.error('Failed to prepare message body') + logger.exception(exc) + return try: - response = request_handler(launches) - except Exception as err: - logger.error("Failed to process launches") - logger.error(err) - return False + response = request_handler(message) + except Exception as exc: + logger.error('Failed to handle message') + logger.exception(exc) + return try: response_body = prepare_response_data(response) - except Exception as err: - logger.error("Failed to dump launches result") - logger.error(err) - return False + except Exception as exc: + logger.error('Failed to prepare response body') + logger.exception(exc) + return if publish_result: try: if props.reply_to: - channel.basic_publish(exchange='', - routing_key=props.reply_to, - properties=pika.BasicProperties( - correlation_id=props.correlation_id, - content_type="application/json"), - mandatory=False, - body=response_body) - except Exception as err: - logger.error("Failed to publish result") - logger.error(err) - logger.debug("Finished processing %s method", method) - return True - - -def handle_inner_amqp_request(channel, method, props, body, request_handler): - """Function for handling inner amqp reuqests""" - logger.debug("Started processing %s method %s props", method, props) - logger.debug("Started processing data %s", body) + channel.basic_publish( + exchange='', routing_key=props.reply_to, + properties=BasicProperties(correlation_id=props.correlation_id, content_type='application/json'), + mandatory=False, body=bytes(response_body, 'utf-8')) + except Exception as exc: + logger.error('Failed to publish result') + logger.exception(exc) + return + logger.debug('Finished processing message') + + +def handle_inner_amqp_request(_: BlockingChannel, method: Basic.Deliver, props: BasicProperties, body: bytes, + request_handler: Callable[[Any], Any], + prepare_data_func: Optional[Callable[[Any], Any]] = None): + """Function for handling inner amqp requests.""" + logging.new_correlation_id() + logger.debug(f'Started inner message processing.\n--Method: {method}\n' + f'--Properties: {props}\n--Body: {body}') try: - 
stats_info = json.loads(body, strict=False) - except Exception as err: - logger.error("Failed to load json from body") - logger.error(err) - return False + message = json.loads(body, strict=False) + except Exception as exc: + logger.error('Failed to parse message body to JSON') + logger.exception(exc) + return + if prepare_data_func: + try: + message = prepare_data_func(message) + except Exception as exc: + logger.error('Failed to prepare message body') + logger.exception(exc) + return try: - request_handler(stats_info) - except Exception as err: - logger.error("Failed to process stats info") - logger.error(err) - return False - logger.debug("Finished processing %s method", method) - return True + request_handler(message) + except Exception as exc: + logger.error('Failed to handle message') + logger.exception(exc) + return + logger.debug('Finished processing message') diff --git a/app/boosting_decision_making/boosting_decision_maker.py b/app/boosting_decision_making/boosting_decision_maker.py deleted file mode 100644 index 22868c1b..00000000 --- a/app/boosting_decision_making/boosting_decision_maker.py +++ /dev/null @@ -1,133 +0,0 @@ -# Copyright 2023 EPAM Systems -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from xgboost import XGBClassifier -from sklearn.metrics import classification_report, confusion_matrix -import os -import pickle -import logging - -from app.utils import text_processing -from app.boosting_decision_making import feature_encoder - -logger = logging.getLogger("analyzerApp.boosting_decision_maker") - - -class BoostingDecisionMaker: - - def __init__(self, folder="", n_estimators=75, max_depth=5, monotonous_features="", is_global=True): - self.n_estimators = n_estimators - self.max_depth = max_depth - self.folder = folder - self.monotonous_features = text_processing.transform_string_feature_range_into_list( - monotonous_features) - self.is_global = is_global - self.features_dict_with_saved_objects = {} - if folder.strip(): - self.load_model() - else: - self.xg_boost = XGBClassifier(n_estimators=n_estimators, max_depth=max_depth, random_state=43) - - def get_model_info(self): - folder_name = os.path.basename(self.folder.strip("/").strip("\\")).strip() - if folder_name: - tags = [folder_name] - if not self.is_global: - return tags + ["custom boosting model"] - return tags + ["global boosting model"] - return [] - - def get_feature_ids(self): - return text_processing.transform_string_feature_range_into_list(self.feature_ids)\ - if isinstance(self.feature_ids, str) else self.feature_ids - - def get_feature_names(self): - feature_ids = self.get_feature_ids() - feature_names = [] - for _id in feature_ids: - if _id in self.features_dict_with_saved_objects: - feature_names_from_encodings = self.features_dict_with_saved_objects[_id].get_feature_names() - feature_names.extend( - [str(_id) + "_" + feature_name for feature_name in feature_names_from_encodings]) - else: - feature_names.append(str(_id)) - return feature_names - - def add_config_info(self, full_config, features, 
monotonous_features): - self.full_config = full_config - self.feature_ids = features - self.monotonous_features = monotonous_features - - def transform_feature_encoders_to_dict(self): - features_dict_with_saved_objects = {} - for feature in self.features_dict_with_saved_objects: - feature_info = self.features_dict_with_saved_objects[feature].save_to_feature_info() - features_dict_with_saved_objects[feature] = feature_info - return features_dict_with_saved_objects - - def transform_feature_encoders_to_objects(self, features_dict_with_saved_objects): - _features_dict_with_saved_objects = {} - for feature in features_dict_with_saved_objects: - _feature_encoder = feature_encoder.FeatureEncoder() - _feature_encoder.load_from_feature_info(features_dict_with_saved_objects[feature]) - _features_dict_with_saved_objects[feature] = _feature_encoder - return _features_dict_with_saved_objects - - def load_model(self): - with open(os.path.join(self.folder, "boost_model.pickle"), "rb") as f: - self.n_estimators, self.max_depth, self.xg_boost = pickle.load(f) - with open(os.path.join(self.folder, "data_features_config.pickle"), "rb") as f: - self.full_config, self.feature_ids, self.monotonous_features = pickle.load(f) - if os.path.exists(os.path.join(self.folder, "features_dict_with_saved_objects.pickle")): - features_dict_with_saved_objects = {} - with open(os.path.join(self.folder, "features_dict_with_saved_objects.pickle"), "rb") as f: - features_dict_with_saved_objects = pickle.load(f) - self.features_dict_with_saved_objects = self.transform_feature_encoders_to_objects( - features_dict_with_saved_objects) - else: - self.features_dict_with_saved_objects = {} - - def save_model(self, folder): - if not os.path.exists(folder): - os.makedirs(folder) - with open(os.path.join(folder, "boost_model.pickle"), "wb") as f: - pickle.dump([self.n_estimators, self.max_depth, self.xg_boost], f) - with open(os.path.join(folder, "data_features_config.pickle"), "wb") as f: - pickle.dump([self.full_config, self.feature_ids, self.monotonous_features], f) - with open(os.path.join(folder, "features_dict_with_saved_objects.pickle"), "wb") as f: - pickle.dump(self.transform_feature_encoders_to_dict(), f) - - def train_model(self, train_data, labels): - mon_features = [ - (1 if feature in self.monotonous_features else 0) for feature in self.get_feature_ids()] - mon_features_prepared = "(" + ",".join([str(f) for f in mon_features]) + ")" - self.xg_boost = XGBClassifier(n_estimators=self.n_estimators, - max_depth=self.max_depth, random_state=43, - monotone_constraints=mon_features_prepared) - self.xg_boost.fit(train_data, labels) - logger.info("Train score: %s", self.xg_boost.score(train_data, labels)) - logger.info("Feature importances: %s", self.xg_boost.feature_importances_) - - def validate_model(self, valid_test_set, valid_test_labels): - res, res_prob = self.predict(valid_test_set) - f1_score = self.xg_boost.score(valid_test_set, valid_test_labels) - logger.info("Valid dataset F1 score: %s", f1_score) - logger.info(confusion_matrix(valid_test_labels, res)) - logger.info(classification_report(valid_test_labels, res)) - return f1_score - - def predict(self, data): - if not len(data): - return [], [] - return self.xg_boost.predict(data), self.xg_boost.predict_proba(data) diff --git a/app/boosting_decision_making/custom_boosting_decision_maker.py b/app/boosting_decision_making/custom_boosting_decision_maker.py deleted file mode 100644 index f784f70f..00000000 --- a/app/boosting_decision_making/custom_boosting_decision_maker.py 
+++ /dev/null @@ -1,62 +0,0 @@ -# Copyright 2023 EPAM Systems -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from app.boosting_decision_making.boosting_decision_maker import BoostingDecisionMaker -from app.commons.object_saving.object_saver import ObjectSaver -import os -import logging - -logger = logging.getLogger("analyzerApp.custom_boosting_decision_maker") - - -class CustomBoostingDecisionMaker(BoostingDecisionMaker): - - def __init__(self, app_config, project_id, folder=""): - self.project_id = project_id - self.object_saver = ObjectSaver(app_config) - super().__init__(folder=folder) - self.is_global = False - - def load_model(self): - self.n_estimators, self.max_depth, self.xg_boost = self.object_saver.get_project_object( - self.project_id, os.path.join(self.folder, "boost_model"), - using_json=False) - assert self.xg_boost is not None - self.full_config, self.feature_ids, self.monotonous_features = self.object_saver.get_project_object( - self.project_id, os.path.join(self.folder, "data_features_config"), - using_json=False) - assert len(self.full_config) > 0 - if self.object_saver.does_object_exists( - self.project_id, os.path.join(self.folder, "features_dict_with_saved_objects")): - features_dict_with_saved_objects = self.object_saver.get_project_object( - self.project_id, os.path.join(self.folder, "features_dict_with_saved_objects"), - using_json=False) - self.features_dict_with_saved_objects = self.transform_feature_encoders_to_objects( - features_dict_with_saved_objects) - else: - self.features_dict_with_saved_objects = {} - - def save_model(self, folder): - self.object_saver.put_project_object( - [self.n_estimators, self.max_depth, self.xg_boost], - self.project_id, os.path.join(folder, "boost_model"), - using_json=False) - self.object_saver.put_project_object( - [self.full_config, self.feature_ids, self.monotonous_features], - self.project_id, os.path.join(folder, "data_features_config"), - using_json=False) - self.object_saver.put_project_object( - self.transform_feature_encoders_to_dict(), - self.project_id, os.path.join(folder, "features_dict_with_saved_objects"), - using_json=False) diff --git a/app/boosting_decision_making/custom_defect_type_model.py b/app/boosting_decision_making/custom_defect_type_model.py deleted file mode 100644 index a4f11e28..00000000 --- a/app/boosting_decision_making/custom_defect_type_model.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright 2023 EPAM Systems -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from app.boosting_decision_making.defect_type_model import DefectTypeModel -from app.commons.object_saving.object_saver import ObjectSaver -import os - - -class CustomDefectTypeModel(DefectTypeModel): - - def __init__(self, app_config, project_id, folder=""): - self.project_id = project_id - self.object_saver = ObjectSaver(app_config) - super(CustomDefectTypeModel, self).__init__(folder=folder) - self.is_global = False - - def load_model(self, folder): - self.count_vectorizer_models = self.object_saver.get_project_object( - self.project_id, os.path.join(folder, "count_vectorizer_models"), - using_json=False) - assert len(self.count_vectorizer_models) > 0 - self.models = self.object_saver.get_project_object( - self.project_id, os.path.join(folder, "models"), - using_json=False) - assert len(self.models) > 0 - - def save_model(self, folder): - self.object_saver.put_project_object( - self.count_vectorizer_models, - self.project_id, os.path.join(folder, "count_vectorizer_models"), - using_json=False) - self.object_saver.put_project_object( - self.models, - self.project_id, os.path.join(folder, "models"), - using_json=False) diff --git a/app/boosting_decision_making/defect_type_model.py b/app/boosting_decision_making/defect_type_model.py deleted file mode 100644 index 69737be9..00000000 --- a/app/boosting_decision_making/defect_type_model.py +++ /dev/null @@ -1,111 +0,0 @@ -# Copyright 2023 EPAM Systems -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from sklearn.feature_extraction.text import TfidfVectorizer -from sklearn.ensemble import RandomForestClassifier -from sklearn.metrics import f1_score, accuracy_score -from sklearn.metrics import classification_report, confusion_matrix - -from app.utils import text_processing -import pandas as pd -import os -import pickle -from collections import Counter - - -class DefectTypeModel: - - def __init__(self, folder=""): - self.folder = folder - self.count_vectorizer_models = {} - self.models = {} - self.is_global = True - if self.folder: - self.load_model(folder) - - def get_model_info(self): - folder_name = os.path.basename(self.folder.strip("/").strip("\\")).strip() - if folder_name: - tags = [folder_name] - if not self.is_global: - return tags + ["custom defect type model"] - return tags + ["global defect type model"] - return [] - - def load_model(self, folder): - with open(os.path.join(folder, "count_vectorizer_models.pickle"), "rb") as f: - self.count_vectorizer_models = pickle.load(f) - with open(os.path.join(folder, "models.pickle"), "rb") as f: - self.models = pickle.load(f) - - def save_model(self, folder): - os.makedirs(folder, exist_ok=True) - with open(os.path.join(folder, "count_vectorizer_models.pickle"), "wb") as f: - pickle.dump(self.count_vectorizer_models, f) - with open(os.path.join(folder, "models.pickle"), "wb") as f: - pickle.dump(self.models, f) - - def train_model(self, name, train_data_x, labels): - self.count_vectorizer_models[name] = TfidfVectorizer( - binary=True, stop_words="english", min_df=5, - token_pattern=r"[\w\._]+", analyzer=text_processing.preprocess_words) - transformed_values = self.count_vectorizer_models[name].fit_transform(train_data_x) - print("Length of train data: ", len(labels)) - print("Label distribution:", Counter(labels)) - model = RandomForestClassifier(class_weight="balanced") - x_train_values = pd.DataFrame( - transformed_values.toarray(), - columns=self.count_vectorizer_models[name].get_feature_names_out()) - model.fit(x_train_values, labels) - self.models[name] = model - - def train_models(self, train_data): - for name, train_data_x, labels in train_data: - self.train_model(name, train_data_x, labels) - - def validate_model(self, name, test_data_x, labels): - assert name in self.models - print("Label distribution:", Counter(labels)) - print("Model name: %s" % name) - res, res_prob = self.predict(test_data_x, name) - print("Valid dataset F1 score: ", f1_score(y_pred=res, y_true=labels)) - print(confusion_matrix(y_pred=res, y_true=labels)) - print(classification_report(y_pred=res, y_true=labels)) - f1 = f1_score(y_pred=res, y_true=labels) - if f1 != f1: - f1 = 0.0 - accuracy = accuracy_score(y_pred=res, y_true=labels) - if accuracy != accuracy: - accuracy = 0.0 - return f1, accuracy - - def validate_models(self, test_data): - results = [] - for name, test_data_x, labels in test_data: - f1, accuracy = self.validate_model( - name, test_data_x, labels) - results.append((name, f1, accuracy)) - return results - - def predict(self, data, model_name): - assert model_name in self.models - if len(data) == 0: - return [], [] - transformed_values = self.count_vectorizer_models[model_name].transform(data) - x_test_values = pd.DataFrame( - transformed_values.toarray(), - columns=self.count_vectorizer_models[model_name].get_feature_names_out()) - predicted_labels = self.models[model_name].predict(x_test_values) - predicted_probs = self.models[model_name].predict_proba(x_test_values) - return predicted_labels, predicted_probs diff --git 
a/app/boosting_decision_making/feature_encoder.py b/app/boosting_decision_making/feature_encoder.py deleted file mode 100644 index a9ceadfa..00000000 --- a/app/boosting_decision_making/feature_encoder.py +++ /dev/null @@ -1,171 +0,0 @@ -# Copyright 2023 EPAM Systems -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -from sklearn.feature_extraction.text import HashingVectorizer -from sklearn.feature_extraction.text import TfidfVectorizer -from sklearn.feature_extraction.text import CountVectorizer -from sklearn.preprocessing import OneHotEncoder -import re - -from app.utils import text_processing - -logger = logging.getLogger("analyzerApp.feature_encoder") - - -class FeatureEncoder: - - def __init__(self, field_name="", encoding_type="", max_features=50, ngram_max=2): - self.field_name = field_name - self.encoding_type = encoding_type - self.max_features = max_features - self.additional_info = {} - self.ngram_max = ngram_max - self.encoder = None - self.prepare_text_functions = { - "launch_name": FeatureEncoder.prepare_text_launch_name, - "detected_message": FeatureEncoder.prepare_text_message, - "stacktrace": FeatureEncoder.prepare_stacktrace, - "test_item_name": FeatureEncoder.prepare_test_item_name, - "found_exceptions": FeatureEncoder.prepare_found_exceptions - } - - @staticmethod - def add_default_value(texts, default_value): - return [(text if text.strip() else default_value) for text in texts] - - @staticmethod - def prepare_text_message(data): - messages = [" ".join(text_processing.split_words(text)).replace(".", "_") for text in data] - return FeatureEncoder.add_default_value(messages, "nomessage") - - @staticmethod - def prepare_stacktrace(data): - stacktraces = [ - " ".join([w for w in text_processing.split_words(text) if "." 
in w]).replace(".", "_") for text in data] - return FeatureEncoder.add_default_value(stacktraces, "nostacktrace") - - @staticmethod - def prepare_found_exceptions(data): - found_exceptions = [text.replace(".", "_") for text in data] - return FeatureEncoder.add_default_value(found_exceptions, "noexception") - - @staticmethod - def prepare_text_launch_name(data): - launch_names = [re.sub(r"\d+", " ", text.replace("-", " ").replace("_", " ")) for text in data] - return FeatureEncoder.add_default_value(launch_names, "nolaunchname") - - @staticmethod - def prepare_test_item_name(data): - test_item_names = [re.sub(r"\d+", " ", text).replace(".", "_") for text in data] - return FeatureEncoder.add_default_value(test_item_names, "notestitemname") - - @staticmethod - def encode_categories(data, categories_data, include_zero=False): - encoded_data = [] - for d_ in data: - if d_ in categories_data: - encoded_data.append([categories_data[d_]]) - elif include_zero: - encoded_data.append([0]) - return encoded_data - - def get_feature_names(self) -> list[str]: - feature_names = [] - if self.encoding_type == "one_hot": - for _key in sorted(self.additional_info.items(), key=lambda x: x[1]): - feature_names.append(_key[0]) - elif self.encoding_type == "hashing": - feature_names = [str(x_) for x_ in range(self.max_features)] - else: - feature_names = self.encoder.get_feature_names_out().tolist() - return feature_names - - def extract_data(self, logs): - data_gathered = [] - for log in logs: - if self.field_name in log["_source"]: - data_gathered.append(log["_source"][self.field_name]) - return data_gathered - - def get_categories(self, data): - data_frequency = {} - for d_ in data: - if d_ not in data_frequency: - data_frequency[d_] = 0 - data_frequency[d_] += 1 - sorted_freq = sorted(data_frequency.items(), key=lambda x: x[1], reverse=True) - idx = 1 - categories_labelling = {} - for name, cnt in sorted_freq[:self.max_features]: - categories_labelling[name] = idx - idx += 1 - return categories_labelling - - def prepare_data_for_encoding(self, data, include_zero=False): - if self.encoding_type == "one_hot": - data = FeatureEncoder.encode_categories(data, self.additional_info, include_zero=include_zero) - else: - if self.field_name in self.prepare_text_functions: - data = self.prepare_text_functions[self.field_name](data) - else: - logger.error("Prepare text function is not defined for the field '%s'" % self.field_name) - return data - - def fit(self, texts): - if self.encoding_type == "one_hot": - self.encoder = OneHotEncoder(handle_unknown='ignore') - elif self.encoding_type == "hashing": - self.encoder = HashingVectorizer( - n_features=self.max_features, ngram_range=(1, self.ngram_max), stop_words="english") - elif self.encoding_type == "count_vector": - self.encoder = CountVectorizer( - max_features=self.max_features, ngram_range=(1, self.ngram_max), - binary=True, stop_words="english") - elif self.encoding_type == "tf_idf": - self.encoder = TfidfVectorizer( - max_features=self.max_features, ngram_range=(1, self.ngram_max), stop_words="english") - else: - logger.error("Encoding type '%s' is not found", self.encoding_type) - if self.encoder: - extracted_data = self.extract_data(texts) - logger.debug("Extracted data %d", len(extracted_data)) - if self.encoding_type == "one_hot": - self.additional_info = self.get_categories(extracted_data) - prepared_data = self.prepare_data_for_encoding(extracted_data) - logger.debug("Prepared data %d", len(prepared_data)) - self.encoder.fit(prepared_data) - 
logger.debug("Fit data with encoding '%s'" % self.encoding_type) - - def transform(self, data): - if self.encoder: - prepared_data = self.prepare_data_for_encoding(data, include_zero=True) - return self.encoder.transform(prepared_data) - else: - logger.error("Encoder was not fit") - return [] - - def load_from_feature_info(self, feature_info): - self.field_name = feature_info["field_name"] - self.encoding_type = feature_info["encoding_type"] - self.max_features = feature_info["max_features"] - self.additional_info = feature_info["additional_info"] - self.ngram_max = feature_info["ngram_max"] - self.encoder = feature_info["encoder"] - - def save_to_feature_info(self): - feature_info = {"field_name": self.field_name, "encoding_type": self.encoding_type, - "max_features": self.max_features, "additional_info": self.additional_info, - "encoder": self.encoder, "ngram_max": self.ngram_max} - return feature_info diff --git a/app/boosting_decision_making/feature_encoding_configurer.py b/app/boosting_decision_making/feature_encoding_configurer.py deleted file mode 100644 index e64940c2..00000000 --- a/app/boosting_decision_making/feature_encoding_configurer.py +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright 2023 EPAM Systems -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from app.boosting_decision_making import feature_encoder - - -class FeatureEncodingConfigurer: - - def __init__(self): - self.feature_dict_with_encodings = {} - - def initialize_encoders_from_config(self, config_str): - self.feature_dict_with_encodings = {} - for str_part in config_str.split(";"): - if not str_part.strip(): - continue - config_parts = str_part.split("|") - feature_name = int(config_parts[0]) - field_name = config_parts[1] - encoding_type = config_parts[2] - max_features = int(config_parts[3]) - ngram_max = int(config_parts[4]) - self.feature_dict_with_encodings[feature_name] = feature_encoder.FeatureEncoder( - field_name=field_name, encoding_type=encoding_type, - max_features=max_features, ngram_max=ngram_max) - - def initialize_encoders_from_objects(self, feature_info): - self.feature_dict_with_encodings = {} - for feature in feature_info: - _feature_encoder = feature_encoder.FeatureEncoder() - self.feature_dict_with_encodings[feature_info] = _feature_encoder.load_from_feature_info( - feature_info[feature]) - - def prepare_encoders(self, data): - for feature in self.feature_dict_with_encodings: - self.feature_dict_with_encodings[feature].fit(data) diff --git a/app/boosting_decision_making/log_similarity_calculator.py b/app/boosting_decision_making/log_similarity_calculator.py deleted file mode 100644 index 94981661..00000000 --- a/app/boosting_decision_making/log_similarity_calculator.py +++ /dev/null @@ -1,91 +0,0 @@ -# Copyright 2023 EPAM Systems -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import numpy as np -import pickle -import math - -from app.utils import text_processing - - -class LogSimilarityCalculator: - - def __init__(self, block_to_split=10, min_log_number_in_block=1, folder=""): - self.block_to_split = block_to_split - self.min_log_number_in_block = min_log_number_in_block - self.folder = folder - self.weights = None - self.softmax_weights = None - if folder.strip() != "": - self.load_model(folder) - - def load_model(self, folder): - self.folder = folder - if not os.path.exists(os.path.join(folder, "weights.pickle")): - return - with open(os.path.join(folder, "weights.pickle"), "rb") as f: - self.block_to_split, self.min_log_number_in_block, self.weights, self.softmax_weights =\ - pickle.load(f) - if not os.path.exists(os.path.join(folder, "config.pickle")): - return - try: - with open(os.path.join(folder, "config.pickle"), "wb") as f: - self.config = pickle.load(f) - except: # noqa - pass - - def add_config_info(self, config): - self.config = config - - def save_model(self, folder): - if not os.path.exists(folder): - os.makedirs(folder) - if self.weights is not None: - with open(os.path.join(folder, "weights.pickle"), "wb") as f: - pickle.dump([self.block_to_split, self.min_log_number_in_block, - self.weights, self.softmax_weights], f) - try: - if self.config: - with open(os.path.join(folder, "config.pickle"), "wb") as f: - pickle.dump(self.config, f) - except: # noqa - pass - - def message_to_array(self, detected_message_res, stacktrace_res): - all_lines = [" ".join(text_processing.split_words(detected_message_res))] - split_log_lines = text_processing.filter_empty_lines( - [" ".join(text_processing.split_words(line)) for line in stacktrace_res.split("\n")]) - split_log_lines_num = len(split_log_lines) - data_in_block = max(self.min_log_number_in_block, - math.ceil(split_log_lines_num / self.block_to_split)) - blocks_num = math.ceil(split_log_lines_num / data_in_block) - - for block in range(blocks_num): - all_lines.append("\n".join( - split_log_lines[block * data_in_block: (block + 1) * data_in_block])) - if len([line for line in all_lines if line.strip() != ""]) == 0: - return [] - return all_lines - - def weigh_data_rows(self, data_rows, use_softmax=False): - padded_data_rows = np.concatenate([data_rows, - np.zeros((max(0, self.block_to_split + 1 - len(data_rows)), - data_rows.shape[1]))], axis=0) - result = None - if use_softmax: - result = np.dot(np.reshape(self.softmax_weights, [-1]), padded_data_rows) - else: - result = np.dot(np.reshape(self.weights, [-1]), padded_data_rows) - return np.clip(result, a_min=0, a_max=1) diff --git a/app/boosting_decision_making/training_models/training_analysis_model.py b/app/boosting_decision_making/training_models/training_analysis_model.py deleted file mode 100644 index c279340c..00000000 --- a/app/boosting_decision_making/training_models/training_analysis_model.py +++ /dev/null @@ -1,451 +0,0 @@ -# Copyright 2023 EPAM Systems -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from app.boosting_decision_making import boosting_decision_maker, custom_boosting_decision_maker -from app.boosting_decision_making.suggest_boosting_featurizer import SuggestBoostingFeaturizer -from app.boosting_decision_making import weighted_similarity_calculator -from app.boosting_decision_making.feature_encoding_configurer import FeatureEncodingConfigurer -from sklearn.model_selection import train_test_split -import elasticsearch -import elasticsearch.helpers -from app.commons.esclient import EsClient -from app.commons import namespace_finder -from imblearn.over_sampling import SMOTE -from app.utils import utils, text_processing -from time import time -import scipy.stats as stats -import numpy as np -import logging -from datetime import datetime -import os -import pickle - -logger = logging.getLogger("analyzerApp.trainingAnalysisModel") - - -class AnalysisModelTraining: - - def __init__(self, model_chooser, app_config, search_cfg): - self.app_config = app_config - self.search_cfg = search_cfg - self.due_proportion = 0.05 - self.due_proportion_to_smote = 0.4 - self.es_client = EsClient(app_config=app_config, search_cfg=search_cfg) - self.baseline_folders = { - "suggestion": self.search_cfg["SuggestBoostModelFolder"], - "auto_analysis": self.search_cfg["BoostModelFolder"]} - self.model_config = { - "suggestion": self.search_cfg["RetrainSuggestBoostModelConfig"], - "auto_analysis": self.search_cfg["RetrainAutoBoostModelConfig"]} - self.weighted_log_similarity_calculator = None - if self.search_cfg["SimilarityWeightsFolder"].strip(): - self.weighted_log_similarity_calculator = weighted_similarity_calculator.\ - WeightedSimilarityCalculator(folder=self.search_cfg["SimilarityWeightsFolder"]) - self.namespace_finder = namespace_finder.NamespaceFinder(app_config) - self.model_chooser = model_chooser - self.metrics_calculations = { - "F1": self.calculate_F1, - "Mean Reciprocal Rank": self.calculate_MRR - } - - def calculate_F1(self, model, x_test, y_test, test_item_ids_with_pos): - return model.validate_model(x_test, y_test) - - def calculate_MRR(self, model, x_test, y_test, test_item_ids_with_pos): - res_labels, prob_labels = model.predict(x_test) - test_item_ids_res = {} - for i in range(len(test_item_ids_with_pos)): - test_item = test_item_ids_with_pos[i] - if test_item not in test_item_ids_res: - test_item_ids_res[test_item] = [] - test_item_ids_res[test_item].append((res_labels[i], prob_labels[i][1], y_test[i])) - MRR = 0 - cnt_to_use = 0 - for test_item in test_item_ids_res: - res = sorted(test_item_ids_res[test_item], key=lambda x: x[1], reverse=True) - has_positives = False - for r in res: - if r[2] == 1: - has_positives = True - break - if not has_positives: - continue - RR_test_item = 0 - for idx, r in enumerate(res): - if r[2] == 1 and r[0] == 1: - RR_test_item = 1 / (idx + 1) - break - MRR += RR_test_item - cnt_to_use += 1 - if cnt_to_use: - MRR /= cnt_to_use - return MRR - - def get_config_for_boosting(self, numberOfLogLines, boosting_model_name, namespaces): - return { - "max_query_terms": self.search_cfg["MaxQueryTerms"], - "min_should_match": 0.4, - 
"min_word_length": self.search_cfg["MinWordLength"], - "filter_min_should_match": [], - "filter_min_should_match_any": [], - "number_of_log_lines": numberOfLogLines, - "filter_by_test_case_hash": False, - "boosting_model": self.baseline_folders[boosting_model_name], - "chosen_namespaces": namespaces, - "calculate_similarities": False, - "time_weight_decay": self.search_cfg["TimeWeightDecay"]} - - def get_info_template(self, project_info, baseline_model, model_name, metric_name): - return {"method": "training", "sub_model_type": "all", "model_type": project_info["model_type"], - "baseline_model": [baseline_model], "new_model": [model_name], - "project_id": str(project_info["project_id"]), "model_saved": 0, "p_value": 1.0, - "data_proportion": 0.0, "baseline_mean_metric": 0.0, "new_model_mean_metric": 0.0, - "bad_data_proportion": 0, "metric_name": metric_name, "errors": [], "errors_count": 0} - - def calculate_metrics(self, model, x_test, y_test, - metrics_to_gather, test_item_ids_with_pos, new_model_results): - for metric in metrics_to_gather: - metric_res = 0.0 - if metric in self.metrics_calculations: - metric_res = self.metrics_calculations[metric]( - model, x_test, y_test, test_item_ids_with_pos) - if metric not in new_model_results: - new_model_results[metric] = [] - new_model_results[metric].append(metric_res) - return new_model_results - - def deduplicate_data(self, data, labels): - data_wo_duplicates = [] - labels_wo_duplicates = [] - data_set = set() - for i in range(len(data)): - if tuple(data[i]) not in data_set: - data_set.add(tuple(data[i])) - data_wo_duplicates.append(data[i]) - labels_wo_duplicates.append(labels[i]) - return data_wo_duplicates, labels_wo_duplicates - - def split_data(self, data, labels, random_state, test_item_ids_with_pos): - x_ids = [i for i in range(len(data))] - x_train_ids, x_test_ids, y_train, y_test = train_test_split( - x_ids, labels, - test_size=0.1, random_state=random_state, stratify=labels) - x_train = np.asarray([data[idx] for idx in x_train_ids]) - x_test = np.asarray([data[idx] for idx in x_test_ids]) - test_item_ids_with_pos_test = [test_item_ids_with_pos[idx] for idx in x_test_ids] - return x_train, x_test, y_train, y_test, test_item_ids_with_pos_test - - def train_several_times(self, data, labels, features, test_item_ids_with_pos, metrics_to_gather): - new_model_results = {} - baseline_model_results = {} - random_states = [1257, 1873, 1917, 2477, 3449, - 353, 4561, 5417, 6427, 2029] - bad_data = False - - proportion_binary_labels = utils.calculate_proportions_for_labels(labels) - - if proportion_binary_labels < self.due_proportion: - logger.debug("Train data has a bad proportion: %.3f", proportion_binary_labels) - bad_data = True - - if not bad_data: - data, labels = self.deduplicate_data(data, labels) - for random_state in random_states: - x_train, x_test, y_train, y_test, test_item_ids_with_pos_test = self.split_data( - data, labels, random_state, test_item_ids_with_pos) - proportion_binary_labels = utils.calculate_proportions_for_labels(y_train) - if proportion_binary_labels < self.due_proportion_to_smote: - oversample = SMOTE(ratio="minority") - x_train, y_train = oversample.fit_resample(x_train, y_train) - self.new_model.train_model(x_train, y_train) - logger.debug("New model results") - new_model_results = self.calculate_metrics( - self.new_model, x_test, y_test, metrics_to_gather, - test_item_ids_with_pos_test, new_model_results) - logger.debug("Baseline results") - x_test_for_baseline = self.transform_data_from_feature_lists( - 
x_test, features, self.baseline_model.get_feature_ids()) - baseline_model_results = self.calculate_metrics( - self.baseline_model, x_test_for_baseline, y_test, - metrics_to_gather, test_item_ids_with_pos_test, baseline_model_results) - return baseline_model_results, new_model_results, bad_data - - def transform_data_from_feature_lists(self, feature_list, cur_features, desired_features): - previously_gathered_features = utils.fill_prevously_gathered_features(feature_list, cur_features) - gathered_data = utils.gather_feature_list(previously_gathered_features, desired_features) - return gathered_data - - def query_logs(self, project_id, log_ids_to_find): - log_ids_to_find = list(log_ids_to_find) - project_index_name = text_processing.unite_project_name( - str(project_id), self.app_config["esProjectIndexPrefix"]) - batch_size = 1000 - log_id_dict = {} - for i in range(int(len(log_ids_to_find) / batch_size) + 1): - log_ids = log_ids_to_find[i * batch_size: (i + 1) * batch_size] - if not log_ids: - continue - ids_query = { - "size": self.app_config["esChunkNumber"], - "query": { - "bool": { - "filter": [ - {"terms": {"_id": log_ids}} - ] - } - }} - for r in elasticsearch.helpers.scan(self.es_client.es_client, - query=ids_query, - index=project_index_name, - scroll="5m"): - log_id_dict[str(r["_id"])] = r - return log_id_dict - - def get_search_query_suggest(self): - return { - "sort": {"savedDate": "desc"}, - "size": self.app_config["esChunkNumber"], - "query": { - "bool": { - "must": [ - {"term": {"methodName": "suggestion"}} - ] - } - } - } - - def get_search_query_aa(self, user_choice): - return { - "sort": {"savedDate": "desc"}, - "size": self.app_config["esChunkNumber"], - "query": { - "bool": { - "must": [ - {"term": {"methodName": "auto_analysis"}}, - {"term": {"userChoice": user_choice}} - ] - } - } - } - - def stop_gathering_info_from_suggest_query(self, num_of_1s, num_of_0s, max_num): - if (num_of_1s + num_of_0s) == 0: - return False - percent_logs = (num_of_1s + num_of_0s) / max_num - percent_1s = num_of_1s / (num_of_1s + num_of_0s) - if percent_logs >= 0.8 and percent_1s <= 0.2: - return True - return False - - def query_es_for_suggest_info(self, project_id): - log_ids_to_find = set() - gathered_suggested_data = [] - log_id_pairs_set = set() - index_name = text_processing.unite_project_name( - str(project_id) + "_suggest", self.app_config["esProjectIndexPrefix"]) - max_number_of_logs = 30000 - cur_number_of_logs = 0 - cur_number_of_logs_0 = 0 - cur_number_of_logs_1 = 0 - unique_saved_features = set() - for query_name, query in [ - ("auto_analysis 0s", self.get_search_query_aa(0)), - ("suggestion", self.get_search_query_suggest()), - ("auto_analysis 1s", self.get_search_query_aa(1))]: - if cur_number_of_logs >= max_number_of_logs: - break - for res in elasticsearch.helpers.scan(self.es_client.es_client, - query=query, - index=index_name, - scroll="5m"): - if cur_number_of_logs >= max_number_of_logs: - break - saved_model_features = "{}|{}".format( - res["_source"]["modelFeatureNames"], - res["_source"]["modelFeatureValues"]) - if saved_model_features in unique_saved_features: - continue - unique_saved_features.add(saved_model_features) - log_ids_pair = (res["_source"]["testItemLogId"], res["_source"]["relevantLogId"]) - if log_ids_pair in log_id_pairs_set: - continue - log_id_pairs_set.add(log_ids_pair) - for col in ["testItemLogId", "relevantLogId"]: - log_id = str(res["_source"][col]) - if res["_source"]["isMergedLog"]: - log_id = log_id + "_m" - log_ids_to_find.add(log_id) - 
gathered_suggested_data.append(res) - cur_number_of_logs += 1 - if res["_source"]["userChoice"] == 1: - cur_number_of_logs_1 += 1 - else: - cur_number_of_logs_0 += 1 - if query_name == "suggestion" and self.stop_gathering_info_from_suggest_query( - cur_number_of_logs_1, cur_number_of_logs_0, max_number_of_logs): - break - logger.debug("Query: '%s', results number: %d, number of 1s: %d", - query_name, cur_number_of_logs, cur_number_of_logs_1) - log_id_dict = self.query_logs(project_id, log_ids_to_find) - return gathered_suggested_data, log_id_dict - - def prepare_encoders(self, features_encoding_config, logs_found): - _feature_encoding_configurer = FeatureEncodingConfigurer() - _feature_encoding_configurer.initialize_encoders_from_config(features_encoding_config) - _feature_encoding_configurer.prepare_encoders(logs_found) - return _feature_encoding_configurer.feature_dict_with_encodings - - def gather_data(self, model_type, project_id, features, defect_type_model_to_use, full_config): - namespaces = self.namespace_finder.get_chosen_namespaces(project_id) - gathered_suggested_data, log_id_dict = self.query_es_for_suggest_info(project_id) - features_dict_with_saved_objects = self.prepare_encoders( - full_config["features_encoding_config"], list(log_id_dict.values())) - full_data_features, labels, test_item_ids_with_pos = [], [], [] - for _suggest_res in gathered_suggested_data: - searched_res = [] - found_logs = {} - for col in ["testItemLogId", "relevantLogId"]: - log_id = str(_suggest_res["_source"][col]) - if _suggest_res["_source"]["isMergedLog"]: - log_id = log_id + "_m" - if log_id in log_id_dict: - found_logs[col] = log_id_dict[log_id] - if len(found_logs) == 2: - log_relevant = found_logs["relevantLogId"] - log_relevant["_score"] = _suggest_res["_source"]["esScore"] - searched_res = [ - (found_logs["testItemLogId"], {"hits": {"hits": [log_relevant]}})] - if searched_res: - _boosting_data_gatherer = SuggestBoostingFeaturizer( - searched_res, - self.get_config_for_boosting( - _suggest_res["_source"]["usedLogLines"], model_type, namespaces), - feature_ids=features, - weighted_log_similarity_calculator=self.weighted_log_similarity_calculator, - features_dict_with_saved_objects=features_dict_with_saved_objects) - _boosting_data_gatherer.set_defect_type_model(defect_type_model_to_use) - _boosting_data_gatherer.fill_prevously_gathered_features( - [utils.to_number_list(_suggest_res["_source"]["modelFeatureValues"])], - _suggest_res["_source"]["modelFeatureNames"]) - feature_data, _ = _boosting_data_gatherer.gather_features_info() - if feature_data: - full_data_features.extend(feature_data) - labels.append(_suggest_res["_source"]["userChoice"]) - test_item_ids_with_pos.append(_suggest_res["_source"]["testItem"]) - return (np.asarray(full_data_features), np.asarray(labels), test_item_ids_with_pos, - features_dict_with_saved_objects) - - def train(self, project_info): - time_training = time() - logger.debug("Started training model '%s'", project_info["model_type"]) - model_name = "%s_model_%s" % (project_info["model_type"], datetime.now().strftime("%d.%m.%y")) - - baseline_model_folder = os.path.basename( - self.baseline_folders[project_info["model_type"]].strip("/").strip("\\")) - self.baseline_model = boosting_decision_maker.BoostingDecisionMaker( - folder=self.baseline_folders[project_info["model_type"]]) - - full_config, features, monotonous_features = pickle.load( - open(self.model_config[project_info["model_type"]], "rb")) - self.new_model = 
custom_boosting_decision_maker.CustomBoostingDecisionMaker( - self.app_config, project_info["project_id"]) - self.new_model.add_config_info(full_config, features, monotonous_features) - - defect_type_model_to_use = self.model_chooser.choose_model(project_info["project_id"], "defect_type_model/") - - metrics_to_gather = ["F1", "Mean Reciprocal Rank"] - train_log_info = {} - for metric in metrics_to_gather: - train_log_info[metric] = self.get_info_template( - project_info, baseline_model_folder, model_name, metric) - - errors = [] - errors_count = 0 - train_data = [] - try: - logger.debug("Initialized training model '%s'", project_info["model_type"]) - train_data, labels, test_item_ids_with_pos, features_dict_with_saved_objects = self.gather_data( - project_info["model_type"], project_info["project_id"], - self.new_model.get_feature_ids(), defect_type_model_to_use, full_config) - self.new_model.features_dict_with_saved_objects = features_dict_with_saved_objects - - for metric in metrics_to_gather: - train_log_info[metric]["data_size"] = len(labels) - train_log_info[metric]["data_proportion"] = utils.calculate_proportions_for_labels(labels) - - logger.debug("Loaded data for training model '%s'", project_info["model_type"]) - baseline_model_results, new_model_results, bad_data = self.train_several_times( - train_data, labels, self.new_model.get_feature_ids(), - test_item_ids_with_pos, metrics_to_gather) - for metric in metrics_to_gather: - train_log_info[metric]["bad_data_proportion"] = int(bad_data) - - use_custom_model = False - if not bad_data: - for metric in metrics_to_gather: - logger.debug("Baseline test results %s", baseline_model_results[metric]) - logger.debug("New model test results %s", new_model_results[metric]) - f_value, p_value = stats.f_oneway(baseline_model_results[metric], new_model_results[metric]) - if p_value is None: - p_value = 1.0 - train_log_info[metric]["p_value"] = p_value - mean_f1 = np.mean(new_model_results[metric]) - train_log_info[metric]["baseline_mean_metric"] = np.mean(baseline_model_results[metric]) - train_log_info[metric]["new_model_mean_metric"] = mean_f1 - if p_value < 0.05 and mean_f1 > np.mean(baseline_model_results[metric]) and mean_f1 >= 0.4: - use_custom_model = True - logger.debug( - """Model training validation results: - p-value=%.3f mean baseline=%.3f mean new model=%.3f""", - p_value, np.mean(baseline_model_results[metric]), np.mean(new_model_results[metric])) - - if use_custom_model: - logger.debug("Custom model should be saved") - - proportion_binary_labels = utils.calculate_proportions_for_labels(labels) - if proportion_binary_labels < self.due_proportion_to_smote: - oversample = SMOTE(ratio="minority") - train_data, labels = oversample.fit_resample(train_data, labels) - proportion_binary_labels = utils.calculate_proportions_for_labels(labels) - if proportion_binary_labels < self.due_proportion: - logger.debug("Train data has a bad proportion: %.3f", proportion_binary_labels) - bad_data = True - for metric in metrics_to_gather: - train_log_info[metric]["bad_data_proportion"] = int(bad_data) - if not bad_data: - for metric in metrics_to_gather: - train_log_info[metric]["model_saved"] = 1 - self.new_model.train_model(train_data, labels) - else: - for metric in metrics_to_gather: - train_log_info[metric]["model_saved"] = 0 - self.model_chooser.delete_old_model( - "%s_model" % project_info["model_type"], project_info["project_id"]) - self.new_model.save_model( - "%s_model/%s/" % (project_info["model_type"], model_name)) - except Exception as 
err: - logger.error(err) - errors.append(utils.extract_exception(err)) - errors_count += 1 - - time_spent = (time() - time_training) - for metric in metrics_to_gather: - train_log_info[metric]["time_spent"] = time_spent - train_log_info[metric]["gather_date"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") - train_log_info[metric]["module_version"] = [self.app_config["appVersion"]] - train_log_info[metric]["errors"].extend(errors) - train_log_info[metric]["errors_count"] += errors_count - - logger.info("Finished for %d s", time_spent) - return len(train_data), train_log_info diff --git a/app/boosting_decision_making/training_models/training_defect_type_model.py b/app/boosting_decision_making/training_models/training_defect_type_model.py deleted file mode 100644 index 917506b8..00000000 --- a/app/boosting_decision_making/training_models/training_defect_type_model.py +++ /dev/null @@ -1,368 +0,0 @@ -# Copyright 2023 EPAM Systems -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from app.boosting_decision_making import defect_type_model, custom_defect_type_model -from sklearn.model_selection import train_test_split -from app.commons.esclient import EsClient -from app.utils import utils, text_processing -from time import time -import scipy.stats as stats -import numpy as np -import logging -from datetime import datetime -import os -import re -from queue import Queue -import elasticsearch.helpers - -logger = logging.getLogger("analyzerApp.trainingDefectTypeModel") - - -class DefectTypeModelTraining: - - def __init__(self, model_chooser, app_config, search_cfg): - self.app_config = app_config - self.search_cfg = search_cfg - self.label2inds = {"ab": 0, "pb": 1, "si": 2} - self.due_proportion = 0.2 - self.es_client = EsClient(app_config=app_config, search_cfg=search_cfg) - self.baseline_model = defect_type_model.DefectTypeModel( - folder=search_cfg["GlobalDefectTypeModelFolder"]) - self.model_chooser = model_chooser - - def return_similar_objects_into_sample(self, x_train_ind, y_train, data, additional_logs, label): - x_train = [] - x_train_add = [] - y_train_add = [] - - for idx, ind in enumerate(x_train_ind): - x_train.append(data[ind][0]) - label_to_use = y_train[idx] - if ind in additional_logs and label_to_use != 1: - for idx_ in additional_logs[ind]: - log_res, label_res, real_label = data[idx_] - if label_res == label: - label_to_use = 1 - break - if ind in additional_logs: - for idx_ in additional_logs[ind]: - x_train_add.append(data[idx_][0]) - y_train_add.append(label_to_use) - x_train.extend(x_train_add) - y_train.extend(y_train_add) - return x_train, y_train - - def split_train_test( - self, logs_to_train_idx, data, labels_filtered, - additional_logs, label, random_state=1257): - x_train_ind, x_test_ind, y_train, y_test = train_test_split( - logs_to_train_idx, labels_filtered, - test_size=0.1, random_state=random_state, stratify=labels_filtered) - x_train, y_train = self.return_similar_objects_into_sample( - x_train_ind, y_train, data, additional_logs, label) - x_test = [] - for ind in 
x_test_ind: - x_test.append(data[ind][0]) - return x_train, x_test, y_train, y_test - - def get_message_query_by_label(self, label): - return { - "_source": ["detected_message_without_params_extended", "issue_type", "launch_id"], - "sort": {"start_time": "desc"}, - "size": self.app_config["esChunkNumber"], - "query": { - "bool": { - "filter": [ - {"range": {"log_level": {"gte": utils.ERROR_LOGGING_LEVEL}}}, - {"exists": {"field": "issue_type"}}, - {"term": {"is_merged": False}} - ], - "must": [ - { - "bool": { - "should": [ - {"wildcard": {"issue_type": "{}*".format(label.upper())}}, - {"wildcard": {"issue_type": "{}*".format(label.lower())}}, - {"wildcard": {"issue_type": "{}*".format(label)}}, - ] - } - } - ], - "should": [ - {"term": {"is_auto_analyzed": {"value": "false", "boost": 1.0}}}, - ] - } - } - } - - def query_data(self, project, label): - message_launch_dict = set() - project_index_name = text_processing.unite_project_name( - str(project), self.app_config["esProjectIndexPrefix"]) - data = [] - for r in elasticsearch.helpers.scan(self.es_client.es_client, - query=self.get_message_query_by_label( - label), - index=project_index_name): - detected_message = r["_source"]["detected_message_without_params_extended"] - text_message_normalized = " ".join(sorted( - text_processing.split_words(detected_message, to_lower=True))) - message_info = (text_message_normalized, - r["_source"]["launch_id"], - r["_source"]["issue_type"]) - if message_info not in message_launch_dict: - data.append((detected_message, label, r["_source"]["issue_type"])) - message_launch_dict.add(message_info) - if len(data) >= self.search_cfg["MaxLogsForDefectTypeModel"]: - break - return data - - def perform_light_deduplication(self, data): - text_messages_set = {} - logs_to_train_idx = [] - additional_logs = {} - for idx, text_message_data in enumerate(data): - text_message = text_message_data[0] - text_message_normalized = " ".join(sorted( - text_processing.split_words(text_message, to_lower=True))) - if text_message_normalized not in text_messages_set: - logs_to_train_idx.append(idx) - text_messages_set[text_message_normalized] = idx - additional_logs[idx] = [] - else: - additional_logs[text_messages_set[text_message_normalized]].append(idx) - return additional_logs, logs_to_train_idx - - def get_info_template(self, project_info, label, baseline_model, model_name): - return {"method": "training", "sub_model_type": label, "model_type": project_info["model_type"], - "baseline_model": [baseline_model], "new_model": [model_name], - "project_id": project_info["project_id"], "model_saved": 0, "p_value": 1.0, - "data_proportion": 0.0, "baseline_mean_metric": 0.0, "new_model_mean_metric": 0.0, - "bad_data_proportion": 0, "metric_name": "F1", "errors": [], "errors_count": 0, - "time_spent": 0.0} - - def load_data_for_training(self, project_info, baseline_model, model_name): - train_log_info = {} - data = [] - found_sub_categories = {} - labels_to_find_queue = Queue() - errors = [] - errors_count = 0 - - for label in self.label2inds: - labels_to_find_queue.put(label) - while not labels_to_find_queue.empty(): - try: - label = labels_to_find_queue.get() - train_log_info[label] = self.get_info_template( - project_info, label, baseline_model, model_name) - time_querying = time() - logger.debug("Label to gather data %s", label) - found_data = self.query_data(project_info["project_id"], label) - for _, _, _issue_type in found_data: - if re.search(r"\w{2}_\w+", _issue_type) and _issue_type not in found_sub_categories: - 
found_sub_categories[_issue_type] = [] - labels_to_find_queue.put(_issue_type) - if label in self.label2inds: - data.extend(found_data) - else: - found_sub_categories[label] = found_data - time_spent = time() - time_querying - logger.debug("Finished quering for %d s", time_spent) - train_log_info[label]["time_spent"] = time_spent - train_log_info[label]["data_size"] = len(found_data) - except Exception as err: - logger.error(err) - errors.append(utils.extract_exception(err)) - errors_count += 1 - labels_to_find_queue.task_done() - logger.debug("Data gathered: %d" % len(data)) - train_log_info["all"] = self.get_info_template( - project_info, "all", baseline_model, model_name) - train_log_info["all"]["data_size"] = len(data) - train_log_info["all"]["errors"] = errors - train_log_info["all"]["errors_count"] = errors_count - return data, found_sub_categories, train_log_info - - def creating_binary_target_data(self, label, data, found_sub_categories): - data_to_train = data - if label in found_sub_categories: - data_to_train = [d for d in data if d[2] != label] + found_sub_categories[label] - additional_logs, logs_to_train_idx = self.perform_light_deduplication(data_to_train) - labels_filtered = [] - for ind in logs_to_train_idx: - if (data_to_train[ind][1] == label or data_to_train[ind][2] == label): - labels_filtered.append(1) - else: - labels_filtered.append(0) - proportion_binary_labels = utils.calculate_proportions_for_labels(labels_filtered) - if proportion_binary_labels < self.due_proportion: - logs_to_train_idx, labels_filtered, proportion_binary_labels = utils.rebalance_data( - logs_to_train_idx, labels_filtered, self.due_proportion) - return logs_to_train_idx, labels_filtered, data_to_train, additional_logs, proportion_binary_labels - - def copy_model_part_from_baseline(self, label): - if label not in self.baseline_model.models: - if label in self.new_model.models: - del self.new_model.models[label] - if label in self.new_model.count_vectorizer_models: - del self.new_model.count_vectorizer_models[label] - else: - self.new_model.models[label] = self.baseline_model.models[label] - _count_vectorizer = self.baseline_model.count_vectorizer_models[label] - self.new_model.count_vectorizer_models[label] = _count_vectorizer - - def train_several_times(self, label, data, found_sub_categories): - new_model_results = [] - baseline_model_results = [] - random_states = [1257, 1873, 1917, 2477, 3449, - 353, 4561, 5417, 6427, 2029] - bad_data = False - - logs_to_train_idx, labels_filtered, data_to_train,\ - additional_logs, proportion_binary_labels = self.creating_binary_target_data( - label, data, found_sub_categories) - - if proportion_binary_labels < self.due_proportion: - logger.debug("Train data has a bad proportion: %.3f", proportion_binary_labels) - bad_data = True - - if not bad_data: - for random_state in random_states: - x_train, x_test, y_train, y_test = self.split_train_test( - logs_to_train_idx, data_to_train, labels_filtered, - additional_logs, label, - random_state=random_state) - self.new_model.train_model(label, x_train, y_train) - logger.debug("New model results") - f1, accuracy = self.new_model.validate_model(label, x_test, y_test) - new_model_results.append(f1) - if label in found_sub_categories: - baseline_model_results.append(0.001) - else: - logger.debug("Baseline results") - f1, accuracy = self.baseline_model.validate_model(label, x_test, y_test) - baseline_model_results.append(f1) - return baseline_model_results, new_model_results, bad_data - - def train(self, project_info): 
- start_time = time() - model_name = "defect_type_model_%s" % datetime.now().strftime("%d.%m.%y") - baseline_model = os.path.basename( - self.search_cfg["GlobalDefectTypeModelFolder"].strip("/").strip("\\")) - self.new_model = custom_defect_type_model.CustomDefectTypeModel( - self.app_config, project_info["project_id"]) - - data, found_sub_categories, train_log_info = self.load_data_for_training( - project_info, baseline_model, model_name) - - data_proportion_min = 1.0 - p_value_max = 0.0 - all_bad_data = 1 - custom_models = [] - f1_chosen_models = [] - f1_baseline_models = [] - errors = [] - errors_count = 0 - for label in list(self.label2inds.keys()) + list(found_sub_categories.keys()): - try: - time_training = time() - logger.debug("Label to train the model %s", label) - - baseline_model_results, new_model_results, bad_data = self.train_several_times( - label, data, found_sub_categories) - - use_custom_model = False - if not bad_data: - logger.debug("Baseline test results %s", baseline_model_results) - logger.debug("New model test results %s", new_model_results) - f_value, p_value = stats.f_oneway(baseline_model_results, new_model_results) - if p_value is None: - p_value = 1.0 - train_log_info[label]["p_value"] = p_value - mean_f1 = np.mean(new_model_results) - train_log_info[label]["baseline_mean_metric"] = np.mean(baseline_model_results) - train_log_info[label]["new_model_mean_metric"] = mean_f1 - if p_value < 0.05 and mean_f1 > np.mean(baseline_model_results) and mean_f1 >= 0.4: - p_value_max = max(p_value_max, p_value) - use_custom_model = True - all_bad_data = 0 - logger.debug( - """Model training validation results: - p-value=%.3f mean baseline=%.3f mean new model=%.3f""", - p_value, np.mean(baseline_model_results), np.mean(new_model_results)) - train_log_info[label]["bad_data_proportion"] = int(bad_data) - - if use_custom_model: - logger.debug("Custom model '%s' should be saved" % label) - - logs_to_train_idx, labels_filtered, data_to_train,\ - additional_logs, proportion_binary_labels = self.creating_binary_target_data( - label, data, found_sub_categories) - if proportion_binary_labels < self.due_proportion: - logger.debug("Train data has a bad proportion: %.3f", proportion_binary_labels) - bad_data = True - train_log_info[label]["bad_data_proportion"] = int(bad_data) - train_log_info[label]["data_proportion"] = proportion_binary_labels - if not bad_data: - x_train, y_train = self.return_similar_objects_into_sample( - logs_to_train_idx, labels_filtered, data_to_train, additional_logs, label) - train_log_info[label]["model_saved"] = 1 - data_proportion_min = min( - train_log_info[label]["data_proportion"], data_proportion_min) - self.new_model.train_model(label, x_train, y_train) - custom_models.append(label) - if label not in found_sub_categories: - f1_baseline_models.append(train_log_info[label]["baseline_mean_metric"]) - f1_chosen_models.append(train_log_info[label]["new_model_mean_metric"]) - else: - train_log_info[label]["model_saved"] = 0 - else: - self.copy_model_part_from_baseline(label) - if train_log_info[label]["baseline_mean_metric"] > 0.001: - f1_baseline_models.append(train_log_info[label]["baseline_mean_metric"]) - f1_chosen_models.append(train_log_info[label]["baseline_mean_metric"]) - train_log_info[label]["time_spent"] += (time() - time_training) - except Exception as err: - logger.error(err) - train_log_info[label]["errors_count"] += 1 - train_log_info[label]["errors"].append(utils.extract_exception(err)) - errors.append(utils.extract_exception(err)) - 
errors_count += 1 - self.copy_model_part_from_baseline(label) - - logger.debug("Custom models were for labels: %s" % custom_models) - if len(custom_models): - logger.debug("The custom model should be saved") - train_log_info["all"]["model_saved"] = 1 - train_log_info["all"]["p_value"] = p_value_max - self.model_chooser.delete_old_model("defect_type_model", project_info["project_id"]) - self.new_model.save_model( - "defect_type_model/%s/" % model_name) - - time_spent = time() - start_time - logger.info("Finished for %d s", time_spent) - train_log_info["all"]["time_spent"] = time_spent - train_log_info["all"]["data_proportion"] = data_proportion_min - train_log_info["all"]["errors_count"] += errors_count - train_log_info["all"]["errors"].extend(errors) - train_log_info["all"]["baseline_mean_metric"] = np.mean( - f1_baseline_models) if f1_baseline_models else 0.0 - train_log_info["all"]["new_model_mean_metric"] = np.mean( - f1_chosen_models) if f1_chosen_models else 0.0 - train_log_info["all"]["bad_data_proportion"] = all_bad_data - for label in train_log_info: - train_log_info[label]["gather_date"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") - train_log_info[label]["module_version"] = [self.app_config["appVersion"]] - return len(data), train_log_info diff --git a/app/boosting_decision_making/weighted_similarity_calculator.py b/app/boosting_decision_making/weighted_similarity_calculator.py deleted file mode 100644 index 94f8c8d7..00000000 --- a/app/boosting_decision_making/weighted_similarity_calculator.py +++ /dev/null @@ -1,91 +0,0 @@ -# Copyright 2023 EPAM Systems -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import numpy as np -import pickle -import math - -from app.utils import text_processing - - -class WeightedSimilarityCalculator: - - def __init__(self, block_to_split=10, min_log_number_in_block=1, folder=""): - self.block_to_split = block_to_split - self.min_log_number_in_block = min_log_number_in_block - self.folder = folder - self.weights = None - self.softmax_weights = None - if folder.strip(): - self.load_model(folder) - - def load_model(self, folder): - self.folder = folder - if not os.path.exists(os.path.join(folder, "weights.pickle")): - return - with open(os.path.join(folder, "weights.pickle"), "rb") as f: - self.block_to_split, self.min_log_number_in_block, self.weights, self.softmax_weights =\ - pickle.load(f) - if not os.path.exists(os.path.join(folder, "config.pickle")): - return - try: - with open(os.path.join(folder, "config.pickle"), "wb") as f: - self.config = pickle.load(f) - except: # noqa - pass - - def add_config_info(self, config): - self.config = config - - def save_model(self, folder): - if not os.path.exists(folder): - os.makedirs(folder) - if self.weights is not None: - with open(os.path.join(folder, "weights.pickle"), "wb") as f: - pickle.dump([self.block_to_split, self.min_log_number_in_block, - self.weights, self.softmax_weights], f) - try: - if self.config: - with open(os.path.join(folder, "config.pickle"), "wb") as f: - pickle.dump(self.config, f) - except: # noqa - pass - - def message_to_array(self, detected_message_res, stacktrace_res): - all_lines = [" ".join(text_processing.split_words(detected_message_res))] - split_log_lines = text_processing.filter_empty_lines( - [" ".join(text_processing.split_words(line)) for line in stacktrace_res.split("\n")]) - split_log_lines_num = len(split_log_lines) - data_in_block = max(self.min_log_number_in_block, - math.ceil(split_log_lines_num / self.block_to_split)) - blocks_num = math.ceil(split_log_lines_num / data_in_block) - - for block in range(blocks_num): - all_lines.append("\n".join( - split_log_lines[block * data_in_block: (block + 1) * data_in_block])) - if len([line for line in all_lines if line.strip()]) == 0: - return [] - return all_lines - - def weigh_data_rows(self, data_rows, use_softmax=False): - padded_data_rows = np.concatenate([data_rows, - np.zeros((max(0, self.block_to_split + 1 - len(data_rows)), - data_rows.shape[1]))], axis=0) - result = None - if use_softmax: - result = np.dot(np.reshape(self.softmax_weights, [-1]), padded_data_rows) - else: - result = np.dot(np.reshape(self.weights, [-1]), padded_data_rows) - return np.clip(result, a_min=0, a_max=1) diff --git a/app/commons/clusterizer.py b/app/commons/clusterizer.py index 20b3f399..30bea243 100644 --- a/app/commons/clusterizer.py +++ b/app/commons/clusterizer.py @@ -12,14 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import logging import hashlib import heapq +from time import time + import numpy as np import sklearn from sklearn.feature_extraction.text import CountVectorizer -from time import time +from app.commons import logging from app.utils import utils, text_processing logger = logging.getLogger("analyzerApp.clusterizer") @@ -30,7 +31,7 @@ class Clusterizer: def __init__(self): pass - def calculate_hashes(self, messages, n_gram=2, n_permutations=64): + def calculate_hashes(self, messages: list[str], n_gram: int = 2, n_permutations: int = 64) -> list[list[str]]: hashes = [] for message in messages: words = message.split() @@ -38,11 +39,12 @@ def calculate_hashes(self, messages, n_gram=2, n_permutations=64): len_words = (len(words) - n_gram) if len(words) > n_gram else len(words) for i in range(len_words): hash_print.add(hashlib.md5(" ".join(words[i:i + n_gram]).encode("utf-8")).hexdigest()) - hash_print = list(heapq.nlargest(n_permutations, hash_print)) - hashes.append(hash_print) + hashes.append(list(heapq.nlargest(n_permutations, hash_print))) return hashes - def find_groups_by_similarity(self, messages, groups_to_check, threshold=0.95): + def find_groups_by_similarity( + self, messages: list[str], groups_to_check: dict[int, list[int]], + threshold: float = 0.95) -> dict[int, list[int]]: if len(messages) == 0: return {} rearranged_groups = {} @@ -66,7 +68,9 @@ def find_groups_by_similarity(self, messages, groups_to_check, threshold=0.95): logger.debug("Time for finding groups: %.2f s", time() - start_time) return rearranged_groups - def similarity_groupping(self, hash_prints, block_size=1000, for_text=True, threshold=0.95): + def similarity_groupping( + self, hash_prints: list[list[str]] | list[str], block_size: int = 1000, for_text: bool = True, + threshold: float = 0.95) -> dict[int, int]: num_of_blocks = int(np.ceil(len(hash_prints) / block_size)) hash_groups = {} global_ind = 0 @@ -104,7 +108,7 @@ def similarity_groupping(self, hash_prints, block_size=1000, for_text=True, thre hash_groups[j] = hash_groups[i] return hash_groups - def unite_groups_by_hashes(self, messages, threshold=0.95): + def unite_groups_by_hashes(self, messages: list[str], threshold: float = 0.95) -> dict[int, list[int]]: start_time = time() hash_prints = self.calculate_hashes(messages) has_no_empty = False @@ -124,7 +128,7 @@ def unite_groups_by_hashes(self, messages, threshold=0.95): logger.debug("Time for finding hash groups: %.2f s", time() - start_time) return rearranged_groups - def perform_light_deduplication(self, messages): + def perform_light_deduplication(self, messages: list[str]) -> tuple[list[str], dict[int, list[int]]]: text_messages_set = {} messages_to_cluster = [] ids_with_duplicates = {} @@ -141,11 +145,10 @@ def perform_light_deduplication(self, messages): ids_with_duplicates[text_messages_set[text_message_normalized]].append(idx) return messages_to_cluster, ids_with_duplicates - def find_clusters(self, messages, threshold=0.95): + def find_clusters(self, messages: list[str], threshold: float = 0.95) -> dict[int, list[int]]: messages_to_cluster, ids_with_duplicates = self.perform_light_deduplication(messages) hash_groups = self.unite_groups_by_hashes(messages_to_cluster, threshold=threshold) - groups = self.find_groups_by_similarity( - messages_to_cluster, hash_groups, threshold=threshold) + groups = self.find_groups_by_similarity(messages_to_cluster, hash_groups, threshold=threshold) new_groups = {} for cluster in groups: new_log_ids = [] diff --git a/app/commons/esclient.py b/app/commons/esclient.py 
index 9a7090bb..63258d5a 100644 --- a/app/commons/esclient.py +++ b/app/commons/esclient.py @@ -13,7 +13,6 @@ # limitations under the License. import json -import logging import traceback from collections import deque from time import time @@ -26,10 +25,11 @@ from urllib3.exceptions import InsecureRequestWarning from app.amqp.amqp import AmqpClient -from app.commons import launch_objects +from app.commons import logging +from app.commons.model.launch_objects import ApplicationConfig, Response, Launch, TestItem, BulkResponse +from app.commons.model.ml import TrainInfo, ModelType from app.commons.log_merger import LogMerger -from app.commons.log_preparation import LogPreparation -from app.commons.triggering_training.retraining_triggering import GATHERED_METRIC_TOTAL +from app.commons.log_requests import LogRequests from app.utils import utils, text_processing logger = logging.getLogger("analyzerApp.esclient") @@ -37,41 +37,41 @@ class EsClient: """Elasticsearch client implementation""" - - def __init__(self, app_config=None, search_cfg=None, es_client: elasticsearch.Elasticsearch = None): - if not app_config: - app_config = {} - if not search_cfg: - search_cfg = {} + app_config: ApplicationConfig + es_client: elasticsearch.Elasticsearch + host: str + log_requests: LogRequests + log_merger: LogMerger + tables_to_recreate: list[str] + + def __init__(self, app_config: ApplicationConfig, es_client: elasticsearch.Elasticsearch = None): self.app_config = app_config - self.host = app_config["esHost"] - self.search_cfg = search_cfg + self.host = app_config.esHost self.es_client = es_client or self.create_es_client(app_config) - self.log_preparation = LogPreparation() + self.log_requests = LogRequests() self.log_merger = LogMerger() - self.tables_to_recreate = ["rp_aa_stats", "rp_model_train_stats", - "rp_suggestions_info_metrics"] + self.tables_to_recreate = ["rp_aa_stats", "rp_model_train_stats", "rp_suggestions_info_metrics"] - def create_es_client(self, app_config) -> elasticsearch.Elasticsearch: - if not app_config["esVerifyCerts"]: + def create_es_client(self, app_config: ApplicationConfig) -> elasticsearch.Elasticsearch: + if not app_config.esVerifyCerts: urllib3.disable_warnings(InsecureRequestWarning) kwargs = { "timeout": 30, "max_retries": 5, "retry_on_timeout": True, - "use_ssl": app_config["esUseSsl"], - "verify_certs": app_config["esVerifyCerts"], - "ssl_show_warn": app_config["esSslShowWarn"], - "ca_certs": app_config["esCAcert"], - "client_cert": app_config["esClientCert"], - "client_key": app_config["esClientKey"], + "use_ssl": app_config.esUseSsl, + "verify_certs": app_config.esVerifyCerts, + "ssl_show_warn": app_config.esSslShowWarn, + "ca_certs": app_config.esCAcert, + "client_cert": app_config.esClientCert, + "client_key": app_config.esClientKey, } - if app_config["esUser"]: - kwargs["http_auth"] = (app_config["esUser"], - app_config["esPassword"]) + if app_config.esUser: + kwargs["http_auth"] = (app_config.esUser, + app_config.esPassword) - if app_config["turnOffSslVerification"]: + if app_config.turnOffSslVerification: kwargs["connection_class"] = RequestsHttpConnection return elasticsearch.Elasticsearch([self.host], **kwargs) @@ -80,7 +80,7 @@ def get_test_item_query(self, test_item_ids, is_merged, full_log): """Build test item query""" if full_log: return { - "size": self.app_config["esChunkNumber"], + "size": self.app_config.esChunkNumber, "query": { "bool": { "filter": [ @@ -92,7 +92,7 @@ def get_test_item_query(self, test_item_ids, is_merged, full_log): else: return { 
"_source": ["test_item"], - "size": self.app_config["esChunkNumber"], + "size": self.app_config.esChunkNumber, "query": { "bool": { "filter": [ @@ -106,7 +106,7 @@ def build_search_test_item_ids_query(self, log_ids): """Build search test item ids query""" return { "_source": ["test_item"], - "size": self.app_config["esChunkNumber"], + "size": self.app_config.esChunkNumber, "query": { "bool": { "filter": [ @@ -122,7 +122,7 @@ def is_healthy(self): """Check whether elasticsearch is healthy""" try: url = text_processing.build_url(self.host, ["_cluster/health"]) - res = utils.send_request(url, "GET", self.app_config["esUser"], self.app_config["esPassword"]) + res = utils.send_request(url, "GET", self.app_config.esUser, self.app_config.esPassword) return res["status"] in ["green", "yellow"] except Exception as err: logger.error("Elasticsearch is not healthy") @@ -142,30 +142,24 @@ def update_settings_after_read_only(self, es_host): logger.error(err) logger.error("Can't reset read only mode for elastic indices") - def create_index(self, index_name): + def create_index(self, index_name: str) -> Response: """Create index in elasticsearch""" - logger.info("Creating '%s' Elasticsearch index", str(index_name)) + logger.info("Creating '%s' Elasticsearch index", index_name) logger.info("ES Url %s", text_processing.remove_credentials_from_url(self.host)) - try: - response = self.es_client.indices.create(index=str(index_name), body={ - 'settings': utils.read_json_file("res", "index_settings.json", to_json=True), - 'mappings': utils.read_json_file("res", "index_mapping_settings.json", to_json=True) - }) - logger.debug("Created '%s' Elasticsearch index", str(index_name)) - return launch_objects.Response(**response) - except Exception as err: - logger.error("Couldn't create index") - logger.error("ES Url %s", text_processing.remove_credentials_from_url(self.host)) - logger.error(err) - return launch_objects.Response() + response = self.es_client.indices.create(index=index_name, body={ + 'settings': utils.read_json_file("res", "index_settings.json", to_json=True), + 'mappings': utils.read_json_file("res", "index_mapping_settings.json", to_json=True) + }) + logger.debug("Created '%s' Elasticsearch index", index_name) + return Response(**response) def list_indices(self): """Get all indices from elasticsearch""" url = text_processing.build_url(self.host, ["_cat", "indices?format=json"]) - res = utils.send_request(url, "GET", self.app_config["esUser"], self.app_config["esPassword"]) + res = utils.send_request(url, "GET", self.app_config.esUser, self.app_config.esPassword) return res - def index_exists(self, index_name, print_error=True): + def index_exists(self, index_name: str, print_error: bool = True): """Checks whether index exists""" try: index = self.es_client.indices.get(index=str(index_name)) @@ -190,16 +184,17 @@ def delete_index(self, index_name): logger.error(err) return False - def create_index_if_not_exists(self, index_name): + def create_index_if_not_exists(self, index_name: str) -> bool: """Creates index if it doesn't exist""" if not self.index_exists(index_name, print_error=False): - return self.create_index(index_name) + response = self.create_index(index_name) + return response.acknowledged return True def _to_launch_test_item_list( self, - launches: list[launch_objects.Launch] - ) -> deque[tuple[launch_objects.Launch, launch_objects.TestItem]]: + launches: list[Launch] + ) -> deque[tuple[Launch, TestItem]]: test_item_queue = deque() for launch in launches: test_items = launch.testItems @@ 
-214,7 +209,7 @@ def _to_launch_test_item_list( def _to_index_bodies( self, project_with_prefix: str, - test_item_queue: deque[tuple[launch_objects.Launch, launch_objects.TestItem]] + test_item_queue: deque[tuple[Launch, TestItem]] ) -> tuple[list[str], list[dict]]: bodies = [] test_item_ids = [] @@ -225,7 +220,7 @@ def _to_index_bodies( if log.logLevel < utils.ERROR_LOGGING_LEVEL or not log.message.strip(): continue - bodies.append(self.log_preparation._prepare_log(launch, test_item, log, project_with_prefix)) + bodies.append(LogRequests._prepare_log(launch, test_item, log, project_with_prefix)) logs_added = True if logs_added: test_item_ids.append(str(test_item.testItemId)) @@ -237,13 +232,13 @@ def index_logs(self, launches): logger.info("ES Url %s", text_processing.remove_credentials_from_url(self.host)) t_start = time() launch_ids = set(map(lambda launch_obj: launch_obj.launchId, launches)) - project = str(next(map(lambda launch_obj: launch_obj.project, launches))) + project = next(map(lambda launch_obj: launch_obj.project, launches)) test_item_queue = self._to_launch_test_item_list(launches) del launches if project is None: - return launch_objects.BulkResponse(took=0, errors=False) + return BulkResponse(took=0, errors=False) - project_with_prefix = text_processing.unite_project_name(project, self.app_config["esProjectIndexPrefix"]) + project_with_prefix = text_processing.unite_project_name(project, self.app_config.esProjectIndexPrefix) self.create_index_if_not_exists(project_with_prefix) test_item_ids, bodies = self._to_index_bodies(project_with_prefix, test_item_queue) logs_with_exceptions = utils.extract_all_exceptions(bodies) @@ -251,13 +246,11 @@ def index_logs(self, launches): result.logResults = logs_with_exceptions _, num_logs_with_defect_types = self._merge_logs(test_item_ids, project_with_prefix) try: - if "amqpUrl" in self.app_config and self.app_config["amqpUrl"].strip(): - AmqpClient(self.app_config["amqpUrl"]).send_to_inner_queue( - self.app_config["exchangeName"], "train_models", json.dumps({ - "model_type": "defect_type", - "project_id": project, - GATHERED_METRIC_TOTAL: num_logs_with_defect_types - })) + if self.app_config.amqpUrl: + AmqpClient(self.app_config.amqpUrl).send_to_inner_queue( + self.app_config.exchangeName, 'train_models', + TrainInfo(model_type=ModelType.defect_type, project=project, + gathered_metric_total=num_logs_with_defect_types).json()) except Exception as exc: logger.exception(exc) logger.info("Finished indexing logs for %d launches %s. 
It took %.2f sec.", @@ -334,10 +327,10 @@ def _recreate_index_if_needed(self, bodies, formatted_exception): def _bulk_index(self, bodies, refresh=True, chunk_size=None): if not bodies: - return launch_objects.BulkResponse(took=0, errors=False) + return BulkResponse(took=0, errors=False) start_time = time() logger.debug("Indexing %d logs...", len(bodies)) - es_chunk_number = self.app_config["esChunkNumber"] + es_chunk_number = self.app_config.esChunkNumber if chunk_size is not None: es_chunk_number = chunk_size try: @@ -360,17 +353,16 @@ def _bulk_index(self, bodies, refresh=True, chunk_size=None): if errors: logger.debug("Occured errors %s", errors) logger.debug("Finished indexing for %.2f s", time() - start_time) - return launch_objects.BulkResponse(took=success_count, errors=len(errors) > 0) + return BulkResponse(took=success_count, errors=len(errors) > 0) except Exception as exc: logger.error("Error in bulk") logger.error("ES Url %s", text_processing.remove_credentials_from_url(self.host)) logger.exception(exc) - return launch_objects.BulkResponse(took=0, errors=True) + return BulkResponse(took=0, errors=True) def delete_logs(self, clean_index): """Delete logs from elasticsearch""" - index_name = text_processing.unite_project_name( - str(clean_index.project), self.app_config["esProjectIndexPrefix"]) + index_name = text_processing.unite_project_name(clean_index.project, self.app_config.esProjectIndexPrefix) logger.info("Delete logs %s for the project %s", clean_index.ids, index_name) logger.info("ES Url %s", text_processing.remove_credentials_from_url(self.host)) @@ -447,7 +439,7 @@ def send_stats_info(self, stats_info): def get_test_items_by_ids_query(self, test_item_ids): return {"_source": ["test_item"], - "size": self.app_config["esChunkNumber"], + "size": self.app_config.esChunkNumber, "query": { "bool": { "filter": [ @@ -463,7 +455,7 @@ def defect_update(self, defect_update_info): defect_update_info["itemsToUpdate"] = { int(key_): val for key_, val in defect_update_info["itemsToUpdate"].items()} index_name = text_processing.unite_project_name( - str(defect_update_info["project"]), self.app_config["esProjectIndexPrefix"]) + defect_update_info['project'], self.app_config.esProjectIndexPrefix) if not self.index_exists(index_name): return test_item_ids batch_size = 1000 @@ -496,9 +488,9 @@ def defect_update(self, defect_update_info): self._bulk_index(log_update_queries) items_not_updated = list(set(test_item_ids) - found_test_items) logger.debug("Not updated test items: %s", items_not_updated) - if "amqpUrl" in self.app_config and self.app_config["amqpUrl"].strip(): - AmqpClient(self.app_config["amqpUrl"]).send_to_inner_queue( - self.app_config["exchangeName"], "update_suggest_info", json.dumps(defect_update_info)) + if self.app_config.amqpUrl: + AmqpClient(self.app_config.amqpUrl).send_to_inner_queue( + self.app_config.exchangeName, "update_suggest_info", json.dumps(defect_update_info)) logger.info("Finished updating defect types. 
It took %.2f sec", time() - t_start) return items_not_updated @@ -517,7 +509,7 @@ def remove_test_items(self, remove_items_info): logger.info("Started removing test items") t_start = time() index_name = text_processing.unite_project_name( - str(remove_items_info["project"]), self.app_config["esProjectIndexPrefix"]) + str(remove_items_info["project"]), self.app_config.esProjectIndexPrefix) deleted_logs = self.delete_by_query( index_name, remove_items_info["itemsToDelete"], self.build_delete_query_by_test_items) logger.debug("Removed %s logs by test item ids", deleted_logs) @@ -530,9 +522,7 @@ def remove_launches(self, remove_launches_info): launch_ids = remove_launches_info["launch_ids"] logger.info("Started removing launches") t_start = time() - index_name = text_processing.unite_project_name( - str(project), self.app_config["esProjectIndexPrefix"] - ) + index_name = text_processing.unite_project_name(project, self.app_config.esProjectIndexPrefix) deleted_logs = self.delete_by_query( index_name, launch_ids, @@ -567,18 +557,16 @@ def __time_range_query( ) -> dict: query = {"query": {"range": {time_field: {"gte": gte_time, "lte": lte_time}}}} if for_scan: - query["size"] = self.app_config["esChunkNumber"] + query["size"] = self.app_config.esChunkNumber return query @utils.ignore_warnings def get_launch_ids_by_start_time_range( self, project: int, start_date: str, end_date: str ) -> list[str]: - index_name = text_processing.unite_project_name( - str(project), self.app_config["esProjectIndexPrefix"] - ) + index_name = text_processing.unite_project_name(project, self.app_config.esProjectIndexPrefix) query = self.__time_range_query( - "launch_start_time", start_date, end_date, for_scan=True + 'launch_start_time', start_date, end_date, for_scan=True ) launch_ids = set() for log in elasticsearch.helpers.scan( @@ -591,9 +579,7 @@ def get_launch_ids_by_start_time_range( def remove_by_launch_start_time_range( self, project: int, start_date: str, end_date: str ) -> int: - index_name = text_processing.unite_project_name( - str(project), self.app_config["esProjectIndexPrefix"] - ) + index_name = text_processing.unite_project_name(project, self.app_config.esProjectIndexPrefix) query = self.__time_range_query("launch_start_time", start_date, end_date) delete_response = self.es_client.delete_by_query(index_name, body=query) return delete_response["deleted"] @@ -602,9 +588,7 @@ def remove_by_launch_start_time_range( def get_log_ids_by_log_time_range( self, project: int, start_date: str, end_date: str ) -> list[str]: - index_name = text_processing.unite_project_name( - str(project), self.app_config["esProjectIndexPrefix"] - ) + index_name = text_processing.unite_project_name(project, self.app_config.esProjectIndexPrefix) query = self.__time_range_query("log_time", start_date, end_date, for_scan=True) log_ids = set() for log in elasticsearch.helpers.scan( @@ -617,9 +601,7 @@ def get_log_ids_by_log_time_range( def remove_by_log_time_range( self, project: int, start_date: str, end_date: str ) -> int: - index_name = text_processing.unite_project_name( - str(project), self.app_config["esProjectIndexPrefix"] - ) + index_name = text_processing.unite_project_name(project, self.app_config.esProjectIndexPrefix) query = self.__time_range_query("log_time", start_date, end_date) delete_response = self.es_client.delete_by_query(index_name, body=query) return delete_response["deleted"] diff --git a/app/commons/log_merger.py b/app/commons/log_merger.py index 408e48b7..01513985 100644 --- a/app/commons/log_merger.py +++ 
b/app/commons/log_merger.py @@ -12,11 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -from app.utils import text_processing import copy +from typing import Any, Optional + +from app.utils import text_processing class LogMerger: + fields_to_clean: list[str] + fields_to_merge: list[str] def __init__(self): self.fields_to_clean = ["message", "detected_message", @@ -31,9 +35,11 @@ def __init__(self): "paths", "message_params", "detected_message_without_params_extended", "whole_message"] - def merge_big_and_small_logs(self, logs, log_level_ids_to_add, - log_level_messages, log_level_ids_merged, logs_ids_in_merged_logs): - """Merge big message logs with small ones""" + def merge_big_and_small_logs( + self, logs: list[dict[str, Any]], log_level_ids_to_add: dict[int, list[int]], + log_level_messages: dict[str, dict[int, str]], log_level_ids_merged: dict[int, dict[str, Any]], + logs_ids_in_merged_logs: dict[int, list[int]]) -> tuple[list[dict[str, Any]], dict[str, list[int]]]: + """Merge big message logs with small ones.""" new_logs = [] for log in logs: if not log["_source"]["message"].strip(): @@ -47,14 +53,11 @@ def merge_big_and_small_logs(self, logs, log_level_ids_to_add, log_ids_for_merged_logs = {} for log_level in log_level_messages["message"]: - - if not log_level_ids_to_add[log_level] and\ - log_level_messages["message"][log_level].strip(): + if not log_level_ids_to_add[log_level] and log_level_messages["message"][log_level].strip(): log = log_level_ids_merged[log_level] merged_logs_id = str(log["_id"]) + "_m" new_log = self.prepare_new_log( - log, merged_logs_id, True, - text_processing.compress(log_level_messages["message"][log_level]), + log, merged_logs_id, True, text_processing.compress(log_level_messages["message"][log_level]), fields_to_clean=self.fields_to_clean) log_ids_for_merged_logs[merged_logs_id] = logs_ids_in_merged_logs[log_level] for field in log_level_messages: @@ -71,8 +74,9 @@ def merge_big_and_small_logs(self, logs, log_level_ids_to_add, new_logs.append(new_log) return new_logs, log_ids_for_merged_logs - def decompose_logs_merged_and_without_duplicates(self, logs): - """Merge big logs with small ones without duplcates""" + def decompose_logs_merged_and_without_duplicates( + self, logs: list[dict[str, Any]]) -> tuple[list[dict[str, Any]], dict[str, list[int]]]: + """Merge big logs with small ones without duplicates.""" log_level_messages = {} for field in self.fields_to_merge: log_level_messages[field] = {} @@ -95,7 +99,7 @@ def decompose_logs_merged_and_without_duplicates(self, logs): if log_level not in logs_unique_log_level: logs_unique_log_level[log_level] = set() - if log["_source"]["original_message_lines"] <= 2 and\ + if log["_source"]["original_message_lines"] <= 2 and \ log["_source"]["original_message_words_number"] <= 100: if log_level not in log_level_ids_merged: log_level_ids_merged[log_level] = log @@ -115,18 +119,18 @@ def decompose_logs_merged_and_without_duplicates(self, logs): for field in log_level_messages: if field in log["_source"]: - splitter = "\r\n" if field in ["message", "whole_message"] else " " - log_level_messages[field][log_level] =\ + splitter = "\n" if field in ["message", "whole_message"] else " " + log_level_messages[field][log_level] = \ log_level_messages[field][log_level] + log["_source"][field] + splitter else: log_level_ids_to_add[log_level].append(log["_id"]) - return self.merge_big_and_small_logs(logs, log_level_ids_to_add, - log_level_messages, 
log_level_ids_merged, - logs_ids_in_merged_logs) + return self.merge_big_and_small_logs( + logs, log_level_ids_to_add, log_level_messages, log_level_ids_merged, logs_ids_in_merged_logs) - def prepare_new_log(self, old_log, new_id, is_merged, merged_small_logs, fields_to_clean=None): + def prepare_new_log(self, old_log: dict[str, Any], new_id, is_merged: bool, merged_small_logs: str, + fields_to_clean: Optional[list[str]] = None) -> dict[str, Any]: """Prepare updated log""" merged_log = copy.deepcopy(old_log) merged_log["_source"]["is_merged"] = is_merged diff --git a/app/commons/log_preparation.py b/app/commons/log_preparation.py deleted file mode 100644 index 3916c139..00000000 --- a/app/commons/log_preparation.py +++ /dev/null @@ -1,294 +0,0 @@ -# Copyright 2023 EPAM Systems -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from app.utils import utils, text_processing -from datetime import datetime -from app.commons.log_merger import LogMerger - - -class LogPreparation: - - def __init__(self): - self.log_merger = LogMerger() - - def clean_message(self, message): - # FIXME: unify line endings to '\n' to apply optimizations - # cleaned_message = text_processing.unify_line_endings(message) - cleaned_message = text_processing.replace_tabs_for_newlines(message) - cleaned_message = text_processing.fix_big_encoded_urls(cleaned_message) - cleaned_message = text_processing.remove_generated_parts(cleaned_message) - cleaned_message = text_processing.remove_guid_uids_from_text(cleaned_message) - cleaned_message = text_processing.clean_html(cleaned_message) - cleaned_message = text_processing.delete_empty_lines(cleaned_message) - cleaned_message = text_processing.leave_only_unique_lines(cleaned_message) - return cleaned_message - - def _create_log_template(self) -> dict: - return { - "_id": "", - "_index": "", - "_source": { - "launch_id": "", - "launch_name": "", - "launch_number": 0, - "launch_start_time": "", - "test_item": "", - "test_item_name": "", - "unique_id": "", - "cluster_id": "", - "cluster_message": "", - "test_case_hash": 0, - "is_auto_analyzed": False, - "issue_type": "", - "log_time": "", - "log_level": 0, - "original_message_lines": 0, - "original_message_words_number": 0, - "message": "", - "is_merged": False, - "start_time": "", - "merged_small_logs": "", - "detected_message": "", - "detected_message_with_numbers": "", - "stacktrace": "", - "only_numbers": "", - "found_exceptions": "", - "whole_message": "", - "potential_status_codes": "", - "found_tests_and_methods": "", - "cluster_with_numbers": False}} - - def transform_issue_type_into_lowercase(self, issue_type): - return issue_type[:2].lower() + issue_type[2:] - - def _fill_launch_test_item_fields(self, log_template, launch, test_item, project): - log_template["_index"] = project - log_template["_source"]["launch_id"] = launch.launchId - log_template["_source"]["launch_name"] = launch.launchName - log_template["_source"]["launch_number"] = getattr(launch, 'launchNumber', 0) - log_template["_source"]["launch_start_time"] = 
datetime( - *launch.launchStartTime[:6]).strftime("%Y-%m-%d %H:%M:%S") - log_template["_source"]["test_item"] = test_item.testItemId - log_template["_source"]["unique_id"] = test_item.uniqueId - log_template["_source"]["test_case_hash"] = test_item.testCaseHash - log_template["_source"]["is_auto_analyzed"] = test_item.isAutoAnalyzed - log_template["_source"]["test_item_name"] = text_processing.preprocess_test_item_name(test_item.testItemName) - log_template["_source"]["issue_type"] = self.transform_issue_type_into_lowercase( - test_item.issueType) - log_template["_source"]["start_time"] = datetime( - *test_item.startTime[:6]).strftime("%Y-%m-%d %H:%M:%S") - return log_template - - def _fill_log_fields(self, log_template, log, number_of_lines): - cleaned_message = self.clean_message(log.message) - - test_and_methods = text_processing.find_test_methods_in_text(cleaned_message) - message = text_processing.first_lines(cleaned_message, number_of_lines) - message = text_processing.replace_text_pieces(message, test_and_methods) - message_without_params = message - message = text_processing.delete_empty_lines(text_processing.sanitize_text(message)) - - message_without_params = text_processing.clean_from_urls(message_without_params) - message_without_params = text_processing.clean_from_paths(message_without_params) - message_without_params = text_processing.clean_from_params(message_without_params) - message_without_params = text_processing.remove_starting_datetime(message_without_params) - message_without_params = text_processing.sanitize_text(message_without_params) - message_without_params_and_brackets = text_processing.clean_from_brackets(message_without_params) - - detected_message, stacktrace = text_processing.detect_log_description_and_stacktrace(cleaned_message) - - detected_message_without_params = detected_message - urls = " ".join(text_processing.extract_urls(detected_message_without_params)) - detected_message_without_params = text_processing.clean_from_urls(detected_message_without_params) - paths = " ".join(text_processing.extract_paths(detected_message_without_params)) - detected_message_without_params = text_processing.clean_from_paths(detected_message_without_params) - potential_status_codes = " ".join( - text_processing.get_potential_status_codes(detected_message_without_params)) - detected_message_without_params = text_processing.replace_text_pieces( - detected_message_without_params, test_and_methods) - detected_message = text_processing.replace_text_pieces(detected_message, test_and_methods) - detected_message_without_params = text_processing.remove_starting_datetime(detected_message_without_params) - detected_message_wo_urls_and_paths = detected_message_without_params - - message_params = " ".join(text_processing.extract_message_params(detected_message_without_params)) - detected_message_without_params = text_processing.clean_from_params(detected_message_without_params) - detected_message_without_params = text_processing.sanitize_text(detected_message_without_params) - detected_message_without_params_and_brackets = text_processing.clean_from_brackets( - detected_message_without_params) - - detected_message_with_numbers = text_processing.remove_starting_datetime(detected_message) - detected_message_only_numbers = text_processing.find_only_numbers(detected_message_with_numbers) - detected_message = text_processing.sanitize_text(detected_message) - stacktrace = text_processing.sanitize_text(stacktrace) - found_exceptions = text_processing.get_found_exceptions(detected_message) - 
found_exceptions_extended = text_processing.enrich_found_exceptions(found_exceptions) - found_test_methods = text_processing.enrich_text_with_method_and_classes(" ".join(test_and_methods)) - - log_template["_id"] = log.logId - log_template["_source"]["log_time"] = datetime(*log.logTime[:6]).strftime("%Y-%m-%d %H:%M:%S") - log_template["_source"]["cluster_id"] = str(log.clusterId) - log_template["_source"]["cluster_message"] = log.clusterMessage - log_template["_source"]["cluster_with_numbers"] = utils.extract_clustering_setting(log.clusterId) - log_template["_source"]["log_level"] = log.logLevel - log_template["_source"]["original_message_lines"] = text_processing.calculate_line_number(cleaned_message) - log_template["_source"]["original_message_words_number"] = len( - text_processing.split_words(cleaned_message, split_urls=False)) - log_template["_source"]["message"] = message - log_template["_source"]["detected_message"] = detected_message - log_template["_source"]["detected_message_with_numbers"] = detected_message_with_numbers - log_template["_source"]["stacktrace"] = stacktrace - log_template["_source"]["only_numbers"] = detected_message_only_numbers - log_template["_source"]["urls"] = urls - log_template["_source"]["paths"] = paths - log_template["_source"]["message_params"] = message_params - log_template["_source"]["found_exceptions"] = found_exceptions - log_template["_source"]["found_exceptions_extended"] = found_exceptions_extended - log_template["_source"]["detected_message_extended"] =\ - text_processing.enrich_text_with_method_and_classes(detected_message) - log_template["_source"]["detected_message_without_params_extended"] =\ - text_processing.enrich_text_with_method_and_classes(detected_message_without_params) - log_template["_source"]["stacktrace_extended"] =\ - text_processing.enrich_text_with_method_and_classes(stacktrace) - log_template["_source"]["message_extended"] =\ - text_processing.enrich_text_with_method_and_classes(message) - log_template["_source"]["message_without_params_extended"] =\ - text_processing.enrich_text_with_method_and_classes(message_without_params) - log_template["_source"]["whole_message"] = detected_message_wo_urls_and_paths + " \n " + stacktrace - log_template["_source"]["detected_message_without_params_and_brackets"] =\ - detected_message_without_params_and_brackets - log_template["_source"]["message_without_params_and_brackets"] =\ - message_without_params_and_brackets - log_template["_source"]["potential_status_codes"] =\ - potential_status_codes - log_template["_source"]["found_tests_and_methods"] = found_test_methods - - for field in ["message", "detected_message", "detected_message_with_numbers", - "stacktrace", "only_numbers", "found_exceptions", "found_exceptions_extended", - "detected_message_extended", "detected_message_without_params_extended", - "stacktrace_extended", "message_extended", "message_without_params_extended", - "detected_message_without_params_and_brackets", - "message_without_params_and_brackets"]: - log_template["_source"][field] = text_processing.leave_only_unique_lines(log_template["_source"][field]) - log_template["_source"][field] = text_processing.clean_colon_stacking(log_template["_source"][field]) - return log_template - - def _prepare_log(self, launch, test_item, log, project) -> dict: - log_template = self._create_log_template() - log_template = self._fill_launch_test_item_fields(log_template, launch, test_item, project) - log_template = self._fill_log_fields(log_template, log, 
launch.analyzerConfig.numberOfLogLines) - return log_template - - def _fill_test_item_info_fields(self, log_template, test_item_info, project): - log_template["_index"] = project - log_template["_source"]["launch_id"] = test_item_info.launchId - log_template["_source"]["launch_name"] = test_item_info.launchName - log_template["_source"]["launch_number"] = getattr(test_item_info, 'launchNumber', 0) - log_template["_source"]["test_item"] = test_item_info.testItemId - log_template["_source"]["unique_id"] = test_item_info.uniqueId - log_template["_source"]["test_case_hash"] = test_item_info.testCaseHash - log_template["_source"]["test_item_name"] = text_processing.preprocess_test_item_name( - test_item_info.testItemName) - log_template["_source"]["is_auto_analyzed"] = False - log_template["_source"]["issue_type"] = "" - log_template["_source"]["start_time"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") - return log_template - - def _prepare_log_for_suggests(self, test_item_info, log, project): - log_template = self._create_log_template() - log_template = self._fill_test_item_info_fields(log_template, test_item_info, project) - log_template = self._fill_log_fields( - log_template, log, test_item_info.analyzerConfig.numberOfLogLines) - return log_template - - def prepare_log_words(self, launches): - log_words = {} - project = None - for launch in launches: - project = str(launch.project) - for test_item in launch.testItems: - for log in test_item.logs: - - if log.logLevel < utils.ERROR_LOGGING_LEVEL or not log.message.strip(): - continue - clean_message = self.clean_message(log.message) - det_message, stacktrace = text_processing.detect_log_description_and_stacktrace( - clean_message) - for word in text_processing.split_words(stacktrace): - if "." in word and len(word.split(".")) > 2: - log_words[word] = 1 - return log_words, project - - def prepare_log_clustering_light(self, launch, test_item, log, project): - log_template = self._create_log_template() - log_template = self._fill_launch_test_item_fields(log_template, launch, test_item, project) - cleaned_message = self.clean_message(log.message) - detected_message, stacktrace = text_processing.detect_log_description_and_stacktrace( - cleaned_message) - test_and_methods = text_processing.find_test_methods_in_text(cleaned_message) - detected_message = text_processing.replace_text_pieces(detected_message, test_and_methods) - stacktrace = text_processing.sanitize_text(stacktrace) - message = text_processing.first_lines(cleaned_message, -1) - message = text_processing.sanitize_text(message) - log_template["_id"] = log.logId - log_template["_source"]["cluster_id"] = str(log.clusterId) - log_template["_source"]["cluster_message"] = log.clusterMessage - log_template["_source"]["log_level"] = log.logLevel - log_template["_source"]["original_message_lines"] = text_processing.calculate_line_number( - cleaned_message) - log_template["_source"]["original_message_words_number"] = len( - text_processing.split_words(cleaned_message, split_urls=False)) - detected_message_wo_urls_and_paths = text_processing.clean_from_urls(detected_message) - detected_message_wo_urls_and_paths = text_processing.clean_from_paths(detected_message_wo_urls_and_paths) - detected_message_wo_urls_and_paths = text_processing.remove_starting_datetime( - detected_message_wo_urls_and_paths) - log_template["_source"]["message"] = message - log_template["_source"]["detected_message"] = detected_message_wo_urls_and_paths - log_template["_source"]["detected_message_with_numbers"] = 
detected_message_wo_urls_and_paths - log_template["_source"]["stacktrace"] = stacktrace - potential_status_codes = " ".join( - text_processing.get_potential_status_codes(detected_message_wo_urls_and_paths)) - log_template["_source"]["potential_status_codes"] = potential_status_codes - log_template["_source"]["found_exceptions"] = text_processing.get_found_exceptions(detected_message) - log_template["_source"]["whole_message"] = text_processing.delete_empty_lines( - detected_message_wo_urls_and_paths + " \n " + stacktrace) - return log_template - - def prepare_logs_for_clustering(self, launch, number_of_lines, clean_numbers, project): - log_messages = [] - log_dict = {} - ind = 0 - full_log_ids_for_merged_logs = {} - for test_item in launch.testItems: - prepared_logs = [] - for log in test_item.logs: - if log.logLevel < utils.ERROR_LOGGING_LEVEL: - continue - prepared_logs.append( - self.prepare_log_clustering_light(launch, test_item, log, project)) - merged_logs, log_ids_for_merged_logs = self.log_merger.decompose_logs_merged_and_without_duplicates( # noqa - prepared_logs) - for _id in log_ids_for_merged_logs: - full_log_ids_for_merged_logs[_id] = log_ids_for_merged_logs[_id] - for log in merged_logs: - number_of_log_lines = number_of_lines - if log["_source"]["is_merged"]: - number_of_log_lines = -1 - log_message = text_processing.prepare_message_for_clustering( - log["_source"]["whole_message"], number_of_log_lines, clean_numbers) - if not log_message.strip(): - continue - log_messages.append(log_message) - log_dict[ind] = log - ind += 1 - return log_messages, log_dict, full_log_ids_for_merged_logs diff --git a/app/commons/log_requests.py b/app/commons/log_requests.py new file mode 100644 index 00000000..3744f388 --- /dev/null +++ b/app/commons/log_requests.py @@ -0,0 +1,240 @@ +# Copyright 2023 EPAM Systems +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
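For orientation, a minimal usage sketch of the LogRequests helpers defined in the new module below; the launch, test item and log values are made up, and the ERROR level threshold of 40000 is an assumption:

    from app.commons.log_requests import LogRequests
    from app.commons.model.launch_objects import Launch, TestItem, Log

    # Hypothetical objects; 40000 is assumed to correspond to utils.ERROR_LOGGING_LEVEL.
    log = Log(logId=1, logLevel=40000,
              message='java.lang.RuntimeException: boom\n\tat com.acme.service.Handler.run(Handler.java:10)')
    item = TestItem(testItemId=2, isAutoAnalyzed=False, logs=[log])
    launch = Launch(launchId=3, project=34, testItems=[item])
    # Collects dotted stacktrace tokens (package/class names) across all launches.
    words, project = LogRequests.prepare_log_words([launch])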
+ +from datetime import datetime +from typing import Any + +from app.commons.model.launch_objects import Launch, TestItem, Log, TestItemInfo +from app.commons.log_merger import LogMerger +from app.commons.prepared_log import PreparedLogMessage +from app.utils import utils, text_processing +from app.utils.log_preparation import basic_prepare + + +def create_log_template() -> dict: + return { + "_id": "", + "_index": "", + "_source": { + "launch_id": "", + "launch_name": "", + "launch_number": 0, + "launch_start_time": "", + "test_item": "", + "test_item_name": "", + "unique_id": "", + "cluster_id": "", + "cluster_message": "", + "test_case_hash": 0, + "is_auto_analyzed": False, + "issue_type": "", + "log_time": "", + "log_level": 0, + 'original_message': '', + "original_message_lines": 0, + "original_message_words_number": 0, + "message": "", + "is_merged": False, + "start_time": "", + "merged_small_logs": "", + "detected_message": "", + "detected_message_with_numbers": "", + "stacktrace": "", + "only_numbers": "", + "found_exceptions": "", + "whole_message": "", + "potential_status_codes": "", + "found_tests_and_methods": "", + "cluster_with_numbers": False + } + } + + +class LogRequests: + + def __init__(self): + self.log_merger = LogMerger() + + @staticmethod + def transform_issue_type_into_lowercase(issue_type): + return issue_type[:2].lower() + issue_type[2:] + + @staticmethod + def _fill_launch_test_item_fields(log_template: dict, launch: Launch, test_item: TestItem, project: str): + log_template["_index"] = project + log_template["_source"]["launch_id"] = launch.launchId + log_template["_source"]["launch_name"] = launch.launchName + log_template["_source"]["launch_number"] = getattr(launch, 'launchNumber', 0) + log_template["_source"]["launch_start_time"] = datetime( + *launch.launchStartTime[:6]).strftime("%Y-%m-%d %H:%M:%S") + log_template["_source"]["test_item"] = test_item.testItemId + log_template["_source"]["unique_id"] = test_item.uniqueId + log_template["_source"]["test_case_hash"] = test_item.testCaseHash + log_template["_source"]["is_auto_analyzed"] = test_item.isAutoAnalyzed + log_template["_source"]["test_item_name"] = text_processing.preprocess_test_item_name(test_item.testItemName) + log_template["_source"]["issue_type"] = LogRequests.transform_issue_type_into_lowercase(test_item.issueType) + log_template["_source"]["start_time"] = datetime(*test_item.startTime[:6]).strftime("%Y-%m-%d %H:%M:%S") + return log_template + + @staticmethod + def _fill_log_fields(log_template: dict, log: Log, number_of_lines: int) -> dict[str, Any]: + prepared_log = PreparedLogMessage(log.message, number_of_lines) + log_template["_id"] = log.logId + log_template["_source"]["log_time"] = datetime(*log.logTime[:6]).strftime("%Y-%m-%d %H:%M:%S") + log_template["_source"]["cluster_id"] = str(log.clusterId) + log_template["_source"]["cluster_message"] = log.clusterMessage + log_template["_source"]["cluster_with_numbers"] = utils.extract_clustering_setting(log.clusterId) + log_template["_source"]["log_level"] = log.logLevel + log_template["_source"]['original_message'] = log.message + log_template["_source"]["original_message_lines"] = text_processing.calculate_line_number( + prepared_log.clean_message) + log_template["_source"]["original_message_words_number"] = len( + text_processing.split_words(prepared_log.clean_message, split_urls=False)) + log_template["_source"]["message"] = prepared_log.message + log_template["_source"]["detected_message"] = prepared_log.exception_message_no_numbers + 
log_template["_source"]["detected_message_with_numbers"] = prepared_log.exception_message + log_template["_source"]["stacktrace"] = prepared_log.stacktrace + log_template["_source"]["only_numbers"] = prepared_log.exception_message_numbers + log_template["_source"]["urls"] = prepared_log.exception_message_urls + log_template["_source"]["paths"] = prepared_log.exception_message_paths + log_template["_source"]["message_params"] = prepared_log.exception_message_params + log_template["_source"]["found_exceptions"] = prepared_log.exception_found + log_template["_source"]["found_exceptions_extended"] = prepared_log.exception_found_extended + log_template["_source"]["detected_message_extended"] = \ + text_processing.enrich_text_with_method_and_classes(prepared_log.exception_message) + log_template["_source"]["detected_message_without_params_extended"] = \ + text_processing.enrich_text_with_method_and_classes(prepared_log.exception_message_no_params) + log_template["_source"]["stacktrace_extended"] = \ + text_processing.enrich_text_with_method_and_classes(prepared_log.stacktrace) + log_template["_source"]["message_extended"] = \ + text_processing.enrich_text_with_method_and_classes(prepared_log.message) + log_template["_source"]["message_without_params_extended"] = \ + text_processing.enrich_text_with_method_and_classes(prepared_log.message_no_params) + log_template["_source"]["whole_message"] = (prepared_log.exception_message_no_params + "\n" + + prepared_log.stacktrace) + log_template["_source"]["detected_message_without_params_and_brackets"] = \ + prepared_log.exception_message_no_params + log_template["_source"]["message_without_params_and_brackets"] = prepared_log.message_no_params + log_template["_source"]["potential_status_codes"] = prepared_log.exception_message_potential_status_codes + log_template["_source"]["found_tests_and_methods"] = prepared_log.test_and_methods_extended + + for field in ["message", "detected_message", "detected_message_with_numbers", + "stacktrace", "only_numbers", "found_exceptions", "found_exceptions_extended", + "detected_message_extended", "detected_message_without_params_extended", + "stacktrace_extended", "message_extended", "message_without_params_extended", + "detected_message_without_params_and_brackets", "message_without_params_and_brackets"]: + log_template["_source"][field] = text_processing.leave_only_unique_lines(log_template["_source"][field]) + log_template["_source"][field] = text_processing.clean_colon_stacking(log_template["_source"][field]) + return log_template + + @staticmethod + def _prepare_log(launch: Launch, test_item: TestItem, log: Log, project: str) -> dict: + log_template = create_log_template() + log_template = LogRequests._fill_launch_test_item_fields(log_template, launch, test_item, project) + log_template = LogRequests._fill_log_fields(log_template, log, launch.analyzerConfig.numberOfLogLines) + return log_template + + @staticmethod + def _fill_test_item_info_fields(log_template: dict, test_item_info: TestItemInfo, project: str) -> dict[str, Any]: + log_template["_index"] = project + log_template["_source"]["launch_id"] = test_item_info.launchId + log_template["_source"]["launch_name"] = test_item_info.launchName + log_template["_source"]["launch_number"] = getattr(test_item_info, 'launchNumber', 0) + log_template["_source"]["test_item"] = test_item_info.testItemId + log_template["_source"]["unique_id"] = test_item_info.uniqueId + log_template["_source"]["test_case_hash"] = test_item_info.testCaseHash + 
log_template["_source"]["test_item_name"] = text_processing.preprocess_test_item_name( + test_item_info.testItemName) + log_template["_source"]["is_auto_analyzed"] = False + log_template["_source"]["issue_type"] = "" + log_template["_source"]["start_time"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + return log_template + + @staticmethod + def _prepare_log_for_suggests(test_item_info: TestItemInfo, log: Log, project: str) -> dict: + log_template = create_log_template() + log_template = LogRequests._fill_test_item_info_fields(log_template, test_item_info, project) + log_template = LogRequests._fill_log_fields( + log_template, log, test_item_info.analyzerConfig.numberOfLogLines) + return log_template + + @staticmethod + def prepare_log_words(launches: list[Launch]) -> tuple[dict[str, int], int]: + log_words = {} + project = None + for launch in launches: + project = launch.project + for test_item in launch.testItems: + for log in test_item.logs: + if log.logLevel < utils.ERROR_LOGGING_LEVEL or not log.message.strip(): + continue + cleaned_message = basic_prepare(log.message) + det_message, stacktrace = text_processing.detect_log_description_and_stacktrace(cleaned_message) + for word in text_processing.split_words(stacktrace): + if '.' in word and len(word.split('.')) > 2: + log_words[word] = 1 + return log_words, project + + @staticmethod + def prepare_log_clustering_light(launch: Launch, test_item: TestItem, log: Log, project: str) -> dict[str, Any]: + log_template = create_log_template() + log_template = LogRequests._fill_launch_test_item_fields(log_template, launch, test_item, project) + prepared_log = PreparedLogMessage(log.message, -1) + log_template["_id"] = log.logId + log_template["_source"]["cluster_id"] = str(log.clusterId) + log_template["_source"]["cluster_message"] = log.clusterMessage + log_template["_source"]["log_level"] = log.logLevel + log_template["_source"]['original_message'] = log.message + log_template["_source"]["original_message_lines"] = text_processing.calculate_line_number( + prepared_log.clean_message) + log_template["_source"]["original_message_words_number"] = len( + text_processing.split_words(prepared_log.clean_message, split_urls=False)) + log_template["_source"]["message"] = text_processing.remove_numbers(prepared_log.message) + log_template["_source"]["detected_message"] = prepared_log.exception_message_no_numbers + log_template["_source"]["detected_message_with_numbers"] = prepared_log.exception_message + log_template["_source"]["stacktrace"] = prepared_log.stacktrace + log_template["_source"]["potential_status_codes"] = prepared_log.exception_message_potential_status_codes + log_template["_source"]["found_exceptions"] = prepared_log.exception_found + log_template["_source"]["whole_message"] = (prepared_log.exception_message_no_params + "\n" + + prepared_log.stacktrace) + return log_template + + def prepare_logs_for_clustering(self, launch: Launch, number_of_lines: int, clean_numbers: bool, + project: str) -> tuple[list[str], dict[int, dict[str, Any]], dict[str, list[int]]]: + log_messages = [] + log_dict = {} + ind = 0 + full_log_ids_for_merged_logs = {} + for test_item in launch.testItems: + prepared_logs = [] + for log in test_item.logs: + if log.logLevel < utils.ERROR_LOGGING_LEVEL: + continue + prepared_logs.append(LogRequests.prepare_log_clustering_light(launch, test_item, log, project)) + merged_logs, log_ids_for_merged_logs = self.log_merger.decompose_logs_merged_and_without_duplicates( + prepared_logs) + for _id, merged_list in 
log_ids_for_merged_logs.items(): + full_log_ids_for_merged_logs[_id] = merged_list + for log in merged_logs: + number_of_log_lines = number_of_lines + if log["_source"]["is_merged"]: + number_of_log_lines = -1 + log_message = text_processing.prepare_message_for_clustering( + log["_source"]["whole_message"], number_of_log_lines, clean_numbers) + if not log_message.strip(): + continue + log_messages.append(log_message) + log_dict[ind] = log + ind += 1 + return log_messages, log_dict, full_log_ids_for_merged_logs diff --git a/app/commons/logging.py b/app/commons/logging.py new file mode 100644 index 00000000..c24bce25 --- /dev/null +++ b/app/commons/logging.py @@ -0,0 +1,89 @@ +# Copyright 2023 EPAM Systems +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Logging adapter to add correlation id to each log entry which Analyzer outputs.""" + +import base64 +import logging +import uuid +from threading import local + +__INSTANCES = local() + + +class Logger: + __logger: logging.Logger + + def __init__(self, logger: logging.Logger): + self.__logger = logger + + def debug(self, msg, *args, **kwargs): + """ + Delegate a debug call to the underlying logger. + """ + kwargs['extra'] = {'correlation_id': get_correlation_id()} + self.__logger.debug(msg, *args, **kwargs) + + def info(self, msg, *args, **kwargs): + """ + Delegate an info call to the underlying logger. + """ + kwargs['extra'] = {'correlation_id': get_correlation_id()} + self.__logger.info(msg, *args, **kwargs) + + def warning(self, msg, *args, **kwargs): + """ + Delegate a warning call to the underlying logger. + """ + kwargs['extra'] = {'correlation_id': get_correlation_id()} + self.__logger.warning(msg, *args, **kwargs) + + def error(self, msg, *args, **kwargs): + """ + Delegate an error call to the underlying logger. + """ + kwargs['extra'] = {'correlation_id': get_correlation_id()} + self.__logger.error(msg, *args, **kwargs) + + def exception(self, msg, *args, exc_info=True, **kwargs): + """ + Delegate an exception call to the underlying logger. + """ + kwargs['extra'] = {'correlation_id': get_correlation_id()} + self.__logger.error(msg, *args, exc_info=exc_info, **kwargs) + + def critical(self, msg, *args, **kwargs): + """ + Delegate a critical call to the underlying logger. 
+ """ + kwargs['extra'] = {'correlation_id': get_correlation_id()} + self.__logger.critical(msg, *args, **kwargs) + + +def new_correlation_id() -> str: + corr_id = base64.urlsafe_b64encode(uuid.uuid4().bytes).decode('utf-8').rstrip('=') + __INSTANCES.correlation_id = corr_id + return corr_id + + +def get_correlation_id() -> str: + corr_id = getattr(__INSTANCES, 'correlation_id', None) + if corr_id is None: + corr_id = new_correlation_id() + return corr_id + + +# noinspection PyPep8Naming +def getLogger(logger_name: str) -> Logger: + return Logger(logging.getLogger(logger_name)) diff --git a/app/commons/model/__init__.py b/app/commons/model/__init__.py new file mode 100644 index 00000000..2c4530c5 --- /dev/null +++ b/app/commons/model/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2024 EPAM Systems +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/app/commons/launch_objects.py b/app/commons/model/launch_objects.py similarity index 53% rename from app/commons/launch_objects.py rename to app/commons/model/launch_objects.py index b4c4c1d1..0c7d30f0 100644 --- a/app/commons/launch_objects.py +++ b/app/commons/model/launch_objects.py @@ -12,15 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import List -from pydantic import BaseModel from datetime import datetime +from typing import Optional + +from pydantic import BaseModel class AnalyzerConf(BaseModel): """Analyzer config object""" - analyzerMode: str = "ALL" - minShouldMatch: int = 0 + analyzerMode: str = 'ALL' + minShouldMatch: int = 80 numberOfLogLines: int = -1 isAutoAnalyzerEnabled: bool = True indexingRunning: bool = True @@ -29,6 +30,76 @@ class AnalyzerConf(BaseModel): uniqueErrorsMinShouldMatch: int = 95 +class ApplicationConfig(BaseModel): + esHost: str = '' + esUser: str = '' + esPassword: str = '' + logLevel: str = 'DEBUG' + amqpUrl: str = '' + exchangeName: str = 'analyzer' + analyzerPriority: int = 1 + analyzerIndex: bool = True + analyzerLogSearch: bool = True + analyzerSuggest: bool = True + analyzerCluster: bool = True + turnOffSslVerification: bool = False + esVerifyCerts: bool = False + esUseSsl: bool = False + esSslShowWarn: bool = False + esCAcert: str = '' + esClientCert: str = '' + esClientKey: str = '' + minioHost: str = 'minio:9000' + minioAccessKey: str = 'minio' + minioSecretKey: str = 'minio123' + minioUseTls: bool = False + appVersion: str = '' + binaryStoreType: str = 'filesystem' + minioBucketPrefix: str = 'prj-' + minioRegion: str | None = None + instanceTaskType: str = '' + filesystemDefaultPath: str = 'storage' + esChunkNumber: int = 1000 + esChunkNumberUpdateClusters: int = 500 + esProjectIndexPrefix: str = '' + analyzerHttpPort: int = 5001 + analyzerPathToLog: str = '/tmp/config.log' + + +class SearchConfig(BaseModel): + """Search config object""" + SearchLogsMinSimilarity: float = 0.95 + MinShouldMatch: str = '80%' + BoostAA: float = 2.0 + BoostLaunch: float = 2.0 + BoostTestCaseHash: float = 2.0 + MaxQueryTerms: int = 50 + MinWordLength: int = 2 + TimeWeightDecay: float = 0.95 + PatternLabelMinPercentToSuggest: float = 0.9 + PatternLabelMinCountToSuggest: int = 5 + PatternMinCountToSuggest: int = 10 + MaxLogsForDefectTypeModel: int = 10 + ProbabilityForCustomModelSuggestions: float = 0.7 + ProbabilityForCustomModelAutoAnalysis: float = 0.5 + BoostModelFolder: str = '' + SuggestBoostModelFolder: str = '' + SimilarityWeightsFolder: str = '' + GlobalDefectTypeModelFolder: str = '' + SuggestBoostModelFeatures: str = '' + AutoBoostModelFeatures: str = '' + SuggestBoostModelMonotonousFeatures: str = '' + AutoBoostModelMonotonousFeatures: str = '' + MaxSuggestionsNumber: int = 3 + AutoAnalysisTimeout: int = 300 + MaxAutoAnalysisItemsToProcess: int = 4000 + DefectTypeModelNumEstimators: int = 5 + SuggestBoostModelNumEstimators: int = 50 + SuggestBoostModelMaxDepth: int = 5 + AutoBoostModelNumEstimators: int = 50 + AutoBoostModelMaxDepth: int = 5 + + class SearchLogInfo(BaseModel): """Search log info""" logId: int @@ -40,51 +111,56 @@ class Log(BaseModel): """Log object""" logId: int logLevel: int = 0 - logTime: List[int] = list(datetime.now().timetuple())[:7] + logTime: list[int] = list(datetime.now().timetuple())[:7] message: str clusterId: int = 0 - clusterMessage: str = "" + clusterMessage: str = '' class TestItem(BaseModel): """Test item object""" testItemId: int - uniqueId: str isAutoAnalyzed: bool - issueType: str = "" - originalIssueType: str = "" - startTime: List[int] = list(datetime.now().timetuple())[:7] + uniqueId: str = '' + issueType: str = '' + issueDescription: str = '' + originalIssueType: str = '' + startTime: list[int] = list(datetime.now().timetuple())[:7] + endTime: Optional[list[int]] = None + lastModified: Optional[list[int]] = None testCaseHash: int = 0 - 
testItemName: str = "" - logs: List[Log] = [] + testItemName: str = '' + description: Optional[str] = None + linksToBts: list[str] = [] + logs: list[Log] = [] class TestItemInfo(BaseModel): """Test item info object""" testItemId: int = 0 - uniqueId: str = "" + uniqueId: str = '' testCaseHash: int = 0 clusterId: int = 0 launchId: int - launchName: str = "" + launchName: str = '' launchNumber: int = 0 previousLaunchId: int = 0 - testItemName: str = "" + testItemName: str = '' project: int analyzerConfig: AnalyzerConf = AnalyzerConf() - logs: List[Log] = [] + logs: list[Log] = [] class Launch(BaseModel): """Launch object""" launchId: int project: int - launchName: str = "" + launchName: str = '' launchNumber: int = 0 previousLaunchId: int = 0 - launchStartTime: List[int] = list(datetime.now().timetuple())[:7] + launchStartTime: list[int] = list(datetime.now().timetuple())[:7] analyzerConfig: AnalyzerConf = AnalyzerConf() - testItems: List[TestItem] = [] + testItems: list[TestItem] = [] clusters: dict = {} @@ -106,15 +182,15 @@ class AnalysisResult(BaseModel): class ClusterInfo(BaseModel): clusterId: int clusterMessage: str - logIds: List[int] - itemIds: List[int] + logIds: list[int] + itemIds: list[int] class ClusterResult(BaseModel): """Analysis result object""" project: int launchId: int - clusters: List[ClusterInfo] + clusters: list[ClusterInfo] class SuggestAnalysisResult(BaseModel): @@ -146,13 +222,13 @@ class SuggestAnalysisResult(BaseModel): class CleanIndex(BaseModel): """Clean index object""" - ids: List[int] + ids: list[int] project: int class CleanIndexStrIds(BaseModel): """Clean index object that supports string ids""" - ids: List[str] + ids: list[str] project: int @@ -162,8 +238,8 @@ class SearchLogs(BaseModel): launchName: str itemId: int projectId: int - filteredLaunchIds: List[int] - logMessages: List[str] + filteredLaunchIds: list[int] + logMessages: list[str] analyzerConfig: AnalyzerConf = AnalyzerConf() logLines: int @@ -171,22 +247,22 @@ class SearchLogs(BaseModel): class Response(BaseModel): """Response object""" acknowledged: bool = False - error: str = "" + error: str = '' status: int = 0 class LogExceptionResult(BaseModel): """Log object with exceptions""" logId: int - foundExceptions: List[str] = [] + foundExceptions: list[str] = [] class BulkResponse(BaseModel): """Bulk response object""" took: int errors: bool - items: List[str] = [] - logResults: List[LogExceptionResult] = [] + items: list[str] = [] + logResults: list[LogExceptionResult] = [] status: int = 0 @@ -195,13 +271,13 @@ class SuggestPatternLabel(BaseModel): pattern: str totalCount: int percentTestItemsWithLabel: float = 0.0 - label: str = "" + label: str = '' class SuggestPattern(BaseModel): """Suggest pattern object with 2 lists of suggestions""" - suggestionsWithLabels: List[SuggestPatternLabel] = [] - suggestionsWithoutLabels: List[SuggestPatternLabel] = [] + suggestionsWithLabels: list[SuggestPatternLabel] = [] + suggestionsWithoutLabels: list[SuggestPatternLabel] = [] class BatchLogInfo(BaseModel): @@ -219,8 +295,8 @@ class AnalysisCandidate(BaseModel): analyzerConfig: AnalyzerConf testItemId: int timeProcessed: float - candidates: List[tuple] - candidatesWithNoDefect: List[tuple] + candidates: list[tuple] + candidatesWithNoDefect: list[tuple] project: int launchId: int launchName: str diff --git a/app/commons/model/ml.py b/app/commons/model/ml.py new file mode 100644 index 00000000..9effb5f5 --- /dev/null +++ b/app/commons/model/ml.py @@ -0,0 +1,33 @@ +# Copyright 2024 EPAM Systems +# +# Licensed 
under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from enum import Enum, auto +from typing import Iterable, Optional + +from pydantic import BaseModel + + +class ModelType(Enum): + defect_type = auto() + suggestion = auto() + auto_analysis = auto() + + +class ModelInfo(BaseModel): + model_type: ModelType + project: int + + +class TrainInfo(ModelInfo): + additional_projects: Optional[Iterable[int]] + gathered_metric_total: int = 0 diff --git a/app/commons/model_chooser.py b/app/commons/model_chooser.py index c81191c3..f2097878 100644 --- a/app/commons/model_chooser.py +++ b/app/commons/model_chooser.py @@ -12,72 +12,85 @@ # See the License for the specific language governing permissions and # limitations under the License. -from app.boosting_decision_making import (defect_type_model, custom_defect_type_model, custom_boosting_decision_maker, - boosting_decision_maker) -from app.commons.object_saving.object_saver import ObjectSaver -import logging -import numpy as np import os +import numpy as np + +from app.commons import logging +from app.commons import object_saving +from app.commons.model.launch_objects import SearchConfig, ApplicationConfig +from app.commons.model.ml import ModelType +from app.commons.object_saving.object_saver import ObjectSaver +from app.machine_learning.models import (defect_type_model, custom_defect_type_model, custom_boosting_decision_maker, + boosting_decision_maker, MlModel) + logger = logging.getLogger("analyzerApp.modelChooser") +CUSTOM_MODEL_MAPPING = { + ModelType.defect_type: custom_defect_type_model.CustomDefectTypeModel, + ModelType.suggestion: custom_boosting_decision_maker.CustomBoostingDecisionMaker, + ModelType.auto_analysis: custom_boosting_decision_maker.CustomBoostingDecisionMaker +} + +GLOBAL_MODEL_MAPPING = { + ModelType.defect_type: defect_type_model.DefectTypeModel, + ModelType.suggestion: boosting_decision_maker.BoostingDecisionMaker, + ModelType.auto_analysis: boosting_decision_maker.BoostingDecisionMaker +} + class ModelChooser: + app_config: ApplicationConfig + object_saver: ObjectSaver + search_cfg: SearchConfig + global_models: dict[ModelType, MlModel] - def __init__(self, app_config=None, search_cfg=None): - self.app_config = app_config or {} - self.search_cfg = search_cfg or {} - self.object_saver = ObjectSaver(self.app_config) - self.model_folder_mapping = { - "defect_type_model/": custom_defect_type_model.CustomDefectTypeModel, - "suggestion_model/": custom_boosting_decision_maker.CustomBoostingDecisionMaker, - "auto_analysis_model/": custom_boosting_decision_maker.CustomBoostingDecisionMaker - } - self.initialize_global_models() + def __init__(self, app_config: ApplicationConfig, search_cfg: SearchConfig): + self.app_config = app_config + self.search_cfg = search_cfg + self.object_saver = object_saving.create(self.app_config) + self.global_models = self.initialize_global_models() - def initialize_global_models(self): - self.global_models = {} - for model_name, folder, class_to_use in [ - ("defect_type_model/", - 
self.search_cfg["GlobalDefectTypeModelFolder"], defect_type_model.DefectTypeModel), - ("suggestion_model/", - self.search_cfg["SuggestBoostModelFolder"], boosting_decision_maker.BoostingDecisionMaker), - ("auto_analysis_model/", - self.search_cfg["BoostModelFolder"], boosting_decision_maker.BoostingDecisionMaker)]: + def initialize_global_models(self) -> dict[ModelType, MlModel]: + result = {} + for model_type, folder in zip( + [ModelType.defect_type, ModelType.suggestion, ModelType.auto_analysis], + [self.search_cfg.GlobalDefectTypeModelFolder, self.search_cfg.SuggestBoostModelFolder, + self.search_cfg.BoostModelFolder]): if folder.strip(): - self.global_models[model_name] = class_to_use(folder=folder) + model = GLOBAL_MODEL_MAPPING[model_type](object_saving.create_filesystem(folder)) + model.load_model() + result[model_type] = model else: - self.global_models[model_name] = None + result[model_type] = None + return result - def choose_model(self, project_id, model_name_folder, custom_model_prob=1.0): - model = self.global_models[model_name_folder] + def choose_model(self, project_id: int, model_type: ModelType, custom_model_prob: float = 1.0) -> MlModel: + model = self.global_models[model_type] prob_for_model = np.random.uniform() if prob_for_model > custom_model_prob: return model - folders = self.object_saver.get_folder_objects(project_id, model_name_folder) + folders = self.object_saver.get_folder_objects(f'{model_type.name}_model/', project_id) if len(folders): try: - model = self.model_folder_mapping[model_name_folder]( - self.app_config, project_id, folder=folders[0]) + model = CUSTOM_MODEL_MAPPING[model_type](object_saving.create(self.app_config, project_id, folders[0])) + model.load_model() except Exception as err: - logger.error(err) + logger.exception(err) return model - def delete_old_model(self, model_name, project_id): - all_folders = self.object_saver.get_folder_objects( - project_id, "%s/" % model_name) + def delete_old_model(self, model_type: ModelType, project_id: str | int | None = None): + all_folders = self.object_saver.get_folder_objects(f'{model_type.name}_model/', project_id) deleted_models = 0 for folder in all_folders: - if os.path.basename( - folder.strip("/").strip("\\")).startswith(model_name): - deleted_models += self.object_saver.remove_folder_objects(project_id, folder) + if os.path.basename(folder.strip("/").strip("\\")).startswith(model_type.name): + deleted_models += int(self.object_saver.remove_folder_objects(folder, project_id)) return deleted_models - def delete_all_custom_models(self, project_id): - for model_name_folder in self.model_folder_mapping: - self.delete_old_model(model_name_folder.strip("/").strip("\\"), project_id) + def delete_all_custom_models(self, project_id: str | int | None = None): + for model in CUSTOM_MODEL_MAPPING.keys(): + self.delete_old_model(model, project_id) - def get_model_info(self, model_name, project_id): - all_folders = self.object_saver.get_folder_objects( - project_id, "%s/" % model_name) - return all_folders[0] if len(all_folders) else "" + def get_model_info(self, model_type: ModelType, project_id: str | int | None = None): + all_folders = self.object_saver.get_folder_objects(f'{model_type.name}_model/', project_id) + return all_folders[0] if len(all_folders) else '' diff --git a/app/commons/namespace_finder.py b/app/commons/namespace_finder.py index 467fc8b9..1d488e88 100644 --- a/app/commons/namespace_finder.py +++ b/app/commons/namespace_finder.py @@ -12,38 +12,46 @@ # See the License for the specific language 
governing permissions and # limitations under the License. -import logging -from app.commons.object_saving.object_saver import ObjectSaver from gensim.models.phrases import Phrases +from app.commons import logging +from app.commons.model.launch_objects import ApplicationConfig +from app.commons.object_saving.object_saver import ObjectSaver + logger = logging.getLogger("analyzerApp.namespace_finder") +UNIQUE_WORDS_OBJECT = 'project_log_unique_words' +CHOSEN_NAMESPACES_OBJECT = 'chosen_namespaces' + class NamespaceFinder: + object_saver: ObjectSaver - def __init__(self, app_config): + def __init__(self, app_config: ApplicationConfig): self.object_saver = ObjectSaver(app_config) - def remove_namespaces(self, project_id): - self.object_saver.remove_project_objects( - project_id, ["project_log_unique_words", "chosen_namespaces"]) + def remove_namespaces(self, project_id: int): + self.object_saver.remove_project_objects([UNIQUE_WORDS_OBJECT, CHOSEN_NAMESPACES_OBJECT], project_id) - def get_chosen_namespaces(self, project_id): - return self.object_saver.get_project_object( - project_id, "chosen_namespaces", using_json=True) + def get_chosen_namespaces(self, project_id: int) -> dict[str, int]: + if self.object_saver.does_object_exists(CHOSEN_NAMESPACES_OBJECT, project_id): + return self.object_saver.get_project_object(CHOSEN_NAMESPACES_OBJECT, project_id, using_json=True) + else: + return {} - def update_namespaces(self, project_id, log_words): - all_words = self.object_saver.get_project_object( - project_id, "project_log_unique_words", using_json=True) + def update_namespaces(self, project_id: int, log_words: dict[str, int]) -> None: + if self.object_saver.does_object_exists(UNIQUE_WORDS_OBJECT, project_id): + all_words = self.object_saver.get_project_object(UNIQUE_WORDS_OBJECT, project_id, using_json=True) + else: + all_words = {} for word in log_words: all_words[word] = 1 - self.object_saver.put_project_object( - all_words, project_id, "project_log_unique_words", using_json=True) - phrases = Phrases([w.split(".") for w in all_words], min_count=1, threshold=1) + self.object_saver.put_project_object(all_words, UNIQUE_WORDS_OBJECT, project_id, using_json=True) + phrases = Phrases([w.split('.') for w in all_words], min_count=1, threshold=1) potential_project_namespaces = {} for word in all_words: - potential_namespace = phrases[word.split(".")][0] - if "_" not in potential_namespace: + potential_namespace = phrases[word.split('.')][0] + if '_' not in potential_namespace: continue if potential_namespace not in potential_project_namespaces: potential_project_namespaces[potential_namespace] = 0 @@ -51,7 +59,6 @@ def update_namespaces(self, project_id, log_words): chosen_namespaces = {} for item, cnt in potential_project_namespaces.items(): if cnt > 10: - chosen_namespaces[item.replace("_", ".")] = cnt + chosen_namespaces[item.replace('_', '.')] = cnt logger.debug("Chosen namespaces %s", chosen_namespaces) - self.object_saver.put_project_object( - chosen_namespaces, project_id, "chosen_namespaces", using_json=True) + self.object_saver.put_project_object(chosen_namespaces, CHOSEN_NAMESPACES_OBJECT, project_id, using_json=True) diff --git a/app/commons/object_saving/__init__.py b/app/commons/object_saving/__init__.py index f86b6d2f..8d0bdbf4 100644 --- a/app/commons/object_saving/__init__.py +++ b/app/commons/object_saving/__init__.py @@ -11,3 +11,21 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. + +"""Common package for different Storage services (Minio, Filesystem, etc.).""" + +from app.commons.model.launch_objects import ApplicationConfig +from app.commons.object_saving.object_saver import ObjectSaver + + +def create(app_config: ApplicationConfig, project_id: str | int | None = None, path: str | None = None) -> ObjectSaver: + return ObjectSaver(app_config=app_config, project_id=project_id, path=path) + + +def create_filesystem(base_path: str, project_id: str | int | None = None, path: str | None = None) -> ObjectSaver: + return ObjectSaver( + app_config=ApplicationConfig(binaryStoreType='filesystem', filesystemDefaultPath=base_path, + minioBucketPrefix=''), + project_id=project_id, + path=path + ) diff --git a/app/commons/object_saving/filesystem_saver.py b/app/commons/object_saving/filesystem_saver.py index 21257d5c..2bf3b5a8 100644 --- a/app/commons/object_saving/filesystem_saver.py +++ b/app/commons/object_saving/filesystem_saver.py @@ -12,79 +12,71 @@ # See the License for the specific language governing permissions and # limitations under the License. -import logging -import pickle +import json import os +import pickle import shutil -import json +from typing import Any + +from app.commons import logging +from app.commons.model.launch_objects import ApplicationConfig +from app.commons.object_saving.storage import Storage +from app.utils import utils -logger = logging.getLogger("analyzerApp.filesystemSaver") +logger = logging.getLogger('analyzerApp.filesystemSaver') -class FilesystemSaver: +class FilesystemSaver(Storage): + _base_path: str - def __init__(self, app_config): - self.app_config = app_config - self.folder_storage = self.app_config["filesystemDefaultPath"] + def __init__(self, app_config: ApplicationConfig) -> None: + self._base_path = app_config.filesystemDefaultPath - def remove_project_objects(self, project_id, object_names): - try: - for filename in object_names: - object_name_full = os.path.join( - self.folder_storage, project_id, filename).replace("\\", "/") - if os.path.exists(object_name_full): - os.remove(object_name_full) - except Exception as err: - logger.error(err) + def remove_project_objects(self, path: str, object_names: list[str]) -> None: + for filename in object_names: + object_name_full = os.path.join(self._base_path, path, filename).replace("\\", "/") + if os.path.exists(object_name_full): + os.remove(object_name_full) - def put_project_object(self, data, project_id, object_name, using_json=False): - try: - folder_to_save = os.path.join( - self.folder_storage, project_id, os.path.dirname(object_name)).replace("\\", "/") - filename = os.path.join( - self.folder_storage, project_id, object_name).replace("\\", "/") + def put_project_object(self, data: Any, path: str, object_name: str, using_json: bool = False) -> None: + folder_to_save = os.path.join(self._base_path, path, os.path.dirname(object_name)).replace("\\", "/") + filename = os.path.join(self._base_path, path, object_name).replace("\\", "/") + if folder_to_save: os.makedirs(folder_to_save, exist_ok=True) - with open(filename, "wb") as f: - if using_json: - f.write(json.dumps(data).encode("utf-8")) - else: - pickle.dump(data, f) - logger.debug( - "Saved into folder '%s' with name '%s': %s", project_id, object_name, data) - except Exception as err: - logger.error(err) + with open(filename, "wb") as f: + if using_json: + f.write(json.dumps(data).encode("utf-8")) + else: + pickle.dump(data, 
f) + logger.debug("Saved into folder '%s' with name '%s': %s", path, object_name, data) - def get_project_object(self, project_id, object_name, using_json=False): - try: - filename = os.path.join( - self.folder_storage, project_id, object_name).replace("\\", "/") - if os.path.exists(filename): - with open(filename, "rb") as f: - return json.loads(f.read()) if using_json else pickle.load(f) - except Exception as err: - logger.error(err) - return {} + def get_project_object(self, path: str, object_name: str, using_json: bool = False) -> object | None: + filename = os.path.join(self._base_path, path, object_name).replace("\\", "/") + if not utils.validate_file(filename): + raise ValueError(f'Unable to get file: {filename}') + with open(filename, "rb") as f: + return json.loads(f.read()) if using_json else pickle.load(f) - def does_object_exists(self, project_id, object_name): - return os.path.exists( - os.path.join(self.folder_storage, project_id, object_name).replace("\\", "/")) + def does_object_exists(self, path: str, object_name: str) -> bool: + return os.path.exists(os.path.join(self._base_path, path, object_name).replace("\\", "/")) - def get_folder_objects(self, project_id, folder): - folder_to_check = os.path.join( - self.folder_storage, project_id, folder).replace("\\", "/") - if os.path.exists(folder_to_check): - return [ - os.path.join(folder, file_name) for file_name in os.listdir(folder_to_check)] + def get_folder_objects(self, path: str, folder: str) -> list[str]: + root_path = self._base_path + if not root_path and not path: + root_path = os.getcwd() + if folder.endswith('/'): + folder_to_check = os.path.join(root_path, path, folder).replace("\\", "/") + if os.path.exists(folder_to_check): + return [os.path.join(folder, file_name) for file_name in os.listdir(folder_to_check)] + else: + folder_to_check = os.path.join(root_path, path).replace("\\", "/") + if os.path.exists(folder_to_check): + return [file_name for file_name in os.listdir(folder_to_check) if file_name.startswith(folder)] return [] - def remove_folder_objects(self, project_id, folder): - try: - folder_name = os.path.join(self.folder_storage, - project_id, folder).replace("\\", "/") - if os.path.exists(folder_name): - shutil.rmtree(folder_name, ignore_errors=True) - return 1 - return 0 - except Exception as err: - logger.error(err) - return 0 + def remove_folder_objects(self, path: str, folder: str) -> bool: + folder_name = os.path.join(self._base_path, path, folder).replace("\\", "/") + if os.path.exists(folder_name): + shutil.rmtree(folder_name, ignore_errors=True) + return True + return False diff --git a/app/commons/object_saving/minio_client.py b/app/commons/object_saving/minio_client.py index c4affd53..32b5b22f 100644 --- a/app/commons/object_saving/minio_client.py +++ b/app/commons/object_saving/minio_client.py @@ -12,115 +12,107 @@ # See the License for the specific language governing permissions and # limitations under the License. 
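The FilesystemSaver above implements the Storage interface with plain files under filesystemDefaultPath; a small sketch, with a made-up project path and object name:

    from app.commons.model.launch_objects import ApplicationConfig
    from app.commons.object_saving.filesystem_saver import FilesystemSaver

    saver = FilesystemSaver(ApplicationConfig(filesystemDefaultPath='storage'))
    # Objects are stored as files under <filesystemDefaultPath>/<path>/<object_name>.
    saver.put_project_object({'com.acme': 12}, '34', 'chosen_namespaces', using_json=True)
    assert saver.does_object_exists('34', 'chosen_namespaces')
    data = saver.get_project_object('34', 'chosen_namespaces', using_json=True)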
-from minio import Minio -import json import io -import logging +import json import pickle +from typing import Any + +from minio import Minio +from minio.error import NoSuchKey +from app.commons import logging +from app.commons.model.launch_objects import ApplicationConfig +from app.commons.object_saving.storage import Storage logger = logging.getLogger("analyzerApp.minioClient") -class MinioClient: +class MinioClient(Storage): + region: str + bucket_prefix: str - def __init__(self, app_config): - self.app_config = app_config - self.minioClient = None - try: - minio_host = app_config['minioHost'] - self.minioClient = Minio( - minio_host, - access_key=app_config['minioAccessKey'], - secret_key=app_config['minioSecretKey'], - secure=app_config['minioUseTls'], - region=app_config['minioRegion'] - ) - logger.info(f'Minio initialized {minio_host}') - except Exception as err: - logger.error(err) + def __init__(self, app_config: ApplicationConfig) -> None: + minio_host = app_config.minioHost + self.region = app_config.minioRegion + self.bucket_prefix = app_config.minioBucketPrefix + self.minioClient = Minio( + minio_host, + access_key=app_config.minioAccessKey, + secret_key=app_config.minioSecretKey, + secure=app_config.minioUseTls, + region=self.region + ) + logger.info(f'Minio initialized {minio_host}') - def remove_project_objects(self, project_id, object_names): - if self.minioClient is None: - return - try: - bucket_name = project_id - if not self.minioClient.bucket_exists(bucket_name): - return - for object_name in object_names: - self.minioClient.remove_object( - bucket_name=bucket_name, object_name=object_name) - except Exception as err: - logger.error(err) + def get_bucket(self, bucket: str | None): + if bucket: + return self.bucket_prefix + bucket + else: + return '' - def put_project_object(self, data, project_id, object_name, using_json=False): - if self.minioClient is None: + def remove_project_objects(self, bucket: str, object_names: list[str]) -> None: + bucket_name = self.get_bucket(bucket) + if not self.minioClient.bucket_exists(bucket_name): return - try: - bucket_name = project_id + for object_name in object_names: + self.minioClient.remove_object(bucket_name=bucket_name, object_name=object_name) + + def put_project_object(self, data: Any, bucket: str, object_name: str, using_json=False) -> None: + bucket_name = self.get_bucket(bucket) + if bucket_name: if not self.minioClient.bucket_exists(bucket_name): logger.debug("Creating minio bucket %s" % bucket_name) - self.minioClient.make_bucket( - bucket_name=bucket_name, location=self.app_config["minioRegion"]) + self.minioClient.make_bucket(bucket_name=bucket_name, location=self.region) logger.debug("Created minio bucket %s" % bucket_name) - if using_json: - data_to_save = json.dumps(data).encode("utf-8") - else: - data_to_save = pickle.dumps(data) - data_stream = io.BytesIO(data_to_save) - data_stream.seek(0) - self.minioClient.put_object( - bucket_name=bucket_name, object_name=object_name, - data=data_stream, length=len(data_to_save)) - logger.debug( - "Saved into bucket '%s' with name '%s': %s", bucket_name, object_name, data) - except Exception as err: - logger.error(err) + if using_json: + data_to_save = json.dumps(data).encode("utf-8") + content_type = 'application/json' + else: + data_to_save = pickle.dumps(data) + content_type = 'application/octet-stream' + data_stream = io.BytesIO(data_to_save) + data_stream.seek(0) + self.minioClient.put_object( + bucket_name=bucket_name, object_name=object_name, data=data_stream, 
length=len(data_to_save), + content_type=content_type) + logger.debug("Saved into bucket '%s' with name '%s': %s", bucket_name, object_name, data) - def get_project_object(self, project_id, object_name, using_json=False): - if self.minioClient is None: - return {} + def get_project_object(self, bucket: str, object_name: str, using_json=False) -> object | None: + bucket_name = self.get_bucket(bucket) try: - if not self.minioClient.bucket_exists(project_id): - return {} - obj = self.minioClient.get_object( - bucket_name=project_id, object_name=object_name) - return json.loads(obj.data) if using_json else pickle.loads(obj.data) - except Exception: - return {} + obj = self.minioClient.get_object(bucket_name=bucket_name, object_name=object_name) + except NoSuchKey as exc: + raise ValueError(f'Unable to get file: {object_name}', exc) + return json.loads(obj.data) if using_json else pickle.loads(obj.data) - def does_object_exists(self, project_id, object_name): - if self.minioClient is None: - return False - try: - if not self.minioClient.bucket_exists(project_id): + def does_object_exists(self, bucket: str, object_name: str) -> bool: + bucket_name = self.get_bucket(bucket) + if bucket_name: + if not self.minioClient.bucket_exists(bucket_name): return False - self.minioClient.get_object( - bucket_name=project_id, object_name=object_name) - return True - except Exception: + try: + self.minioClient.stat_object(bucket_name=bucket_name, object_name=object_name) + except NoSuchKey: return False + return True - def get_folder_objects(self, project_id, folder): - if self.minioClient is None: - return [] - object_names = [] - if not self.minioClient.bucket_exists(project_id): - return [] - for obj in self.minioClient.list_objects(project_id, prefix=folder): - object_names.append(obj.object_name) - return object_names + def get_folder_objects(self, bucket: str, folder: str) -> list[str]: + bucket_name = self.get_bucket(bucket) + if bucket_name: + if not self.minioClient.bucket_exists(bucket_name): + return [] + object_names = set() + object_list = self.minioClient.list_objects( + bucket_name, prefix=folder.endswith('/') and folder or folder + '/') + for obj in object_list: + object_names.add(obj.object_name.strip('/')) + return sorted(list(object_names)) - def remove_folder_objects(self, project_id, folder): - if self.minioClient is None: - return 0 - if not self.minioClient.bucket_exists(project_id): - return 0 - try: - for obj in self.minioClient.list_objects(project_id, prefix=folder): - self.minioClient.remove_object( - bucket_name=project_id, object_name=obj.object_name) - return 1 - except Exception as err: - logger.error(err) - return 0 + def remove_folder_objects(self, bucket: str, folder: str) -> bool: + bucket_name = self.get_bucket(bucket) + if bucket_name: + if not self.minioClient.bucket_exists(bucket_name): + return False + for obj in self.minioClient.list_objects(bucket_name, prefix=folder.endswith('/') and folder or folder + '/'): + self.minioClient.remove_object(bucket_name=bucket_name, object_name=obj.object_name) + return True diff --git a/app/commons/object_saving/object_saver.py b/app/commons/object_saving/object_saver.py index a4002061..86442c5e 100644 --- a/app/commons/object_saving/object_saver.py +++ b/app/commons/object_saving/object_saver.py @@ -12,57 +12,78 @@ # See the License for the specific language governing permissions and # limitations under the License. 
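The MinioClient above maps a project id to a bucket through minioBucketPrefix; a hedged sketch of the call flow (endpoint and credentials are placeholders and a reachable MinIO instance is assumed):

    from app.commons.model.launch_objects import ApplicationConfig
    from app.commons.object_saving.minio_client import MinioClient

    config = ApplicationConfig(minioHost='minio:9000', minioAccessKey='minio',
                               minioSecretKey='minio123', minioBucketPrefix='prj-')
    client = MinioClient(config)
    # Project "34" is stored in bucket "prj-34"; the bucket is created on first write.
    client.put_project_object({'a': 1}, '34', 'chosen_namespaces', using_json=True)
    restored = client.get_project_object('34', 'chosen_namespaces', using_json=True)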
-import logging -from app.commons.object_saving.minio_client import MinioClient -from app.commons.object_saving.filesystem_saver import FilesystemSaver +import os +from typing import Any, Callable +from app.commons import logging +from app.commons.model.launch_objects import ApplicationConfig +from app.commons.object_saving.filesystem_saver import FilesystemSaver +from app.commons.object_saving.minio_client import MinioClient +from app.commons.object_saving.storage import Storage logger = logging.getLogger("analyzerApp.objectSaver") +def create_minio_client(app_config: ApplicationConfig) -> Storage: + return MinioClient(app_config) + + +def create_filesystem_client(app_config: ApplicationConfig) -> Storage: + return FilesystemSaver(app_config) + + +STORAGE_FACTORIES: dict[str, Callable[[ApplicationConfig], Storage]] = { + 'minio': create_minio_client, + 'filesystem': create_filesystem_client +} + + class ObjectSaver: + storage: Storage + project_id: str | int | None = None + path: str + + def __init__(self, app_config: ApplicationConfig, project_id: str | int | None = None, + path: str | None = None) -> None: + self.project_id = project_id + self.path = path or "" + if app_config.binaryStoreType in STORAGE_FACTORIES: + self.storage = STORAGE_FACTORIES[app_config.binaryStoreType](app_config) + else: + raise ValueError( + f'Storage "{app_config.binaryStoreType}" is not supported, possible types are: ' + + str(STORAGE_FACTORIES.keys()) + ) + + def get_project_id(self, project_id: str | int | None) -> str: + if project_id is not None: + return str(project_id) + if self.project_id is not None: + return str(self.project_id) + return "" + + def get_object_name(self, object_names: str) -> str: + return os.path.join(self.path, object_names) + + def remove_project_objects(self, object_names: list[str], project_id: str | int | None = None) -> None: + self.storage.remove_project_objects(self.get_project_id(project_id), + [self.get_object_name(n) for n in object_names]) + + def put_project_object(self, data: Any, object_name: str, project_id: str | int | None = None, + using_json: bool = False) -> None: + self.storage.put_project_object(data, self.get_project_id(project_id), self.get_object_name(object_name), + using_json=using_json) + + def get_project_object(self, object_name: str, project_id: str | int | None = None, + using_json: bool = False) -> Any: + return self.storage.get_project_object(self.get_project_id(project_id), self.get_object_name(object_name), + using_json=using_json) + + def does_object_exists(self, object_name: str, project_id: str | int | None = None) -> bool: + return self.storage.does_object_exists(self.get_project_id(project_id), self.get_object_name(object_name)) + + def get_folder_objects(self, folder: str, project_id: str | int | None = None) -> list: + return self.storage.get_folder_objects(self.get_project_id(project_id), self.get_object_name(folder)) - def __init__(self, app_config): - self.app_config = app_config - self.saving_strategy = { - "minio": self.create_minio, - "filesystem": self.create_fs - } - self.binarystore_type = "filesystem" - if "binaryStoreType" in self.app_config and\ - self.app_config["binaryStoreType"] in self.saving_strategy: - self.binarystore_type = self.app_config["binaryStoreType"] - - def create_minio(self): - return MinioClient(self.app_config) - - def create_fs(self): - return FilesystemSaver(self.app_config) - - def get_bucket_name(self, project_id): - return self.app_config["minioBucketPrefix"] + str(project_id) - - def 
remove_project_objects(self, project_id, object_names): - self.saving_strategy[self.binarystore_type]().remove_project_objects( - self.get_bucket_name(project_id), object_names) - - def put_project_object(self, data, project_id, object_name, using_json=False): - self.saving_strategy[self.binarystore_type]().put_project_object( - data, self.get_bucket_name(project_id), - object_name, using_json=using_json) - - def get_project_object(self, project_id, object_name, using_json=False): - return self.saving_strategy[self.binarystore_type]().get_project_object( - self.get_bucket_name(project_id), object_name, using_json=using_json) - - def does_object_exists(self, project_id, object_name): - return self.saving_strategy[self.binarystore_type]().does_object_exists( - self.get_bucket_name(project_id), object_name) - - def get_folder_objects(self, project_id, folder): - return self.saving_strategy[self.binarystore_type]().get_folder_objects( - self.get_bucket_name(project_id), folder) - - def remove_folder_objects(self, project_id, folder): - return self.saving_strategy[self.binarystore_type]().remove_folder_objects( - self.get_bucket_name(project_id), folder) + def remove_folder_objects(self, folder: str, project_id: str | int | None = None) -> bool: + return self.storage.remove_folder_objects(self.get_project_id(project_id), self.get_object_name(folder)) diff --git a/app/commons/object_saving/storage.py b/app/commons/object_saving/storage.py new file mode 100644 index 00000000..cf45eb6a --- /dev/null +++ b/app/commons/object_saving/storage.py @@ -0,0 +1,45 @@ +# Copyright 2023 EPAM Systems +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
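The reworked ObjectSaver above is a thin facade that selects a Storage backend from binaryStoreType and prepends the bound project id and optional path to every object name; a minimal sketch with hypothetical values:

    from app.commons import object_saving
    from app.commons.model.launch_objects import ApplicationConfig

    app_config = ApplicationConfig(binaryStoreType='filesystem', filesystemDefaultPath='storage')
    saver = object_saving.create(app_config, project_id=34)
    # Object names are now passed first; the project id bound at creation time is reused.
    saver.put_project_object({'word': 1}, 'project_log_unique_words', using_json=True)
    if saver.does_object_exists('project_log_unique_words'):
        words = saver.get_project_object('project_log_unique_words', using_json=True)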
+ +"""Common interface class for Storage types.""" + +from abc import ABCMeta, abstractmethod +from typing import Any + + +class Storage(metaclass=ABCMeta): + + @abstractmethod + def remove_project_objects(self, path: str, object_names: list[str]) -> None: + raise NotImplementedError('"remove_project_objects" method is not implemented!') + + @abstractmethod + def put_project_object(self, data: Any, path: str, object_name: str, using_json: bool = False) -> None: + raise NotImplementedError('"put_project_object" method is not implemented!') + + @abstractmethod + def get_project_object(self, path: str, object_name: str, using_json: bool = False) -> object | None: + raise NotImplementedError('"get_project_object" method is not implemented!') + + @abstractmethod + def does_object_exists(self, path: str, object_name: str) -> bool: + raise NotImplementedError('"does_object_exists" method is not implemented!') + + @abstractmethod + def get_folder_objects(self, path: str, folder: str) -> list[str]: + raise NotImplementedError('"get_folder_objects" method is not implemented!') + + @abstractmethod + def remove_folder_objects(self, path: str, folder: str) -> bool: + raise NotImplementedError('"remove_folder_objects" method is not implemented!') diff --git a/app/commons/prepared_log.py b/app/commons/prepared_log.py new file mode 100644 index 00000000..8d78f22b --- /dev/null +++ b/app/commons/prepared_log.py @@ -0,0 +1,173 @@ +# Copyright 2024 EPAM Systems +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from app.utils.log_preparation import (basic_prepare, prepare_message, prepare_message_no_params, + prepare_exception_message_no_params, + prepare_exception_message_and_stacktrace) +from app.utils import text_processing + + +class PreparedLogMessage: + + original_message: str + number_of_lines: int + _clean_message: str = None + _test_and_methods: set[str] = None + _message: str = None + _message_no_params: str = None + _exception_message: str = None + _stacktrace: str = None + _exception_message_urls: str = None + _exception_message_paths: str = None + _exception_message_potential_status_codes: str = None + _exception_message_params: str = None + _exception_message_no_params: str = None + _exception_message_no_numbers: str = None + _exception_message_numbers: str = None + _exception_found: str = None + _exception_found_extended: str = None + _test_and_methods_extended: str = None + _stacktrace_paths: str = None + _stacktrace_no_paths: str = None + _stacktrace_no_paths_extended: str = None + + def __init__(self, message: str, number_of_lines: int): + self.original_message = message + self.number_of_lines = number_of_lines + + def __str__(self): + return self.original_message + + @property + def clean_message(self) -> str: + if not self._clean_message: + self._clean_message = basic_prepare(self.original_message) + return self._clean_message + + @property + def test_and_methods(self) -> set[str]: + if not self._test_and_methods: + self._test_and_methods = text_processing.find_test_methods_in_text(self.clean_message) + return self._test_and_methods + + @property + def message(self) -> str: + if not self._message: + self._message = prepare_message(self.clean_message, self.number_of_lines, self.test_and_methods) + return self._message + + @property + def message_no_params(self) -> str: + if not self._message_no_params: + self._message_no_params = prepare_message_no_params(self.message) + return self._message_no_params + + @property + def exception_message(self) -> str: + if not self._exception_message: + self._exception_message, self._stacktrace = prepare_exception_message_and_stacktrace(self.clean_message) + return self._exception_message + + @property + def stacktrace(self) -> str: + if not self._stacktrace: + self._exception_message, self._stacktrace = prepare_exception_message_and_stacktrace( + self.clean_message) + return self._stacktrace + + @property + def exception_message_urls(self) -> str: + if not self._exception_message_urls: + self._exception_message_urls = " ".join(text_processing.extract_urls(self.exception_message)) + return self._exception_message_urls + + @property + def exception_message_paths(self) -> str: + if not self._exception_message_paths: + self._exception_message_paths = " ".join(text_processing.extract_paths(self.exception_message)) + return self._exception_message_paths + + @property + def exception_message_potential_status_codes(self) -> str: + if not self._exception_message_potential_status_codes: + self._exception_message_potential_status_codes = " ".join( + text_processing.get_potential_status_codes(self.exception_message)) + return self._exception_message_potential_status_codes + + @property + def exception_message_params(self) -> str: + if not self._exception_message_params: + self._exception_message_params = " ".join(text_processing.extract_message_params( + self.exception_message)) + return self._exception_message_params + + @property + def exception_message_no_params(self) -> str: + if not self._exception_message_no_params: +
self._exception_message_no_params = text_processing.unify_spaces(prepare_exception_message_no_params( + self.exception_message)) + return self._exception_message_no_params + + @property + def exception_message_no_numbers(self) -> str: + if not self._exception_message_no_numbers: + self._exception_message_no_numbers = text_processing.remove_numbers(self.exception_message) + return self._exception_message_no_numbers + + @property + def exception_message_numbers(self) -> str: + if not self._exception_message_numbers: + self._exception_message_numbers = text_processing.find_only_numbers(self.exception_message) + return self._exception_message_numbers + + @property + def exception_found(self) -> str: + if not self._exception_found: + self._exception_found = text_processing.get_found_exceptions(self.exception_message_no_numbers) + return self._exception_found + + @property + def exception_found_extended(self) -> str: + if not self._exception_found_extended: + self._exception_found_extended = text_processing.enrich_found_exceptions(self.exception_found) + return self._exception_found_extended + + # TODO: This is used in training only, subject to remove + @property + def stacktrace_paths(self) -> str: + if not self._stacktrace_paths: + self._stacktrace_paths = " ".join(text_processing.extract_paths(self.stacktrace)) + return self._stacktrace_paths + + # TODO: This is used in training only, subject to remove + @property + def stacktrace_no_paths(self) -> str: + if not self._stacktrace_no_paths: + self._stacktrace_no_paths = text_processing.unify_spaces(text_processing.clean_from_paths(self.stacktrace)) + return self._stacktrace_no_paths + + # TODO: This is used in training only, subject to remove + @property + def stacktrace_no_paths_extended(self) -> str: + if not self._stacktrace_no_paths_extended: + self._stacktrace_no_paths_extended = text_processing.enrich_text_with_method_and_classes( + self.stacktrace_no_paths) + return self._stacktrace_no_paths_extended + + @property + def test_and_methods_extended(self) -> str: + if not self._test_and_methods_extended: + self._test_and_methods_extended = text_processing.enrich_text_with_method_and_classes( + " ".join(self.test_and_methods)) + return self._test_and_methods_extended diff --git a/app/commons/similarity_calculator.py b/app/commons/similarity_calculator.py index 2ca55a00..3abe90c5 100644 --- a/app/commons/similarity_calculator.py +++ b/app/commons/similarity_calculator.py @@ -11,18 +11,21 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
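PreparedLogMessage replaces scattered per-field log preparation with lazily computed, cached properties: each derived view of a raw log is built on first access and reused afterwards. A rough usage sketch (the sample log text, and -1 as the "take all log lines" value, are illustrative):

from app.commons.prepared_log import PreparedLogMessage

raw_log = 'java.lang.IllegalStateException: connection refused\n\tat com.example.Foo.bar(Foo.java:42)'
prepared = PreparedLogMessage(raw_log, -1)

# Each access below triggers (and caches) only the preparation steps it needs.
message = prepared.message
exception = prepared.exception_message
stacktrace = prepared.stacktrace
exceptions_found = prepared.exception_found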
+from typing import Any, Optional import numpy as np from scipy import spatial from sklearn.feature_extraction.text import CountVectorizer +from app.machine_learning.models.weighted_similarity_calculator import WeightedSimilarityCalculator from app.utils import text_processing class SimilarityCalculator: + similarity_model: WeightedSimilarityCalculator - def __init__(self, config, weighted_similarity_calculator=None): - self.weighted_similarity_calculator = weighted_similarity_calculator + def __init__(self, config: dict[str, Any], similarity_model: WeightedSimilarityCalculator): + self.similarity_model = similarity_model self.config = config self.similarity_dict = {} self.object_id_weights = {} @@ -33,44 +36,42 @@ def __init__(self, config, weighted_similarity_calculator=None): } self.artificial_columns = ["namespaces_stacktrace"] - def find_similarity(self, all_results, fields): + def find_similarity(self, all_results: list[tuple[dict, dict]], fields: list[str]) -> None: for field in fields: if field in self.similarity_dict: continue self.similarity_dict[field] = {} - log_field_ids = {} + log_field_ids: dict = {} index_in_message_array = 0 - count_vector_matrix = None - all_messages = [] - all_messages_needs_reweighting = [] - needs_reweighting_wc = False + count_vector_matrix: np.ndarray | None = None + all_messages: list[str] = [] + all_messages_needs_reweighting: list[int] = [] + needs_reweighting_wc: bool = False for log, res in all_results: for obj in [log] + res["hits"]["hits"]: if obj["_id"] not in log_field_ids: - if field not in self.artificial_columns and not obj["_source"][field].strip(): + if field not in self.artificial_columns and ( + field not in obj["_source"] or not obj["_source"][field].strip()): log_field_ids[obj["_id"]] = -1 else: text = [] needs_reweighting = 0 - if self.config["number_of_log_lines"] == -1 and \ - field in self.fields_mapping_for_weighting: + if (self.config["number_of_log_lines"] == -1 + and field in self.fields_mapping_for_weighting): fields_to_use = self.fields_mapping_for_weighting[field] - text = self.weighted_similarity_calculator.message_to_array( - obj["_source"][fields_to_use[0]], - obj["_source"][fields_to_use[1]]) + text = self.similarity_model.message_to_array( + obj["_source"][fields_to_use[0]], obj["_source"][fields_to_use[1]]) elif field == "namespaces_stacktrace": gathered_lines = [] weights = [] for line in obj["_source"]["stacktrace"].split("\n"): line_words = text_processing.split_words( - line, - min_word_length=self.config["min_word_length"]) + line, min_word_length=self.config["min_word_length"]) for word in line_words: part_of_namespace = ".".join(word.split(".")[:2]) if part_of_namespace in self.config["chosen_namespaces"]: gathered_lines.append(" ".join(line_words)) - weights.append( - self.config["chosen_namespaces"][part_of_namespace]) + weights.append(self.config["chosen_namespaces"][part_of_namespace]) if len(gathered_lines): text = gathered_lines self.object_id_weights[obj["_id"]] = weights @@ -78,40 +79,39 @@ def find_similarity(self, all_results, fields): text = [] for line in obj["_source"]["stacktrace"].split("\n"): text.append(" ".join(text_processing.split_words( - text_processing.clean_from_brackets(line), - min_word_length=self.config["min_word_length"]))) + line, min_word_length=self.config["min_word_length"]))) text = text_processing.filter_empty_lines(text) self.object_id_weights[obj["_id"]] = [1] * len(text) elif field.startswith("stacktrace"): if 
text_processing.does_stacktrace_need_words_reweighting(obj["_source"][field]): needs_reweighting = 1 - text = self.weighted_similarity_calculator.message_to_array( - "", obj["_source"][field]) + text = self.similarity_model.message_to_array("", obj["_source"][field]) else: - text = text_processing.filter_empty_lines([" ".join( - text_processing.split_words( - obj["_source"][field], - min_word_length=self.config["min_word_length"]))]) + text = [" ".join( + text_processing.split_words( + obj["_source"][field], min_word_length=self.config["min_word_length"]))] if not text: log_field_ids[obj["_id"]] = -1 else: all_messages.extend(text) all_messages_needs_reweighting.append(needs_reweighting) - log_field_ids[obj["_id"]] = [index_in_message_array, - len(all_messages) - 1] + log_field_ids[obj["_id"]] = [index_in_message_array, len(all_messages) - 1] index_in_message_array += len(text) if all_messages: - needs_reweighting_wc = all_messages_needs_reweighting and \ - sum(all_messages_needs_reweighting) == len(all_messages_needs_reweighting) + needs_reweighting_wc = (all_messages_needs_reweighting + and sum(all_messages_needs_reweighting) == len(all_messages_needs_reweighting)) vectorizer = CountVectorizer( - binary=not needs_reweighting_wc, - analyzer="word", token_pattern="[^ ]+") - count_vector_matrix = np.asarray(vectorizer.fit_transform(all_messages).toarray()) + binary=not needs_reweighting_wc, analyzer="word", token_pattern="[^ ]+") + try: + count_vector_matrix = np.asarray(vectorizer.fit_transform(all_messages).toarray()) + except ValueError: + # All messages are empty or contains only stop words + pass for log, res in all_results: sim_dict = self._calculate_field_similarity( log, res, log_field_ids, count_vector_matrix, needs_reweighting_wc, field) - for key in sim_dict: - self.similarity_dict[field][key] = sim_dict[key] + for key, value in sim_dict.items(): + self.similarity_dict[field][key] = value def reweight_words_weights_by_summing(self, count_vector_matrix): count_vector_matrix_weighted = np.zeros_like(count_vector_matrix, dtype=float) @@ -132,36 +132,39 @@ def normalize_weights(self, weights): return np.clip(normalized_weights, a_min=1.0, a_max=3.0) def _calculate_field_similarity( - self, log, res, log_field_ids, count_vector_matrix, needs_reweighting_wc, field): + self, log: dict, res: dict, log_field_ids: dict, count_vector_matrix: Optional[np.ndarray], + needs_reweighting_wc: bool, field: str) -> dict: all_results_similarity = {} for obj in res["hits"]["hits"]: group_id = (obj["_id"], log["_id"]) index_query_message = log_field_ids[log["_id"]] index_log_message = log_field_ids[obj["_id"]] - if (isinstance(index_query_message, int) and index_query_message < 0) and \ - (isinstance(index_log_message, int) and index_log_message < 0): + if ((isinstance(index_query_message, int) and index_query_message < 0) + and (isinstance(index_log_message, int) and index_log_message < 0)): all_results_similarity[group_id] = {"similarity": 1.0, "both_empty": True} - elif (isinstance(index_query_message, int) and index_query_message < 0) or \ - (isinstance(index_log_message, int) and index_log_message < 0): + elif ((isinstance(index_query_message, int) and index_query_message < 0) + or (isinstance(index_log_message, int) and index_log_message < 0)): all_results_similarity[group_id] = {"similarity": 0.0, "both_empty": False} else: - query_vector = count_vector_matrix[index_query_message[0]:index_query_message[1] + 1] - log_vector = count_vector_matrix[index_log_message[0]:index_log_message[1] + 1] - 
if field == "namespaces_stacktrace": - query_vector = self.multiply_vectors_by_weight( - query_vector, self.normalize_weights(self.object_id_weights[log["_id"]])) - log_vector = self.multiply_vectors_by_weight( - log_vector, self.normalize_weights(self.object_id_weights[obj["_id"]])) + if count_vector_matrix is not None: + query_vector = count_vector_matrix[index_query_message[0]:index_query_message[1] + 1] + log_vector = count_vector_matrix[index_log_message[0]:index_log_message[1] + 1] + if field == "namespaces_stacktrace": + query_vector = self.multiply_vectors_by_weight( + query_vector, self.normalize_weights(self.object_id_weights[log["_id"]])) + log_vector = self.multiply_vectors_by_weight( + log_vector, self.normalize_weights(self.object_id_weights[obj["_id"]])) + else: + if needs_reweighting_wc: + query_vector = self.reweight_words_weights_by_summing(query_vector) + log_vector = self.reweight_words_weights_by_summing(log_vector) + query_vector = self.similarity_model.weigh_data_rows(query_vector) + log_vector = self.similarity_model.weigh_data_rows(log_vector) + if needs_reweighting_wc: + query_vector *= 2 + log_vector *= 2 + similarity = round(1 - spatial.distance.cosine(query_vector, log_vector), 2) + all_results_similarity[group_id] = {"similarity": similarity, "both_empty": False} else: - if needs_reweighting_wc: - query_vector = self.reweight_words_weights_by_summing(query_vector) - log_vector = self.reweight_words_weights_by_summing(log_vector) - query_vector = self.weighted_similarity_calculator.weigh_data_rows(query_vector) - log_vector = self.weighted_similarity_calculator.weigh_data_rows(log_vector) - if needs_reweighting_wc: - query_vector *= 2 - log_vector *= 2 - similarity = round(1 - spatial.distance.cosine(query_vector, log_vector), 2) - all_results_similarity[group_id] = {"similarity": similarity, "both_empty": False} - + all_results_similarity[group_id] = {"similarity": 0.0, "both_empty": False} return all_results_similarity diff --git a/app/commons/trigger_manager.py b/app/commons/trigger_manager.py index 40117a75..85e23d59 100644 --- a/app/commons/trigger_manager.py +++ b/app/commons/trigger_manager.py @@ -11,41 +11,42 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+from typing import Any -import logging +from app.commons import logging +from app.commons.model.launch_objects import SearchConfig, ApplicationConfig +from app.commons.model.ml import ModelType +from app.commons.model_chooser import ModelChooser from app.commons.triggering_training.retraining_triggering import RetrainingTriggering -from app.boosting_decision_making.training_models import training_defect_type_model, training_analysis_model +from app.machine_learning.training.train_analysis_model import AnalysisModelTraining +from app.machine_learning.training.train_defect_type_model import DefectTypeModelTraining logger = logging.getLogger("analyzerApp.triggerManager") class TriggerManager: + model_training_triggering: dict[ModelType, tuple[RetrainingTriggering, Any]] - def __init__(self, model_chooser, app_config=None, search_cfg=None): - self.app_config = app_config or {} - self.search_cfg = search_cfg or {} + def __init__(self, model_chooser: ModelChooser, app_config: ApplicationConfig, search_cfg: SearchConfig): self.model_training_triggering = { - "defect_type": (RetrainingTriggering(self.app_config, "defect_type_trigger_info", - start_number=100, accumulated_difference=100), - training_defect_type_model.DefectTypeModelTraining( - model_chooser, self.app_config, self.search_cfg)), - "suggestion": (RetrainingTriggering(self.app_config, "suggestion_trigger_info", - start_number=100, accumulated_difference=50), - training_analysis_model.AnalysisModelTraining( - model_chooser, self.app_config, self.search_cfg)), - "auto_analysis": (RetrainingTriggering(self.app_config, "auto_analysis_trigger_info", - start_number=300, accumulated_difference=100), - training_analysis_model.AnalysisModelTraining( - model_chooser, self.app_config, self.search_cfg)) + ModelType.defect_type: (RetrainingTriggering(app_config, 'defect_type_trigger_info', + start_number=100, accumulated_difference=100), + DefectTypeModelTraining(app_config, search_cfg, model_chooser)), + ModelType.suggestion: (RetrainingTriggering(app_config, 'suggestion_trigger_info', + start_number=100, accumulated_difference=50), + AnalysisModelTraining(app_config, search_cfg, ModelType.suggestion, model_chooser)), + ModelType.auto_analysis: (RetrainingTriggering(app_config, 'auto_analysis_trigger_info', + start_number=300, accumulated_difference=100), + AnalysisModelTraining(app_config, search_cfg, ModelType.auto_analysis, + model_chooser)) } - def does_trigger_exist(self, name): - return name in self.model_training_triggering + def does_trigger_exist(self, model: ModelType): + return model in self.model_training_triggering - def get_trigger_info(self, name): - return self.model_training_triggering[name] + def get_trigger_info(self, model: ModelType) -> tuple[RetrainingTriggering, Any]: + return self.model_training_triggering[model] - def delete_triggers(self, project_id): - for model_type in self.model_training_triggering: - self.model_training_triggering[model_type][0].remove_triggering_info( - {"project_id": project_id}) + def delete_triggers(self, project_id: int): + for trigger_info in self.model_training_triggering.values(): + trigger_info[0].remove_triggering_info(project_id) diff --git a/app/commons/triggering_training/retraining_triggering.py b/app/commons/triggering_training/retraining_triggering.py index 2b0f73e9..b3e29d48 100644 --- a/app/commons/triggering_training/retraining_triggering.py +++ b/app/commons/triggering_training/retraining_triggering.py @@ -12,52 +12,61 @@ # See the License for the specific language governing 
permissions and # limitations under the License. -import logging - +from app.commons import logging +from app.commons.model.ml import TrainInfo +from app.commons.model.launch_objects import ApplicationConfig from app.commons.object_saving.object_saver import ObjectSaver -METRIC_SINCE_TRAINING = "gathered_metric_since_training" -GATHERED_METRIC_TOTAL = "gathered_metric_total" +METRIC_SINCE_TRAINING = 'gathered_metric_since_training' +GATHERED_METRIC_TOTAL = 'gathered_metric_total' +REQUIRED_FIELDS = [METRIC_SINCE_TRAINING, GATHERED_METRIC_TOTAL] -logger = logging.getLogger("analyzerApp.retraining_triggering") +logger = logging.getLogger('analyzerApp.retraining_triggering') class RetrainingTriggering: + object_saver: ObjectSaver + start_number: int + accumulated_difference: int + trigger_saving_name: str - def __init__(self, app_config, trigger_saving_name, start_number=100, accumulated_difference=100): + def __init__(self, app_config: ApplicationConfig, trigger_saving_name: str, start_number: int = 100, + accumulated_difference: int = 100): self.object_saver = ObjectSaver(app_config) self.start_number = start_number self.accumulated_difference = accumulated_difference self.trigger_saving_name = trigger_saving_name - self.required_fields = [METRIC_SINCE_TRAINING, GATHERED_METRIC_TOTAL] - def remove_triggering_info(self, train_info): - self.object_saver.remove_project_objects( - train_info["project_id"], [self.trigger_saving_name]) + def remove_triggering_info(self, project_id: int) -> None: + self.object_saver.remove_project_objects([self.trigger_saving_name], project_id) - def get_triggering_info(self, train_info): - obj = self.object_saver.get_project_object(train_info["project_id"], self.trigger_saving_name, using_json=True) - for required_field in self.required_fields: + def get_triggering_info(self, project_id: int) -> dict[str, int]: + if not self.object_saver.does_object_exists(self.trigger_saving_name, project_id): + self.clean_triggering_info(project_id, 0) + obj = self.object_saver.get_project_object(self.trigger_saving_name, project_id, using_json=True) + for required_field in REQUIRED_FIELDS: if required_field not in obj: return {} return obj - def save_triggering_info(self, trigger_info, train_info): - self.object_saver.put_project_object(trigger_info, train_info["project_id"], self.trigger_saving_name, - using_json=True) + def save_triggering_info(self, trigger_info: dict[str, int], project_id: int) -> None: + self.object_saver.put_project_object(trigger_info, self.trigger_saving_name, project_id, using_json=True) - def clean_triggering_info(self, train_info, gathered_metric_total): - trigger_info = self.get_triggering_info(train_info) + def clean_triggering_info(self, project_id: int, gathered_metric_total: int) -> None: + if self.object_saver.does_object_exists(self.trigger_saving_name, project_id): + trigger_info = self.get_triggering_info(project_id) + else: + trigger_info = {} trigger_info[METRIC_SINCE_TRAINING] = 0 trigger_info[GATHERED_METRIC_TOTAL] = gathered_metric_total - self.save_triggering_info(trigger_info, train_info) + self.save_triggering_info(trigger_info, project_id) - def should_model_training_be_triggered(self, train_info): - trigger_info = self.get_triggering_info(train_info) + def should_model_training_be_triggered(self, train_info: TrainInfo) -> bool: + trigger_info = self.get_triggering_info(train_info.project) gathered_metric_total = trigger_info.get(GATHERED_METRIC_TOTAL, 0) - trigger_info[GATHERED_METRIC_TOTAL] = gathered_metric_total + 
train_info[GATHERED_METRIC_TOTAL] + trigger_info[GATHERED_METRIC_TOTAL] = gathered_metric_total + train_info.gathered_metric_total metric_since_training = trigger_info.get(METRIC_SINCE_TRAINING, 0) - trigger_info[METRIC_SINCE_TRAINING] = metric_since_training + train_info[GATHERED_METRIC_TOTAL] - self.save_triggering_info(trigger_info, train_info) + trigger_info[METRIC_SINCE_TRAINING] = metric_since_training + train_info.gathered_metric_total + self.save_triggering_info(trigger_info, train_info.project) return trigger_info[GATHERED_METRIC_TOTAL] >= self.start_number \ and trigger_info[METRIC_SINCE_TRAINING] >= self.accumulated_difference diff --git a/app/boosting_decision_making/__init__.py b/app/machine_learning/__init__.py similarity index 100% rename from app/boosting_decision_making/__init__.py rename to app/machine_learning/__init__.py diff --git a/app/boosting_decision_making/boosting_featurizer.py b/app/machine_learning/boosting_featurizer.py similarity index 63% rename from app/boosting_decision_making/boosting_featurizer.py rename to app/machine_learning/boosting_featurizer.py index 79fc0501..a1e31335 100644 --- a/app/boosting_decision_making/boosting_featurizer.py +++ b/app/machine_learning/boosting_featurizer.py @@ -12,32 +12,34 @@ # See the License for the specific language governing permissions and # limitations under the License. -import logging -from collections import deque +from collections import deque, defaultdict from datetime import datetime +from typing import Optional, Any, Callable import numpy as np -from app.boosting_decision_making.boosting_decision_maker import BoostingDecisionMaker -from app.commons import similarity_calculator +from app.commons import logging, similarity_calculator +from app.machine_learning.models import WeightedSimilarityCalculator +from app.machine_learning.models.defect_type_model import DATA_FIELD +from app.machine_learning.models.defect_type_model import DefectTypeModel from app.utils import utils, text_processing logger = logging.getLogger("analyzerApp.boosting_featurizer") class BoostingFeaturizer: - - def __init__(self, all_results, config, feature_ids, weighted_log_similarity_calculator=None, - features_dict_with_saved_objects=None): + defect_type_predict_model: Optional[DefectTypeModel] + scores_by_type: Optional[dict[str, dict[str, Any]]] + feature_ids: list[int] + feature_functions: dict[int, tuple[Callable, dict[str, Any], list[int]]] + previously_gathered_features: dict[int, list[list[float]]] + + def __init__(self, all_results, config, feature_ids: str | list[int], + weighted_log_similarity_calculator: WeightedSimilarityCalculator = None): self.config = config self.previously_gathered_features = {} - self.models = {} - self.features_dict_with_saved_objects = {} - if features_dict_with_saved_objects is not None: - self.features_dict_with_saved_objects = features_dict_with_saved_objects self.similarity_calculator = similarity_calculator.SimilarityCalculator( - self.config, - weighted_similarity_calculator=weighted_log_similarity_calculator) + self.config, similarity_model=weighted_log_similarity_calculator) if type(feature_ids) is str: self.feature_ids = text_processing.transform_string_feature_range_into_list(feature_ids) else: @@ -49,7 +51,8 @@ def __init__(self, all_results, config, feature_ids, weighted_log_similarity_cal "message_extended", "detected_message_extended", "message_without_params_and_brackets", - "detected_message_without_params_and_brackets"] + "detected_message_without_params_and_brackets" + ] 
self.feature_functions = { 0: (self._calculate_score, {}, []), @@ -69,14 +72,12 @@ def __init__(self, all_results, config, feature_ids, weighted_log_similarity_cal 25: (self._calculate_similarity_percent, {"field_name": "only_numbers"}, []), 26: (self._calculate_max_score_and_pos, {"return_val_name": "max_score"}, []), 27: (self._calculate_min_score_and_pos, {"return_val_name": "min_score"}, []), - 28: (self._calculate_percent_count_items_and_mean, - {"return_val_name": "mean_score"}, []), + 28: (self._calculate_percent_count_items_and_mean, {"return_val_name": "mean_score"}, []), 29: (self._calculate_similarity_percent, {"field_name": "message_params"}, []), 34: (self._calculate_similarity_percent, {"field_name": "found_exceptions"}, []), 35: (self._is_all_log_lines, {}, []), 36: (self._calculate_similarity_percent, {"field_name": "detected_message_extended"}, []), - 37: (self._calculate_similarity_percent, - {"field_name": "detected_message_without_params_extended"}, []), + 37: (self._calculate_similarity_percent, {"field_name": "detected_message_without_params_extended"}, []), 38: (self._calculate_similarity_percent, {"field_name": "stacktrace_extended"}, []), 40: (self._calculate_similarity_percent, {"field_name": "message_without_params_extended"}, []), 41: (self._calculate_similarity_percent, {"field_name": "message_extended"}, []), @@ -87,32 +88,16 @@ def __init__(self, all_results, config, feature_ids, weighted_log_similarity_cal 50: (self.is_text_of_particular_defect_type, {"label_type": "si"}, []), 51: (self.predict_particular_defect_type, {}, []), 52: (self._calculate_similarity_percent, {"field_name": "namespaces_stacktrace"}, []), - 53: (self._calculate_similarity_percent, - {"field_name": "detected_message_without_params_and_brackets"}, []), - 55: (self._calculate_similarity_percent, - {"field_name": "potential_status_codes"}, []), + 53: (self._calculate_similarity_percent, {"field_name": "detected_message_without_params_and_brackets"}, + []), + 55: (self._calculate_similarity_percent, {"field_name": "potential_status_codes"}, []), 56: (self.is_the_same_launch, {}, []), 57: (self.is_the_same_launch_id, {}, []), - 58: (self._calculate_model_probability, - {"model_folder": self.config["boosting_model"]}, - self.get_necessary_features(self.config["boosting_model"])), 59: (self._calculate_similarity_percent, {"field_name": "found_tests_and_methods"}, []), 61: (self._calculate_similarity_percent, {"field_name": "test_item_name"}, []), 64: (self._calculate_decay_function_score, {"field_name": "start_time"}, []), 65: (self._calculate_test_item_logs_similar_percent, {}, []), - 66: (self._count_test_item_logs, {}, []), - 67: (self._encode_into_vector, - {"field_name": "launch_name", "feature_name": 67, "only_query": True}, []), - 68: (self._encode_into_vector, - {"field_name": "detected_message", "feature_name": 68, "only_query": False}, []), - 69: (self._encode_into_vector, - {"field_name": "stacktrace", "feature_name": 69, "only_query": False}, []), - 70: (self._encode_into_vector, - {"field_name": "launch_name", "feature_name": 70, "only_query": True}, []), - 71: (self._encode_into_vector, - {"field_name": "test_item_name", "feature_name": 71, "only_query": False}, []), - 72: (self._encode_into_vector, - {"field_name": "found_exceptions", "feature_name": 72, "only_query": True}, []) + 66: (self._count_test_item_logs, {}, []) } fields_to_calc_similarity = self.find_columns_to_find_similarities_for() @@ -120,32 +105,25 @@ def __init__(self, all_results, config, feature_ids, 
weighted_log_similarity_cal if "filter_min_should_match" in self.config and len(self.config["filter_min_should_match"]) > 0: self.similarity_calculator.find_similarity( - all_results, - self.config["filter_min_should_match"] + ["merged_small_logs"]) + all_results, self.config["filter_min_should_match"] + ["merged_small_logs"]) for field in self.config["filter_min_should_match"]: all_results = self.filter_by_min_should_match(all_results, field=field) - if "filter_min_should_match_any" in self.config and \ - len(self.config["filter_min_should_match_any"]) > 0: + if "filter_min_should_match_any" in self.config and len(self.config["filter_min_should_match_any"]) > 0: self.similarity_calculator.find_similarity( - all_results, - self.config["filter_min_should_match_any"] + ["merged_small_logs"]) + all_results, self.config["filter_min_should_match_any"] + ["merged_small_logs"]) all_results = self.filter_by_min_should_match_any( - all_results, - fields=self.config["filter_min_should_match_any"]) + all_results, fields=self.config["filter_min_should_match_any"]) self.test_item_log_stats = self._calculate_stats_by_test_item_ids(all_results) if "filter_by_all_logs_should_be_similar" in self.config: if self.config["filter_by_all_logs_should_be_similar"]: all_results = self.filter_by_all_logs_should_be_similar(all_results) - if "filter_by_test_case_hash" in self.config \ - and self.config["filter_by_test_case_hash"]: + if "filter_by_test_case_hash" in self.config and self.config["filter_by_test_case_hash"]: all_results = self.filter_by_test_case_hash(all_results) if "calculate_similarities" not in self.config or self.config["calculate_similarities"]: - self.similarity_calculator.find_similarity( - all_results, - fields_to_calc_similarity) + self.similarity_calculator.find_similarity(all_results, fields_to_calc_similarity) self.raw_results = all_results self.all_results = self.normalize_results(all_results) - self.scores_by_issue_type = None + self.scores_by_type = None self.defect_type_predict_model = None self.used_model_info = set() self.features_to_recalculate_always = set([51, 58] + list(range(67, 74))) @@ -160,8 +138,8 @@ def _count_test_item_logs(self): def _calculate_test_item_logs_similar_percent(self): scores_by_issue_type = self.find_most_relevant_by_type() sim_logs_num_scores = {} - for issue_type in scores_by_issue_type: - test_item_id = scores_by_issue_type[issue_type]["mrHit"]["_source"]["test_item"] + for issue_type, search_rs in scores_by_issue_type.items(): + test_item_id = search_rs["mrHit"]["_source"]["test_item"] sim_logs_num_scores[issue_type] = 0.0 if test_item_id in self.test_item_log_stats: sim_logs_num_scores[issue_type] = self.test_item_log_stats[test_item_id] @@ -180,7 +158,8 @@ def _calculate_stats_by_test_item_ids(self, all_results): test_item_log_stats[test_item_id] /= all_logs return test_item_log_stats - def _perform_additional_text_processing(self, all_results): + @staticmethod + def _perform_additional_text_processing(all_results): for log, res in all_results: for r in res["hits"]["hits"]: if "found_tests_and_methods" in r["_source"]: @@ -189,97 +168,36 @@ def _perform_additional_text_processing(self, all_results): return all_results def _calculate_decay_function_score(self, field_name): + decay_speed = np.log(self.config["time_weight_decay"]) scores_by_issue_type = self.find_most_relevant_by_type() dates_by_issue_types = {} - for issue_type in scores_by_issue_type: - field_date = scores_by_issue_type[issue_type]["mrHit"]["_source"][field_name] + for issue_type, 
search_rs in scores_by_issue_type.items(): + field_date = search_rs["mrHit"]["_source"][field_name] field_date = datetime.strptime(field_date, '%Y-%m-%d %H:%M:%S') - compared_field_date = scores_by_issue_type[issue_type]["compared_log"]["_source"][field_name] + compared_field_date = search_rs["compared_log"]["_source"][field_name] compared_field_date = datetime.strptime(compared_field_date, '%Y-%m-%d %H:%M:%S') if compared_field_date < field_date: field_date, compared_field_date = compared_field_date, field_date - dates_by_issue_types[issue_type] = np.exp( - np.log(self.config["time_weight_decay"]) * (compared_field_date - field_date).days / 7) + dates_by_issue_types[issue_type] = np.exp(decay_speed * (compared_field_date - field_date).days / 7) return dates_by_issue_types - def _encode_into_vector(self, field_name, feature_name, only_query): - if feature_name not in self.features_dict_with_saved_objects: - logger.error(self.features_dict_with_saved_objects) - logger.error("Feature '%s' has no encoder" % feature_name) - return [] - if field_name != self.features_dict_with_saved_objects[feature_name].field_name: - logger.error(field_name) - logger.error("Field name '%s' is not the same as in the settings '%s'" % ( - field_name, self.features_dict_with_saved_objects[feature_name].field_name)) - return [] - scores_by_issue_type = self.find_most_relevant_by_type() - encodings_by_issue_type = {} - issue_types, gathered_data = [], [] - for issue_type in scores_by_issue_type: - field_data = scores_by_issue_type[issue_type]["compared_log"]["_source"][field_name] - issue_types.append(issue_type) - gathered_data.append(field_data) - if not only_query: - gathered_data.append( - scores_by_issue_type[issue_type]["mrHit"]["_source"][field_name]) - if gathered_data: - encoded_data = self.features_dict_with_saved_objects[feature_name].transform( - gathered_data).toarray() - encoded_data[encoded_data != 0.0] = 1.0 - for idx in range(len(issue_types)): - if only_query: - encodings_by_issue_type[issue_types[idx]] = list(encoded_data[idx]) - else: - encodings_by_issue_type[issue_types[idx]] = list( - (encoded_data[2 * idx] + encoded_data[2 * idx + 1]) / 2) - return encodings_by_issue_type - - def _calculate_model_probability(self, model_folder=""): - if not model_folder.strip(): - return [] - if model_folder not in self.models: - logger.error("Model folder is not found: '%s'", model_folder) - return [] - feature_ids = self.models[model_folder].get_feature_ids() - feature_data = utils.gather_feature_list(self.previously_gathered_features, feature_ids, to_list=True) - predicted_labels, predicted_labels_probability = self.models[model_folder].predict( - feature_data) - predicted_probability = [] - for res in predicted_labels_probability: - predicted_probability.append(float(res[1])) - return [[round(r, 2)] for r in predicted_probability] - - def get_necessary_features(self, model_folder): - if not model_folder.strip(): - return [] - if model_folder not in self.models: - try: - self.models[model_folder] = BoostingDecisionMaker(folder=model_folder) - return self.models[model_folder].get_feature_ids() - except Exception as err: - logger.debug(err) - return [] - return self.models[model_folder].get_feature_ids() - - def fill_prevously_gathered_features(self, feature_list, feature_ids): - self.previously_gathered_features = utils.fill_prevously_gathered_features( - feature_list, feature_ids) + def fill_previously_gathered_features(self, feature_list: list[list[float]], feature_ids: list[int]) -> None: + 
self.previously_gathered_features = utils.fill_previously_gathered_features(feature_list, feature_ids) def get_used_model_info(self): return list(self.used_model_info) - def set_defect_type_model(self, defect_type_model): + def set_defect_type_model(self, defect_type_model: DefectTypeModel): self.defect_type_predict_model = defect_type_model def predict_particular_defect_type(self): scores_by_issue_type = self.find_most_relevant_by_type() result = {} - for issue_type in scores_by_issue_type: - compared_log = scores_by_issue_type[issue_type]["compared_log"] - det_message = compared_log["_source"]["detected_message_without_params_extended"] - mr_hit = scores_by_issue_type[issue_type]["mrHit"] + for issue_type, search_rs in scores_by_issue_type.items(): + compared_log = search_rs["compared_log"] + det_message = compared_log["_source"][DATA_FIELD] + mr_hit = search_rs["mrHit"] issue_type_to_compare = mr_hit["_source"]["issue_type"] - det_message = text_processing.clean_from_brackets(det_message) result[issue_type] = 0.0 try: model_to_use = issue_type_to_compare.lower()[:2] @@ -292,14 +210,14 @@ def predict_particular_defect_type(self): result[issue_type] = res_prob[0][1] if len(res_prob[0]) == 2 else 0.0 self.used_model_info.update(self.defect_type_predict_model.get_model_info()) except Exception as err: - logger.error(err) + logger.exception(err) return result def is_text_of_particular_defect_type(self, label_type): scores_by_issue_type = self.find_most_relevant_by_type() issue_type_stats = {} - for issue_type in scores_by_issue_type: - mr_hit = scores_by_issue_type[issue_type]["mrHit"] + for issue_type, search_rs in scores_by_issue_type.items(): + mr_hit = search_rs["mrHit"] rel_item_issue_type = mr_hit["_source"]["issue_type"] issue_type_stats[issue_type] = int(label_type == rel_item_issue_type.lower()[:2]) return issue_type_stats @@ -347,9 +265,9 @@ def filter_by_test_case_hash(self, all_results): def is_the_same_field(self, field_name: str) -> dict[str, int]: scores_by_issue_type = self.find_most_relevant_by_type() num_of_logs_issue_type = {} - for issue_type in scores_by_issue_type: - rel_item_value = scores_by_issue_type[issue_type]["mrHit"]["_source"][field_name] - queried_item_value = scores_by_issue_type[issue_type]["compared_log"]["_source"][field_name] + for issue_type, search_rs in scores_by_issue_type.items(): + rel_item_value = search_rs["mrHit"]["_source"][field_name] + queried_item_value = search_rs["compared_log"]["_source"][field_name] if rel_item_value is None and queried_item_value is None: num_of_logs_issue_type[issue_type] = 0 @@ -380,10 +298,9 @@ def has_the_same_test_case_in_all_results(self): scores_by_issue_type = self.find_most_relevant_by_type() num_of_logs_issue_type = {} has_the_same_test_case = 0 - for issue_type in scores_by_issue_type: - rel_item_test_case_hash = scores_by_issue_type[issue_type]["mrHit"]["_source"]["test_case_hash"] - queried_item_test_case_hash = \ - scores_by_issue_type[issue_type]["compared_log"]["_source"]["test_case_hash"] + for search_rs in scores_by_issue_type.values(): + rel_item_test_case_hash = search_rs["mrHit"]["_source"]["test_case_hash"] + queried_item_test_case_hash = search_rs["compared_log"]["_source"]["test_case_hash"] if not rel_item_test_case_hash: continue if rel_item_test_case_hash == queried_item_test_case_hash: @@ -393,12 +310,12 @@ def has_the_same_test_case_in_all_results(self): num_of_logs_issue_type[issue_type] = has_the_same_test_case return num_of_logs_issue_type - def find_columns_to_find_similarities_for(self): 
+ def find_columns_to_find_similarities_for(self) -> list[str]: fields_to_calc_similarity = set() for feature in self.feature_ids: method_params = self.feature_functions[feature] - if "field_name" in method_params[1]: - fields_to_calc_similarity.add(method_params[1]["field_name"]) + if 'field_name' in method_params[1]: + fields_to_calc_similarity.add(method_params[1]['field_name']) return list(fields_to_calc_similarity) def _is_all_log_lines(self): @@ -411,9 +328,8 @@ def _is_all_log_lines(self): def is_only_merged_small_logs(self): scores_by_issue_type = self.find_most_relevant_by_type() similarity_percent_by_type = {} - for issue_type in scores_by_issue_type: - group_id = (scores_by_issue_type[issue_type]["mrHit"]["_id"], - scores_by_issue_type[issue_type]["compared_log"]["_id"]) + for issue_type, search_rs in scores_by_issue_type.items(): + group_id = (search_rs["mrHit"]["_id"], search_rs["compared_log"]["_id"]) sim_obj = self.similarity_calculator.similarity_dict["message"][group_id] similarity_percent_by_type[issue_type] = int(sim_obj["both_empty"]) return similarity_percent_by_type @@ -434,7 +350,7 @@ def filter_by_min_should_match(self, all_results, field="message"): new_results.append((log, {"hits": {"hits": new_elastic_res}})) return new_results - def filter_by_min_should_match_any(self, all_results, fields=["detected_message"]): + def filter_by_min_should_match_any(self, all_results, fields: list[str]): if not fields: return all_results new_results = [] @@ -455,68 +371,55 @@ def filter_by_min_should_match_any(self, all_results, fields=["detected_message" new_results.append((log, {"hits": {"hits": new_elastic_res}})) return new_results - def _calculate_percent_issue_types(self): + def _calculate_percent_issue_types(self) -> dict[str, float]: scores_by_issue_type = self._calculate_score() percent_by_issue_type = {} for issue_type in scores_by_issue_type: - percent_by_issue_type[issue_type] = 1 / len(scores_by_issue_type) \ - if len(scores_by_issue_type) else 0 + percent_by_issue_type[issue_type] = 1 / len(scores_by_issue_type) if len(scores_by_issue_type) else 0 return percent_by_issue_type def _has_test_item_several_logs(self): scores_by_issue_type = self.find_most_relevant_by_type() has_several_logs_by_type = {} - for issue_type in scores_by_issue_type: - merged_small_logs = \ - scores_by_issue_type[issue_type]["mrHit"]["_source"]["merged_small_logs"] + for issue_type, search_rs in scores_by_issue_type.items(): + merged_small_logs = search_rs["mrHit"]["_source"]["merged_small_logs"] has_several_logs_by_type[issue_type] = int(merged_small_logs.strip() != "") return has_several_logs_by_type def _has_query_several_logs(self): scores_by_issue_type = self.find_most_relevant_by_type() has_several_logs_by_type = {} - for issue_type in scores_by_issue_type: - merged_small_logs = \ - scores_by_issue_type[issue_type]["compared_log"]["_source"]["merged_small_logs"] + for issue_type, search_rs in scores_by_issue_type.items(): + merged_small_logs = search_rs["compared_log"]["_source"]["merged_small_logs"] has_several_logs_by_type[issue_type] = int(merged_small_logs.strip() != "") return has_several_logs_by_type - def find_most_relevant_by_type(self): - if self.scores_by_issue_type is not None: - return self.scores_by_issue_type - self.scores_by_issue_type = {} + def find_most_relevant_by_type(self) -> dict[str, dict[str, Any]]: + if self.scores_by_type is not None: + return self.scores_by_type + + scores_by_issue_type = defaultdict(lambda: {'mrHit': {'_score': -1}, 'score': 0}) for log, 
es_results in self.all_results: for idx, hit in enumerate(es_results): - issue_type = hit["_source"]["issue_type"] - hit["es_pos"] = idx + issue_type = hit['_source']['issue_type'] + hit['es_pos'] = idx - if issue_type not in self.scores_by_issue_type: - self.scores_by_issue_type[issue_type] = { - "mrHit": hit, - "compared_log": log, - "score": 0} - - issue_type_item = self.scores_by_issue_type[issue_type] - if hit["_score"] > issue_type_item["mrHit"]["_score"]: - self.scores_by_issue_type[issue_type]["mrHit"] = hit - self.scores_by_issue_type[issue_type]["compared_log"] = log - - for idx, hit in enumerate(es_results): - issue_type = hit["_source"]["issue_type"] - self.scores_by_issue_type[issue_type]["score"] += \ - (hit["normalized_score"] / self.total_normalized) - return self.scores_by_issue_type + issue_type_item = scores_by_issue_type[issue_type] + if hit['_score'] > issue_type_item['mrHit']['_score']: + issue_type_item['mrHit'] = hit + issue_type_item['compared_log'] = log + issue_type_item['score'] += (hit['normalized_score'] / self.total_normalized) + self.scores_by_type = dict(scores_by_issue_type) + return self.scores_by_type def _calculate_score(self): scores_by_issue_type = self.find_most_relevant_by_type() - return {item: scores_by_issue_type[item]["score"] for item in scores_by_issue_type} + return {item: search_rs['score'] for item, search_rs in scores_by_issue_type.items()} def _calculate_place(self): scores_by_issue_type = self._calculate_score() place_by_issue_type = {} - for idx, issue_type_item in enumerate(sorted(scores_by_issue_type.items(), - key=lambda x: x[1], - reverse=True)): + for idx, issue_type_item in enumerate(sorted(scores_by_issue_type.items(), key=lambda x: x[1], reverse=True)): place_by_issue_type[issue_type_item[0]] = 1 / (1 + idx) return place_by_issue_type @@ -526,13 +429,11 @@ def _calculate_max_score_and_pos(self, return_val_name="max_score"): for idx, hit in enumerate(es_results): issue_type = hit["_source"]["issue_type"] - if issue_type not in max_scores_by_issue_type or \ - hit["normalized_score"] > max_scores_by_issue_type[issue_type]["max_score"]: + if issue_type not in max_scores_by_issue_type \ + or hit["normalized_score"] > max_scores_by_issue_type[issue_type]["max_score"]: max_scores_by_issue_type[issue_type] = {"max_score": hit["normalized_score"], "max_score_pos": 1 / (1 + idx), } - - return {item: max_scores_by_issue_type[item][return_val_name] - for item in max_scores_by_issue_type} + return {item: results[return_val_name] for item, results in max_scores_by_issue_type.items()} def _calculate_min_score_and_pos(self, return_val_name="min_score"): min_scores_by_issue_type = {} @@ -540,36 +441,32 @@ def _calculate_min_score_and_pos(self, return_val_name="min_score"): for idx, hit in enumerate(es_results): issue_type = hit["_source"]["issue_type"] - if issue_type not in min_scores_by_issue_type or \ - hit["normalized_score"] < min_scores_by_issue_type[issue_type]["min_score"]: + if issue_type not in min_scores_by_issue_type \ + or hit["normalized_score"] < min_scores_by_issue_type[issue_type]["min_score"]: min_scores_by_issue_type[issue_type] = {"min_score": hit["normalized_score"], "min_score_pos": 1 / (1 + idx), } + return {item: results[return_val_name] for item, results in min_scores_by_issue_type.items()} - return {item: min_scores_by_issue_type[item][return_val_name] - for item in min_scores_by_issue_type} + def _calculate_percent_count_items_and_mean(self, return_val_name: str = 'mean_score') -> dict[str, float]: + """Calculate 
percent of items by issue type and mean score of this issue type. - def _calculate_percent_count_items_and_mean(self, return_val_name="mean_score", scaled=False): - cnt_items_by_issue_type = {} + :param str return_val_name: name of return value, can be 'mean_score' or 'cnt_items_percent' + :return: dict with issue type as key and value as mean score or percent of items + """ + cnt_items_by_issue_type: dict[str, dict[str: int]] = defaultdict(lambda: defaultdict(lambda: 0)) cnt_items_glob = 0 for log, es_results in self.all_results: cnt_items_glob += len(es_results) for idx, hit in enumerate(es_results): - issue_type = hit["_source"]["issue_type"] - - if issue_type not in cnt_items_by_issue_type: - cnt_items_by_issue_type[issue_type] = {"mean_score": 0, - "cnt_items_percent": 0, } - - cnt_items_by_issue_type[issue_type]["cnt_items_percent"] += 1 - cnt_items_by_issue_type[issue_type]["mean_score"] += hit["normalized_score"] + issue_type = hit['_source']['issue_type'] + cnt_items_by_issue_type[issue_type]['cnt_items_percent'] += 1 + cnt_items_by_issue_type[issue_type]['mean_score'] += hit['normalized_score'] - for issue_type in cnt_items_by_issue_type: - cnt_items_by_issue_type[issue_type]["mean_score"] /= \ - cnt_items_by_issue_type[issue_type]["cnt_items_percent"] - cnt_items_by_issue_type[issue_type]["cnt_items_percent"] /= cnt_items_glob - return {item: cnt_items_by_issue_type[item][return_val_name] - for item in cnt_items_by_issue_type} + for issue_scores in cnt_items_by_issue_type.values(): + issue_scores['mean_score'] /= issue_scores['cnt_items_percent'] + issue_scores['cnt_items_percent'] /= cnt_items_glob + return {item: results[return_val_name] for item, results in cnt_items_by_issue_type.items()} def normalize_results(self, all_elastic_results): all_results = [] @@ -582,26 +479,22 @@ def normalize_results(self, all_elastic_results): for hit in es_results["hits"]["hits"]: hit["normalized_score"] = hit["_score"] / max_score self.total_normalized += hit["normalized_score"] - all_results.append((log, es_results["hits"]["hits"])) return all_results def _calculate_similarity_percent(self, field_name="message"): scores_by_issue_type = self.find_most_relevant_by_type() if field_name not in self.similarity_calculator.similarity_dict: - self.similarity_calculator.find_similarity( - self.raw_results, - [field_name]) + self.similarity_calculator.find_similarity(self.raw_results, [field_name]) similarity_percent_by_type = {} - for issue_type in scores_by_issue_type: - group_id = (scores_by_issue_type[issue_type]["mrHit"]["_id"], - scores_by_issue_type[issue_type]["compared_log"]["_id"]) + for issue_type, search_rs in scores_by_issue_type.items(): + group_id = (search_rs["mrHit"]["_id"], search_rs["compared_log"]["_id"]) sim_obj = self.similarity_calculator.similarity_dict[field_name][group_id] similarity_percent_by_type[issue_type] = sim_obj["similarity"] return similarity_percent_by_type - def get_ordered_features_to_process(self): - feature_graph = {} + def get_ordered_features_to_process(self) -> list[int]: + feature_graph: dict[int, list[int]] = {} features_queue = deque(self.feature_ids.copy()) while features_queue: cur_feature = features_queue.popleft() @@ -613,22 +506,20 @@ def get_ordered_features_to_process(self): ordered_features = utils.topological_sort(feature_graph) return ordered_features - @utils.ignore_warnings - def gather_features_info(self): + def gather_features_info(self) -> tuple[list[list[float]], list[str]]: """Gather all features from feature_ids for a test item""" 
gathered_data = [] - gathered_data_dict = {} - issue_type_names = [] - issue_type_by_index = {} + gathered_data_dict: dict[int, list[list[float]]] = {} + issue_type_names: list[str] = [] + issue_type_by_index: dict[int, str] = {} try: - issue_types = self.find_most_relevant_by_type() - for idx, issue_type in enumerate(issue_types): + scores_by_types = self.find_most_relevant_by_type() + for idx, issue_type in enumerate(scores_by_types): issue_type_by_index[idx] = issue_type issue_type_names.append(issue_type) for feature in self.get_ordered_features_to_process(): - if feature in self.previously_gathered_features and \ - feature not in self.features_to_recalculate_always: + if feature in self.previously_gathered_features and feature not in self.features_to_recalculate_always: gathered_data_dict[feature] = self.previously_gathered_features[feature] else: func, args, _ = self.feature_functions[feature] @@ -645,8 +536,8 @@ def gather_features_info(self): except: # noqa gathered_data_dict[feature].append([round(result[issue_type], 2)]) self.previously_gathered_features[feature] = gathered_data_dict[feature] - gathered_data = utils.gather_feature_list(gathered_data_dict, self.feature_ids, to_list=True) + gathered_data = utils.gather_feature_list(gathered_data_dict, self.feature_ids) except Exception as err: logger.error("Errors in boosting features calculation") - logger.error(err) + logger.exception(err) return gathered_data, issue_type_names diff --git a/app/machine_learning/models/__init__.py b/app/machine_learning/models/__init__.py new file mode 100644 index 00000000..a221ea1e --- /dev/null +++ b/app/machine_learning/models/__init__.py @@ -0,0 +1,31 @@ +# Copyright 2023 EPAM Systems +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Common package for ML models.""" + +from app.machine_learning.models.ml_model import MlModel +from app.machine_learning.models.defect_type_model import DefectTypeModel +from app.machine_learning.models.custom_defect_type_model import CustomDefectTypeModel +from app.machine_learning.models.boosting_decision_maker import BoostingDecisionMaker +from app.machine_learning.models.custom_boosting_decision_maker import CustomBoostingDecisionMaker +from app.machine_learning.models.weighted_similarity_calculator import WeightedSimilarityCalculator + +__all__ = [ + 'MlModel', + 'DefectTypeModel', + 'CustomDefectTypeModel', + 'BoostingDecisionMaker', + 'CustomBoostingDecisionMaker', + 'WeightedSimilarityCalculator' +] diff --git a/app/machine_learning/models/boosting_decision_maker.py b/app/machine_learning/models/boosting_decision_maker.py new file mode 100644 index 00000000..b434e726 --- /dev/null +++ b/app/machine_learning/models/boosting_decision_maker.py @@ -0,0 +1,112 @@ +# Copyright 2023 EPAM Systems +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import json +from typing import Any, Optional + +from sklearn.metrics import classification_report, confusion_matrix, f1_score +from xgboost import XGBClassifier + +from app.commons import logging +from app.commons.object_saving.object_saver import ObjectSaver +from app.machine_learning.models import MlModel +from app.utils import text_processing + +LOGGER = logging.getLogger("analyzerApp.boosting_decision_maker") + +MODEL_FILES: list[str] = ['boost_model.pickle', 'data_features_config.pickle'] +DEFAULT_RANDOM_STATE = 43 +DEFAULT_N_ESTIMATORS = 75 +DEFAULT_MAX_DEPTH = 5 + + +class BoostingDecisionMaker(MlModel): + _loaded: bool + n_estimators: int + max_depth: int + random_state: int + feature_ids: list[int] + monotonous_features: set[int] + boost_model: Any + + def __init__(self, object_saver: ObjectSaver, tags: str = 'global boosting model', *, + features: Optional[list[int]] = None, monotonous_features: Optional[list[int]] = None, + n_estimators: Optional[int] = None, max_depth: Optional[int] = None, + random_state: Optional[int] = None) -> None: + super().__init__(object_saver, tags) + self.n_estimators = n_estimators if n_estimators is not None else DEFAULT_N_ESTIMATORS + self.max_depth = max_depth if max_depth is not None else DEFAULT_MAX_DEPTH + self.random_state = random_state if random_state is not None else DEFAULT_RANDOM_STATE + self.boost_model = XGBClassifier( + n_estimators=n_estimators, max_depth=max_depth, random_state=self.random_state) + self.feature_ids = features if features else [] + self.monotonous_features = set(monotonous_features) if monotonous_features else {} + self._loaded = False + + @property + def loaded(self) -> bool: + return self._loaded + + def load_model(self) -> None: + if self.loaded: + return + boost_model, features_config = self._load_models(MODEL_FILES) + if len(boost_model) > 3: + # New model format + self.n_estimators, self.max_depth, self.random_state, self.boost_model = boost_model + self.feature_ids, self.monotonous_features = features_config + else: + # Old model format + self.n_estimators, self.max_depth, self.boost_model = boost_model + self.random_state = DEFAULT_RANDOM_STATE + _, features, self.monotonous_features = features_config + self.feature_ids = text_processing.transform_string_feature_range_into_list(features) + self._loaded = True + + def save_model(self): + self._save_models(zip(MODEL_FILES, [[self.n_estimators, self.max_depth, self.random_state, self.boost_model], + [self.feature_ids, self.monotonous_features]])) + + def train_model(self, train_data: list[list[float]], labels: list[int]) -> float: + mon_features = [ + (1 if feature in self.monotonous_features else 0) for feature in self.feature_ids] + mon_features_prepared = "(" + ",".join([str(f) for f in mon_features]) + ")" + self.boost_model = XGBClassifier( + n_estimators=self.n_estimators, max_depth=self.max_depth, random_state=self.random_state, + monotone_constraints=mon_features_prepared) + self.boost_model.fit(train_data, labels) + self._loaded = True + res = self.boost_model.predict(train_data) + f1 = f1_score(y_pred=res, y_true=labels) + if f1 is None: + f1 
= 0.0 + LOGGER.debug(f'Train dataset F1 score: {f1:.5f}') + LOGGER.debug( + 'Feature importances: %s', + json.dumps(dict(zip(self.feature_ids, self.boost_model.feature_importances_.tolist())))) + return f1 + + def predict(self, data: list[list[float]]) -> tuple[list[int], list[list[float]]]: + if not len(data): + return [], [] + return self.boost_model.predict(data).tolist(), self.boost_model.predict_proba(data).tolist() + + def validate_model(self, valid_test_set: list[list[float]], valid_test_labels: list[int]) -> float: + res, res_prob = self.predict(valid_test_set) + f1 = f1_score(y_pred=res, y_true=valid_test_labels) + if f1 is None: + f1 = 0.0 + LOGGER.debug(f'Valid dataset F1 score: {f1:.5f}') + LOGGER.debug(f'\n{confusion_matrix(valid_test_labels, res)}') + LOGGER.debug(f'\n{classification_report(valid_test_labels, res)}') + return f1 diff --git a/app/machine_learning/models/custom_boosting_decision_maker.py b/app/machine_learning/models/custom_boosting_decision_maker.py new file mode 100644 index 00000000..012c5802 --- /dev/null +++ b/app/machine_learning/models/custom_boosting_decision_maker.py @@ -0,0 +1,29 @@ +# Copyright 2023 EPAM Systems +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Optional + +from app.commons.object_saving.object_saver import ObjectSaver +from app.machine_learning.models.boosting_decision_maker import BoostingDecisionMaker + + +class CustomBoostingDecisionMaker(BoostingDecisionMaker): + + def __init__(self, object_saver: ObjectSaver, *, + features: list[int], monotonous_features: Optional[list[int]] = None, + n_estimators: Optional[int] = None, max_depth: Optional[int] = None, + random_state: Optional[int] = None): + super().__init__(object_saver, 'custom boosting model', features=features, + monotonous_features=monotonous_features, n_estimators=n_estimators, + max_depth=max_depth, random_state=random_state) diff --git a/app/machine_learning/models/custom_defect_type_model.py b/app/machine_learning/models/custom_defect_type_model.py new file mode 100644 index 00000000..bf44725a --- /dev/null +++ b/app/machine_learning/models/custom_defect_type_model.py @@ -0,0 +1,27 @@ +# Copyright 2023 EPAM Systems +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from typing import Optional + +from app.commons.object_saving.object_saver import ObjectSaver +from app.machine_learning.models.defect_type_model import DefectTypeModel + + +MODEL_TAG = 'custom defect type model' + + +class CustomDefectTypeModel(DefectTypeModel): + + def __init__(self, object_saver: ObjectSaver, n_estimators: Optional[int] = None): + super().__init__(object_saver, MODEL_TAG, n_estimators=n_estimators) diff --git a/app/machine_learning/models/defect_type_model.py b/app/machine_learning/models/defect_type_model.py new file mode 100644 index 00000000..aa93f92b --- /dev/null +++ b/app/machine_learning/models/defect_type_model.py @@ -0,0 +1,120 @@ +# Copyright 2023 EPAM Systems +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re +from collections import Counter +from typing import Optional + +import pandas as pd +from sklearn.ensemble import RandomForestClassifier +from sklearn.feature_extraction.text import TfidfVectorizer +from sklearn.metrics import classification_report, confusion_matrix, f1_score + +from app.commons import logging +from app.commons.object_saving.object_saver import ObjectSaver +from app.machine_learning.models import MlModel +from app.utils import text_processing +from app.utils.defaultdict import DefaultDict + +LOGGER = logging.getLogger('analyzerApp.DefectTypeModel') +MODEL_FILES: list[str] = ['count_vectorizer_models.pickle', 'models.pickle'] +DATA_FIELD = 'detected_message_without_params_extended' +BASE_DEFECT_TYPE_PATTERN = re.compile(r'^([^_]+)_.*|^(\D+)\d+') +DEFAULT_N_ESTIMATORS = 10 + + +def get_model(self: DefaultDict, model_name: str): + m = BASE_DEFECT_TYPE_PATTERN.match(model_name) + if not m: + raise KeyError(model_name) + base_model_name = m.group(1) + if not base_model_name: + base_model_name = m.group(2) + if not base_model_name: + raise KeyError(model_name) + return self[base_model_name] + + +class DefectTypeModel(MlModel): + _loaded: bool + count_vectorizer_models: DefaultDict[str, TfidfVectorizer] + models: DefaultDict[str, RandomForestClassifier] + n_estimators: int + + def __init__(self, object_saver: ObjectSaver, tags: str = 'global defect type model', *, + n_estimators: Optional[int] = None) -> None: + super().__init__(object_saver, tags) + self._loaded = False + self.count_vectorizer_models = DefaultDict(get_model) + self.models = DefaultDict(get_model) + self.n_estimators = n_estimators if n_estimators is not None else DEFAULT_N_ESTIMATORS + + @property + def loaded(self) -> bool: + return self._loaded + + def load_model(self) -> None: + if self.loaded: + return + model = self._load_models(MODEL_FILES) + self.count_vectorizer_models = DefaultDict(get_model, **model[0]) + self.models = DefaultDict(get_model, **model[1]) + self._loaded = True + + def save_model(self): + self._save_models(zip(MODEL_FILES, [self.count_vectorizer_models, self.models])) + + def train_model(self, name: str, train_data_x: list[str], labels: list[int]) -> float: + self.count_vectorizer_models[name] = TfidfVectorizer( + binary=True, min_df=5, 
analyzer=text_processing.preprocess_words) + transformed_values = self.count_vectorizer_models[name].fit_transform(train_data_x) + LOGGER.debug(f'Length of train data: {len(labels)}') + LOGGER.debug(f'Train data label distribution: {Counter(labels)}') + LOGGER.debug(f'Train model name: {name}; estimators number: {self.n_estimators}') + model = RandomForestClassifier(self.n_estimators, class_weight='balanced') + x_train_values = pd.DataFrame( + transformed_values.toarray(), + columns=self.count_vectorizer_models[name].get_feature_names_out()) + model.fit(x_train_values, labels) + self.models[name] = model + self._loaded = True + res = model.predict(x_train_values) + f1 = f1_score(y_pred=res, y_true=labels) + if f1 is None: + f1 = 0.0 + LOGGER.debug(f'Train dataset F1 score: {f1:.5f}') + return f1 + + def validate_model(self, name: str, test_data_x: list[str], labels: list[int]) -> float: + assert name in self.models + LOGGER.debug(f'Validation data label distribution: {Counter(labels)}') + LOGGER.debug(f'Validation model name: {name}') + res, res_prob = self.predict(test_data_x, name) + f1 = f1_score(y_pred=res, y_true=labels) + if f1 is None: + f1 = 0.0 + LOGGER.debug(f'Valid dataset F1 score: {f1:.5f}') + LOGGER.debug(f'\n{confusion_matrix(y_pred=res, y_true=labels)}') + LOGGER.debug(f'\n{classification_report(y_pred=res, y_true=labels)}') + return f1 + + def predict(self, data: list, model_name: str) -> tuple[list, list]: + if len(data) == 0: + return [], [] + transformed_values = self.count_vectorizer_models[model_name].transform(data) + x_test_values = pd.DataFrame( + transformed_values.toarray(), columns=self.count_vectorizer_models[model_name].get_feature_names_out()) + predicted_labels = self.models[model_name].predict(x_test_values) + predicted_probs = self.models[model_name].predict_proba(x_test_values) + return predicted_labels, predicted_probs diff --git a/app/machine_learning/models/ml_model.py b/app/machine_learning/models/ml_model.py new file mode 100644 index 00000000..303c0f92 --- /dev/null +++ b/app/machine_learning/models/ml_model.py @@ -0,0 +1,61 @@ +# Copyright 2024 EPAM Systems +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from abc import ABCMeta, abstractmethod +from typing import Any + +from app.commons.object_saving.object_saver import ObjectSaver + + +class MlModel(metaclass=ABCMeta): + """Base class for ML models.""" + tags: list[str] + object_saver: ObjectSaver + + def __init__(self, object_saver: ObjectSaver, tags: str) -> None: + self.tags = [tag.strip() for tag in tags.split(',')] + self.object_saver = object_saver + + def _load_models(self, model_files: list[str]) -> list[Any]: + result = [] + for file in model_files: + model = self.object_saver.get_project_object(file, using_json=False) + if model is None: + raise ValueError(f'Unable to load model "{file}".') + result.append(model) + return result + + def _save_models(self, data: dict[str, Any] | list[tuple[str, Any]]) -> None: + for file_name, object_to_save in dict(data).items(): + self.object_saver.put_project_object(object_to_save, file_name, using_json=False) + + def get_model_info(self) -> list[str]: + folder_name = self.object_saver.path.strip("/").strip("\\").strip() + tags = self.tags + if folder_name: + tags = [folder_name] + self.tags + return tags + + @abstractmethod + def load_model(self) -> None: + raise NotImplementedError('"load_model" method is not implemented!') + + @abstractmethod + def save_model(self) -> None: + raise NotImplementedError('"save_model" method is not implemented!') + + @property + @abstractmethod + def loaded(self) -> bool: + raise NotImplementedError('"loaded" property is not implemented!') diff --git a/app/machine_learning/models/weighted_similarity_calculator.py b/app/machine_learning/models/weighted_similarity_calculator.py new file mode 100644 index 00000000..dd0ef0a9 --- /dev/null +++ b/app/machine_learning/models/weighted_similarity_calculator.py @@ -0,0 +1,80 @@ +# Copyright 2023 EPAM Systems +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import math + +import numpy as np + +from app.commons.object_saving import ObjectSaver +from app.machine_learning.models import MlModel +from app.utils import text_processing + +MODEL_FILES: list[str] = ['weights.pickle'] + + +class WeightedSimilarityCalculator(MlModel): + _loaded: bool + block_to_split: int + min_log_number_in_block: int + weights: np.ndarray + softmax_weights: np.ndarray + + def __init__(self, object_saver: ObjectSaver, block_to_split: int = 10, min_log_number_in_block: int = 1): + super().__init__(object_saver, 'global similarity model') + self.block_to_split = block_to_split + self.min_log_number_in_block = min_log_number_in_block + self.weights = np.array([]) + self.softmax_weights = np.array([]) + self._loaded = False + + @property + def loaded(self) -> bool: + return self._loaded + + def load_model(self) -> None: + if self.loaded: + return + weights = self._load_models(MODEL_FILES)[0] + self.block_to_split, self.min_log_number_in_block, self.weights, self.softmax_weights = weights + self._loaded = True + + def save_model(self) -> None: + self._save_models(zip( + MODEL_FILES, + [[self.block_to_split, self.min_log_number_in_block, self.weights, self.softmax_weights]])) + + def message_to_array(self, detected_message_res: str, stacktrace_res: str) -> list[str]: + all_lines = [" ".join(text_processing.split_words(detected_message_res))] + split_log_lines = text_processing.filter_empty_lines( + [" ".join(text_processing.split_words(line)) for line in stacktrace_res.split("\n")]) + split_log_lines_num = len(split_log_lines) + data_in_block = max(self.min_log_number_in_block, + math.ceil(split_log_lines_num / self.block_to_split)) + blocks_num = math.ceil(split_log_lines_num / data_in_block) + + for block in range(blocks_num): + all_lines.append('\n'.join( + split_log_lines[block * data_in_block: (block + 1) * data_in_block])) + if len([line for line in all_lines if line.strip()]) == 0: + return [] + return all_lines + + def weigh_data_rows(self, data_rows: np.ndarray, use_softmax: bool = False): + padded_data_rows = np.concatenate( + [data_rows, np.zeros((max(0, self.block_to_split + 1 - len(data_rows)), data_rows.shape[1]))], axis=0) + if use_softmax: + result = np.dot(np.reshape(self.softmax_weights, [-1]), padded_data_rows) + else: + result = np.dot(np.reshape(self.weights, [-1]), padded_data_rows) + return np.clip(result, a_min=0, a_max=1) diff --git a/app/boosting_decision_making/suggest_boosting_featurizer.py b/app/machine_learning/suggest_boosting_featurizer.py similarity index 51% rename from app/boosting_decision_making/suggest_boosting_featurizer.py rename to app/machine_learning/suggest_boosting_featurizer.py index 581efb58..b574c899 100644 --- a/app/boosting_decision_making/suggest_boosting_featurizer.py +++ b/app/machine_learning/suggest_boosting_featurizer.py @@ -12,62 +12,55 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from app.boosting_decision_making import boosting_featurizer +from collections import defaultdict +from typing import Any + +from app.machine_learning import boosting_featurizer +from app.machine_learning.models import WeightedSimilarityCalculator class SuggestBoostingFeaturizer(boosting_featurizer.BoostingFeaturizer): - def __init__(self, all_results, config, feature_ids, - weighted_log_similarity_calculator=None, - features_dict_with_saved_objects=None): - super().__init__(all_results, config, feature_ids, - weighted_log_similarity_calculator=weighted_log_similarity_calculator, - features_dict_with_saved_objects=features_dict_with_saved_objects) + def __init__(self, all_results, config, feature_ids: str | list[int], + weighted_log_similarity_calculator: WeightedSimilarityCalculator = None) -> None: + super().__init__( + all_results, config, feature_ids, weighted_log_similarity_calculator=weighted_log_similarity_calculator) - def _calculate_percent_issue_types(self): + def _calculate_percent_issue_types(self) -> dict[str, float]: scores_by_issue_type = self.find_most_relevant_by_type() percent_by_issue_type = {} issue_types = set() - for test_item in scores_by_issue_type: - issue_type = scores_by_issue_type[test_item]["mrHit"]["_source"]["issue_type"] + for search_rs in scores_by_issue_type.values(): + issue_type = search_rs["mrHit"]["_source"]["issue_type"] issue_types.add(issue_type) for test_item in scores_by_issue_type: percent_by_issue_type[test_item] = 1 / len(issue_types) if len(issue_types) > 0 else 0 return percent_by_issue_type - def find_most_relevant_by_type(self): - if self.scores_by_issue_type is not None: - return self.scores_by_issue_type - self.scores_by_issue_type = {} + def find_most_relevant_by_type(self) -> dict[str, dict[str, Any]]: + if self.scores_by_type is not None: + return self.scores_by_type + scores_by_type = defaultdict(lambda: {'mrHit': {'_score': -1}, 'score': 0}) for log, es_results in self.all_results: for idx, hit in enumerate(es_results): - test_item = hit["_source"]["test_item"] - hit["es_pos"] = idx + test_item = str(hit['_source']['test_item']) + hit['es_pos'] = idx - if test_item not in self.scores_by_issue_type: - self.scores_by_issue_type[test_item] = { - "mrHit": hit, - "compared_log": log, - "score": 0} + issue_type_item = scores_by_type[test_item] + if hit['_score'] > issue_type_item['mrHit']['_score']: + issue_type_item['mrHit'] = hit + issue_type_item['compared_log'] = log - issue_type_item = self.scores_by_issue_type[test_item] - if hit["_score"] > issue_type_item["mrHit"]["_score"]: - self.scores_by_issue_type[test_item]["mrHit"] = hit - self.scores_by_issue_type[test_item]["compared_log"] = log - - for idx, hit in enumerate(es_results): - test_item = hit["_source"]["test_item"] - self.scores_by_issue_type[test_item]["score"] = max( - self.scores_by_issue_type[test_item]["score"], - hit["normalized_score"]) - return self.scores_by_issue_type + issue_type_item['score'] = max(issue_type_item['score'], hit['normalized_score']) + self.scores_by_type = dict(scores_by_type) + return self.scores_by_type def _calculate_max_score_and_pos(self, return_val_name="max_score"): max_scores_results = super()._calculate_max_score_and_pos(return_val_name=return_val_name) max_scores_by_test_item = {} test_items = self.find_most_relevant_by_type() - for test_item in test_items: - issue_type = test_items[test_item]["mrHit"]["_source"]["issue_type"] + for test_item, search_rs in test_items.items(): + issue_type = search_rs["mrHit"]["_source"]["issue_type"] 
max_scores_by_test_item[test_item] = max_scores_results[issue_type] return max_scores_by_test_item @@ -75,16 +68,16 @@ def _calculate_min_score_and_pos(self, return_val_name="min_score"): min_scores_results = super()._calculate_min_score_and_pos(return_val_name=return_val_name) min_scores_by_test_item = {} test_items = self.find_most_relevant_by_type() - for test_item in test_items: - issue_type = test_items[test_item]["mrHit"]["_source"]["issue_type"] + for test_item, search_rs in test_items.items(): + issue_type = search_rs["mrHit"]["_source"]["issue_type"] min_scores_by_test_item[test_item] = min_scores_results[issue_type] return min_scores_by_test_item - def _calculate_percent_count_items_and_mean(self, return_val_name="mean_score"): + def _calculate_percent_count_items_and_mean(self, return_val_name="mean_score") -> dict[str, float]: mean_scores_results = super()._calculate_percent_count_items_and_mean(return_val_name=return_val_name) mean_scores_by_test_item = {} test_items = self.find_most_relevant_by_type() - for test_item in test_items: - issue_type = test_items[test_item]["mrHit"]["_source"]["issue_type"] + for test_item, search_rs in test_items.items(): + issue_type = search_rs["mrHit"]["_source"]["issue_type"] mean_scores_by_test_item[test_item] = mean_scores_results[issue_type] return mean_scores_by_test_item diff --git a/app/boosting_decision_making/training_models/__init__.py b/app/machine_learning/training/__init__.py similarity index 92% rename from app/boosting_decision_making/training_models/__init__.py rename to app/machine_learning/training/__init__.py index f86b6d2f..180a3292 100644 --- a/app/boosting_decision_making/training_models/__init__.py +++ b/app/machine_learning/training/__init__.py @@ -11,3 +11,5 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +"""Common package for ML Model training code.""" diff --git a/app/machine_learning/training/train_analysis_model.py b/app/machine_learning/training/train_analysis_model.py new file mode 100644 index 00000000..cf1be549 --- /dev/null +++ b/app/machine_learning/training/train_analysis_model.py @@ -0,0 +1,444 @@ +# Copyright 2023 EPAM Systems +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +from datetime import datetime +from time import time +from typing import Optional, Any, Type + +import elasticsearch +import elasticsearch.helpers +import numpy as np +import scipy.stats as stats +from imblearn.over_sampling import BorderlineSMOTE +from sklearn.model_selection import train_test_split + +from app.commons import logging, namespace_finder, object_saving +from app.commons.esclient import EsClient +from app.commons.model.launch_objects import SearchConfig, ApplicationConfig +from app.commons.model.ml import TrainInfo, ModelType +from app.commons.model_chooser import ModelChooser +from app.machine_learning.boosting_featurizer import BoostingFeaturizer +from app.machine_learning.models import (BoostingDecisionMaker, CustomBoostingDecisionMaker, + WeightedSimilarityCalculator) +from app.machine_learning.suggest_boosting_featurizer import SuggestBoostingFeaturizer +from app.utils import utils, text_processing +from app.utils.defaultdict import DefaultDict + +LOGGER = logging.getLogger("analyzerApp.trainingAnalysisModel") +TRAIN_DATA_RANDOM_STATES = [1257, 1873, 1917, 2477, 3449, 353, 4561, 5417, 6427, 2029, 2137] +DUE_PROPORTION = 0.05 +SMOTE_PROPORTION = 0.4 +MIN_P_VALUE = 0.05 +METRIC = 'F1' + + +def deduplicate_data(data: list[list[float]], labels: list[int]) -> tuple[list[list[float]], list[int]]: + data_wo_duplicates = [] + labels_wo_duplicates = [] + data_set = set() + for i in range(len(data)): + data_tuple = tuple(data[i]) + if data_tuple not in data_set: + data_set.add(data_tuple) + data_wo_duplicates.append(data[i]) + labels_wo_duplicates.append(labels[i]) + return data_wo_duplicates, labels_wo_duplicates + + +def split_data( + data: list[list[float]], labels: list[int], random_state: int +) -> tuple[list[list[float]], list[list[float]], list[int], list[int]]: + x_ids: list[int] = [i for i in range(len(data))] + x_train_ids, x_test_ids, y_train, y_test = train_test_split( + x_ids, labels, test_size=0.1, random_state=random_state, stratify=labels) + x_train = [data[idx] for idx in x_train_ids] + x_test = [data[idx] for idx in x_test_ids] + return x_train, x_test, y_train, y_test + + +def transform_data_from_feature_lists(feature_list: list[list[float]], cur_features: list[int], + desired_features: list[int]) -> list[list[float]]: + previously_gathered_features = utils.fill_previously_gathered_features(feature_list, cur_features) + gathered_data = utils.gather_feature_list(previously_gathered_features, desired_features) + return gathered_data + + +def fill_metric_stats(baseline_model_metric_result: list[float], new_model_metric_results: list[float], + info_dict: dict[str, Any]) -> None: + _, p_value = stats.f_oneway(baseline_model_metric_result, new_model_metric_results) + p_value = p_value if p_value is not None else 1.0 + info_dict['p_value'] = p_value + mean_metric = np.mean(new_model_metric_results) + baseline_mean_metric = np.mean(baseline_model_metric_result) + info_dict['baseline_mean_metric'] = baseline_mean_metric + info_dict['new_model_mean_metric'] = mean_metric + info_dict['gather_date'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + + +def train_several_times( + new_model: BoostingDecisionMaker, data: list[list[float]], labels: list[int], + random_states: Optional[list[int]] = None, baseline_model: Optional[BoostingDecisionMaker] = None +) -> tuple[dict[str, list[float]], dict[str, list[float]], bool, float]: + new_model_results = DefaultDict(lambda _, __: []) + baseline_model_results = DefaultDict(lambda _, __: []) + my_random_states = 
random_states if random_states else TRAIN_DATA_RANDOM_STATES + + bad_data = False + proportion_binary_labels = utils.calculate_proportions_for_labels(labels) + if proportion_binary_labels < DUE_PROPORTION: + LOGGER.debug("Train data has a bad proportion: %.3f", proportion_binary_labels) + bad_data = True + + if not bad_data: + data, labels = deduplicate_data(data, labels) + for random_state in my_random_states: + x_train, x_test, y_train, y_test = split_data(data, labels, random_state) + proportion_binary_labels = utils.calculate_proportions_for_labels(y_train) + if proportion_binary_labels < SMOTE_PROPORTION: + oversample = BorderlineSMOTE(sampling_strategy=SMOTE_PROPORTION, random_state=random_state) + x_train, y_train = oversample.fit_resample(x_train, y_train) + new_model.train_model(x_train, y_train) + LOGGER.debug("New model results") + new_model_results[METRIC].append(new_model.validate_model(x_test, y_test)) + LOGGER.debug("Baseline results") + x_test_for_baseline = transform_data_from_feature_lists( + x_test, new_model.feature_ids, baseline_model.feature_ids) if baseline_model else x_test + baseline_model_results[METRIC].append(baseline_model.validate_model(x_test_for_baseline, y_test)) + return baseline_model_results, new_model_results, bad_data, proportion_binary_labels + + +class AnalysisModelTraining: + app_config: ApplicationConfig + search_cfg: SearchConfig + model_type: ModelType + model_class: Type[BoostingDecisionMaker] + baseline_folder: Optional[str] + baseline_model: Optional[BoostingDecisionMaker] + model_chooser: ModelChooser + features: list[int] + monotonous_features: list[int] + n_estimators: int + max_depth: int + + def __init__(self, app_config: ApplicationConfig, search_cfg: SearchConfig, model_type: ModelType, + model_chooser: ModelChooser, model_class: Optional[Type[BoostingDecisionMaker]] = None, + use_baseline_features: bool = True) -> None: + self.app_config = app_config + self.search_cfg = search_cfg + self.due_proportion = 0.05 + self.due_proportion_to_smote = 0.4 + self.es_client = EsClient(app_config=app_config) + self.model_type = model_type + if model_type is ModelType.suggestion: + self.baseline_folder = self.search_cfg.SuggestBoostModelFolder + self.features = text_processing.transform_string_feature_range_into_list( + self.search_cfg.SuggestBoostModelFeatures) + self.monotonous_features = text_processing.transform_string_feature_range_into_list( + self.search_cfg.SuggestBoostModelMonotonousFeatures) + self.n_estimators = self.search_cfg.SuggestBoostModelNumEstimators + self.max_depth = self.search_cfg.SuggestBoostModelMaxDepth + elif model_type is ModelType.auto_analysis: + self.baseline_folder = self.search_cfg.BoostModelFolder + self.features = text_processing.transform_string_feature_range_into_list( + self.search_cfg.AutoBoostModelFeatures) + self.monotonous_features = text_processing.transform_string_feature_range_into_list( + self.search_cfg.AutoBoostModelMonotonousFeatures) + self.n_estimators = self.search_cfg.AutoBoostModelNumEstimators + self.max_depth = self.search_cfg.AutoBoostModelMaxDepth + else: + raise ValueError(f'Incorrect model type {model_type}') + + self.model_class = model_class if model_class else CustomBoostingDecisionMaker + + if self.baseline_folder: + self.baseline_model = BoostingDecisionMaker( + object_saving.create_filesystem(self.baseline_folder)) + self.baseline_model.load_model() + # Take features from baseline model if this is retrain + if use_baseline_features: + self.features = self.baseline_model.feature_ids + 
self.monotonous_features = list(self.baseline_model.monotonous_features) + + if not self.features: + raise ValueError('No feature config found, please either correct values in "search_cfg" parameter') + + self.weighted_log_similarity_calculator = None + if self.search_cfg.SimilarityWeightsFolder.strip(): + self.weighted_log_similarity_calculator = WeightedSimilarityCalculator( + object_saving.create_filesystem(self.search_cfg.SimilarityWeightsFolder)) + self.weighted_log_similarity_calculator.load_model() + self.namespace_finder = namespace_finder.NamespaceFinder(app_config) + self.model_chooser = model_chooser + + def get_config_for_boosting(self, number_of_log_lines: int, namespaces) -> dict[str, Any]: + return { + "max_query_terms": self.search_cfg.MaxQueryTerms, + "min_should_match": 0.4, + "min_word_length": self.search_cfg.MinWordLength, + "filter_min_should_match": [], + "filter_min_should_match_any": [], + "number_of_log_lines": number_of_log_lines, + "filter_by_test_case_hash": False, + "boosting_model": self.baseline_folder, + "chosen_namespaces": namespaces, + "calculate_similarities": False, + "time_weight_decay": self.search_cfg.TimeWeightDecay} + + @staticmethod + def get_info_template(project_info: TrainInfo, baseline_model: str, model_name: str, + metric_name: str) -> dict[str, Any]: + return {'method': 'training', 'sub_model_type': 'all', 'model_type': project_info.model_type.name, + 'baseline_model': [baseline_model], 'new_model': [model_name], + 'project_id': str(project_info.project), 'model_saved': 0, 'p_value': 1.0, 'data_size': 0, + 'data_proportion': 0.0, 'baseline_mean_metric': 0.0, 'new_model_mean_metric': 0.0, + 'bad_data_proportion': 0, 'metric_name': metric_name, 'errors': [], 'errors_count': 0} + + def query_logs(self, project_id: int, log_ids_to_find: list[str]) -> dict[str, Any]: + log_ids_to_find = list(log_ids_to_find) + project_index_name = text_processing.unite_project_name( + str(project_id), self.app_config.esProjectIndexPrefix) + batch_size = 1000 + log_id_dict = {} + for i in range(int(len(log_ids_to_find) / batch_size) + 1): + log_ids = log_ids_to_find[i * batch_size: (i + 1) * batch_size] + if not log_ids: + continue + ids_query = { + "size": self.app_config.esChunkNumber, + "query": { + "bool": { + "filter": [ + {"terms": {"_id": log_ids}} + ] + } + }} + for r in elasticsearch.helpers.scan( + self.es_client.es_client, query=ids_query, index=project_index_name, scroll="5m"): + log_id_dict[str(r["_id"])] = r + return log_id_dict + + def get_search_query_suggest(self): + return { + "sort": {"savedDate": "desc"}, + "size": self.app_config.esChunkNumber, + "query": { + "bool": { + "must": [ + {"term": {"methodName": "suggestion"}} + ] + } + } + } + + def get_search_query_aa(self, user_choice: int) -> dict[str, Any]: + return { + "sort": {"savedDate": "desc"}, + "size": self.app_config.esChunkNumber, + "query": { + "bool": { + "must": [ + {"term": {"methodName": "auto_analysis"}}, + {"term": {"userChoice": user_choice}} + ] + } + } + } + + @staticmethod + def stop_gathering_info_from_suggest_query(num_of_1s, num_of_0s, max_num): + if (num_of_1s + num_of_0s) == 0: + return False + percent_logs = (num_of_1s + num_of_0s) / max_num + percent_1s = num_of_1s / (num_of_1s + num_of_0s) + if percent_logs >= 0.8 and percent_1s <= 0.2: + return True + return False + + def query_es_for_suggest_info(self, project_id: int) -> tuple[list[dict[str, Any]], dict[str, Any]]: + log_ids_to_find = set() + gathered_suggested_data = [] + log_id_pairs_set = set() + index_name = 
text_processing.unite_project_name( + str(project_id) + "_suggest", self.app_config.esProjectIndexPrefix) + max_number_of_logs = 30000 + cur_number_of_logs = 0 + cur_number_of_logs_0 = 0 + cur_number_of_logs_1 = 0 + unique_saved_features = set() + for query_name, query in [ + ("auto_analysis 0s", self.get_search_query_aa(0)), + ("suggestion", self.get_search_query_suggest()), + ("auto_analysis 1s", self.get_search_query_aa(1)) + ]: + if cur_number_of_logs >= max_number_of_logs: + break + for res in elasticsearch.helpers.scan(self.es_client.es_client, query=query, index=index_name, + scroll="5m"): + if cur_number_of_logs >= max_number_of_logs: + break + saved_model_features = f'{res["_source"]["modelFeatureNames"]}|{res["_source"]["modelFeatureValues"]}' + if saved_model_features in unique_saved_features: + continue + unique_saved_features.add(saved_model_features) + log_ids_pair = (res["_source"]["testItemLogId"], res["_source"]["relevantLogId"]) + if log_ids_pair in log_id_pairs_set: + continue + log_id_pairs_set.add(log_ids_pair) + for col in ["testItemLogId", "relevantLogId"]: + log_id = str(res["_source"][col]) + if res["_source"]["isMergedLog"]: + log_id = log_id + "_m" + log_ids_to_find.add(log_id) + gathered_suggested_data.append(res) + cur_number_of_logs += 1 + if res["_source"]["userChoice"] == 1: + cur_number_of_logs_1 += 1 + else: + cur_number_of_logs_0 += 1 + if query_name == "suggestion" and self.stop_gathering_info_from_suggest_query( + cur_number_of_logs_1, cur_number_of_logs_0, max_number_of_logs): + break + LOGGER.debug("Query: '%s', results number: %d, number of 1s: %d", + query_name, cur_number_of_logs, cur_number_of_logs_1) + log_id_dict = self.query_logs(project_id, list(log_ids_to_find)) + return gathered_suggested_data, log_id_dict + + def query_data(self, projects: list[int], features: list[int]) -> tuple[list[list[float]], list[int]]: + full_data_features, labels = [], [] + for project_id in projects: + namespaces = self.namespace_finder.get_chosen_namespaces(project_id) + gathered_suggested_data, log_id_dict = self.query_es_for_suggest_info(project_id) + + for _suggest_res in gathered_suggested_data: + searched_res = [] + found_logs = {} + for col in ["testItemLogId", "relevantLogId"]: + log_id = str(_suggest_res["_source"][col]) + if _suggest_res["_source"]["isMergedLog"]: + log_id = log_id + "_m" + if log_id in log_id_dict: + found_logs[col] = log_id_dict[log_id] + if len(found_logs) == 2: + log_relevant = found_logs["relevantLogId"] + log_relevant["_score"] = _suggest_res["_source"]["esScore"] + searched_res = [(found_logs["testItemLogId"], {"hits": {"hits": [log_relevant]}})] + if searched_res: + if self.model_type is ModelType.suggestion: + _boosting_data_gatherer = SuggestBoostingFeaturizer( + searched_res, + self.get_config_for_boosting(_suggest_res["_source"]["usedLogLines"], namespaces), + feature_ids=features, + weighted_log_similarity_calculator=self.weighted_log_similarity_calculator) + else: + _boosting_data_gatherer = BoostingFeaturizer( + searched_res, + self.get_config_for_boosting(_suggest_res["_source"]["usedLogLines"], namespaces), + feature_ids=features, + weighted_log_similarity_calculator=self.weighted_log_similarity_calculator) + + # noinspection PyTypeChecker + _boosting_data_gatherer.set_defect_type_model( + self.model_chooser.choose_model(project_id, ModelType.defect_type)) + _boosting_data_gatherer.fill_previously_gathered_features( + [utils.to_float_list(_suggest_res['_source']['modelFeatureValues'])], + [int(_id) for _id in 
_suggest_res['_source']['modelFeatureNames'].split(';')]) + feature_data, _ = _boosting_data_gatherer.gather_features_info() + if feature_data: + full_data_features.extend(feature_data) + labels.append(_suggest_res['_source']['userChoice']) + return full_data_features, labels + + def train_several_times( + self, new_model: BoostingDecisionMaker, data: list[list[float]], labels: list[int], + random_states: Optional[list[int]] = None + ) -> tuple[dict[str, list[float]], dict[str, list[float]], bool, float]: + return train_several_times(new_model, data, labels, random_states, self.baseline_model) + + def train(self, project_info: TrainInfo) -> tuple[int, dict[str, Any]]: + time_training = time() + model_name = f'{project_info.model_type.name}_model_{datetime.now().strftime("%Y-%m-%d")}' + baseline_model = os.path.basename(self.baseline_folder) + new_model_folder = f'{project_info.model_type.name}_model/{model_name}/' + + LOGGER.info(f'Train "{self.model_type.name}" model using class: {self.model_class}') + new_model = self.model_class( + object_saving.create(self.app_config, project_id=project_info.project, path=new_model_folder), + features=self.features, monotonous_features=self.monotonous_features, n_estimators=self.n_estimators, + max_depth=self.max_depth) + + train_log_info = DefaultDict(lambda _, k: self.get_info_template(project_info, baseline_model, model_name, k)) + + LOGGER.debug(f'Initialized model training {project_info.model_type.name}') + projects = [project_info.project] + if project_info.additional_projects: + projects.extend(project_info.additional_projects) + train_data, labels = self.query_data(projects, new_model.feature_ids) + LOGGER.debug(f'Loaded data for model training {self.model_type.name}') + + baseline_model_results, new_model_results, bad_data, data_proportion = self.train_several_times( + new_model, train_data, labels) + for metric in new_model_results: + train_log_info[metric]['data_size'] = len(labels) + train_log_info[metric]['bad_data_proportion'] = int(bad_data) + train_log_info[metric]['data_proportion'] = data_proportion + + use_custom_model = False + mean_metric_results: Optional[list[float]] = None + if not bad_data: + LOGGER.debug(f'Baseline test results {baseline_model_results}') + LOGGER.debug(f'New model test results {new_model_results}') + p_values = [] + new_metrics_better = True + for metric, metric_results in new_model_results.items(): + info_dict = train_log_info[metric] + fill_metric_stats(baseline_model_results[metric], metric_results, info_dict) + p_value = info_dict['p_value'] + p_values.append(p_value) + mean_metric = info_dict['new_model_mean_metric'] + baseline_mean_metric = info_dict['baseline_mean_metric'] + new_metrics_better = new_metrics_better and mean_metric > baseline_mean_metric and mean_metric >= 0.4 + LOGGER.info( + f'Model training validation results: p-value={p_value:.3f}; mean {metric} metric ' + f'baseline={baseline_mean_metric:.3f}; mean new model={mean_metric:.3f}.') + if mean_metric_results: + for i in range(len(metric_results)): + mean_metric_results[i] = mean_metric_results[i] * metric_results[i] + else: + mean_metric_results = metric_results.copy() + + if max(p_values) < MIN_P_VALUE and new_metrics_better: + use_custom_model = True + + if use_custom_model: + LOGGER.debug('Custom model should be saved') + max_train_result_idx = int(np.argmax(mean_metric_results)) + best_random_state = TRAIN_DATA_RANDOM_STATES[max_train_result_idx] + + LOGGER.info(f'Perform final training with random state: {best_random_state}') + 
self.train_several_times( + new_model, train_data, labels, [best_random_state]) + if self.model_chooser: + self.model_chooser.delete_old_model(project_info.model_type, project_info.project) + new_model.save_model() + train_log_info[METRIC]['model_saved'] = 1 + else: + train_log_info[METRIC]['model_saved'] = 0 + + time_spent = (time() - time_training) + train_log_info[METRIC]['time_spent'] = time_spent + train_log_info[METRIC]['module_version'] = [self.app_config.appVersion] + + LOGGER.info(f'Finished for {time_spent} s') + return len(train_data), train_log_info diff --git a/app/machine_learning/training/train_defect_type_model.py b/app/machine_learning/training/train_defect_type_model.py new file mode 100644 index 00000000..5edca411 --- /dev/null +++ b/app/machine_learning/training/train_defect_type_model.py @@ -0,0 +1,410 @@ +# Copyright 2023 EPAM Systems +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from datetime import datetime +from time import time, sleep +from typing import Any, Optional, Type + +import elasticsearch.helpers +import numpy as np +import scipy.stats as stats +from pydantic import BaseModel +from sklearn.model_selection import train_test_split + +from app.commons import logging, object_saving +from app.commons.esclient import EsClient +from app.commons.model.launch_objects import SearchConfig, ApplicationConfig +from app.commons.model.ml import TrainInfo, ModelType +from app.commons.model_chooser import ModelChooser +from app.machine_learning.models import DefectTypeModel, CustomDefectTypeModel +from app.machine_learning.models.defect_type_model import DATA_FIELD +from app.utils import utils, text_processing +from app.utils.defaultdict import DefaultDict + +LOGGER = logging.getLogger('analyzerApp.trainingDefectTypeModel') +TRAIN_DATA_RANDOM_STATES = [1257, 1873, 1917, 2477, 3449, 353, 4561, 5417, 6427, 2029, 2137] +RETRY_COUNT = 5 +RETRY_PAUSES = [0, 1, 5, 10, 20, 40, 60] +BASE_ISSUE_CLASS_INDEXES: dict[str, int] = {'ab': 0, 'pb': 1, 'si': 2} +MINIMAL_LABEL_PROPORTION = 0.2 +TEST_DATA_PROPORTION = 0.1 +MINIMAL_DATA_LENGTH_FOR_TRAIN = 50 +MIN_P_VALUE = 0.05 + + +def return_similar_objects_into_sample(x_train_ind: list[int], y_train: list[int], + data: list[tuple[str, str, str]], additional_logs: dict[int, list[int]], + label: str): + x_train = [] + x_train_add = [] + y_train_add = [] + + for idx, ind in enumerate(x_train_ind): + x_train.append(data[ind][0]) + label_to_use = y_train[idx] + if ind in additional_logs and label_to_use != 1: + for idx_ in additional_logs[ind]: + log_res, label_res, real_label = data[idx_] + if label_res == label: + label_to_use = 1 + break + if ind in additional_logs: + for idx_ in additional_logs[ind]: + x_train_add.append(data[idx_][0]) + y_train_add.append(label_to_use) + x_train.extend(x_train_add) + y_train.extend(y_train_add) + return x_train, y_train + + +def split_train_test( + logs_to_train_idx: list[int], data: list[tuple[str, str, str]], labels_filtered: list[int], + additional_logs: dict[int, list[int]], label: str, 
random_state: int = 1257) -> tuple[list, list, list, list]: + x_train_ind, x_test_ind, y_train, y_test = train_test_split( + logs_to_train_idx, labels_filtered, test_size=TEST_DATA_PROPORTION, random_state=random_state, + stratify=labels_filtered) + x_train, y_train = return_similar_objects_into_sample(x_train_ind, y_train, data, additional_logs, label) + x_test = [data[ind][0] for ind in x_test_ind] + return x_train, x_test, y_train, y_test + + +def perform_light_deduplication(data: list[tuple[str, str, str]]) -> tuple[dict[int, list[int]], list[int]]: + text_messages_set = {} + logs_to_train_idx = [] + additional_logs = {} + for idx, text_message_data in enumerate(data): + text_message = text_message_data[0] + text_message_normalized = " ".join(sorted( + text_processing.split_words(text_message, to_lower=True))) + if text_message_normalized not in text_messages_set: + logs_to_train_idx.append(idx) + text_messages_set[text_message_normalized] = idx + additional_logs[idx] = [] + else: + additional_logs[text_messages_set[text_message_normalized]].append(idx) + return additional_logs, logs_to_train_idx + + +def create_binary_target_data( + label: str, data: list[tuple[str, str, str]]) -> tuple[list[int], list[int], dict[int, list[int]], float]: + additional_logs, logs_to_train_idx = perform_light_deduplication(data) + labels_filtered = [] + for ind in logs_to_train_idx: + if data[ind][1] == label or data[ind][2] == label: + labels_filtered.append(1) + else: + labels_filtered.append(0) + proportion_binary_labels = utils.calculate_proportions_for_labels(labels_filtered) + if proportion_binary_labels < MINIMAL_LABEL_PROPORTION: + logs_to_train_idx, labels_filtered, proportion_binary_labels = utils.balance_data( + logs_to_train_idx, labels_filtered, MINIMAL_LABEL_PROPORTION) + return logs_to_train_idx, labels_filtered, additional_logs, proportion_binary_labels + + +def train_several_times( + new_model: DefectTypeModel, label: str, data: list[tuple[str, str, str]], + random_states: Optional[list[int]] = None, + baseline_model: Optional[DefectTypeModel] = None) -> tuple[list[float], list[float], bool, float]: + my_random_states = random_states if random_states else TRAIN_DATA_RANDOM_STATES + new_model_results = [] + baseline_model_results = [] + bad_data_proportion = False + + logs_to_train_idx, labels_filtered, additional_logs, proportion_binary_labels = create_binary_target_data( + label, data) + + if proportion_binary_labels < MINIMAL_LABEL_PROPORTION: + LOGGER.debug('Train data has a bad proportion: %.3f', proportion_binary_labels) + bad_data_proportion = True + data_length = len(data) + if data_length < MINIMAL_DATA_LENGTH_FOR_TRAIN: + LOGGER.debug(f'Train data has a too few entities:{data_length}') + bad_data_proportion = True + + if not bad_data_proportion: + for random_state in my_random_states: + x_train, x_test, y_train, y_test = split_train_test( + logs_to_train_idx, data, labels_filtered, additional_logs, label, random_state=random_state) + new_model.train_model(label, x_train, y_train) + LOGGER.debug('New model results') + new_model_results.append(new_model.validate_model(label, x_test, y_test)) + LOGGER.debug('Baseline model results') + if baseline_model: + baseline_model_results.append(baseline_model.validate_model(label, x_test, y_test)) + else: + baseline_model_results.append(0.0) + return baseline_model_results, new_model_results, bad_data_proportion, proportion_binary_labels + + +def copy_model_part_from_baseline(label: str, new_model: DefectTypeModel, baseline_model: 
DefectTypeModel) -> None: + if label not in baseline_model.models: + if label in new_model.models: + del new_model.models[label] + if label in new_model.count_vectorizer_models: + del new_model.count_vectorizer_models[label] + else: + new_model.models[label] = baseline_model.models[label] + _count_vectorizer = baseline_model.count_vectorizer_models[label] + new_model.count_vectorizer_models[label] = _count_vectorizer + + +class QueryResult(BaseModel): + result: list[tuple[str, str, str]] + error_count: int + errors: list[str] + + +class DefectTypeModelTraining: + app_config: ApplicationConfig + search_cfg: SearchConfig + es_client: EsClient + baseline_model: Optional[DefectTypeModel] = None + model_chooser: Optional[ModelChooser] + model_class: Type[DefectTypeModel] + + def __init__(self, app_config: ApplicationConfig, search_cfg: SearchConfig, + model_chooser: Optional[ModelChooser] = None, + model_class: Optional[Type[DefectTypeModel]] = None) -> None: + self.app_config = app_config + self.search_cfg = search_cfg + self.es_client = EsClient(app_config=app_config) + if search_cfg.GlobalDefectTypeModelFolder: + self.baseline_model = DefectTypeModel(object_saving.create_filesystem( + search_cfg.GlobalDefectTypeModelFolder)) + self.baseline_model.load_model() + self.model_chooser = model_chooser + self.model_class = model_class if model_class else CustomDefectTypeModel + + @staticmethod + def get_messages_by_issue_type(issue_type_pattern: str) -> dict[str, Any]: + return { + "_source": [DATA_FIELD, "issue_type", "launch_id", '_id'], + "sort": {"start_time": "desc"}, + "query": { + "bool": { + "filter": [ + {"range": {"log_level": {"gte": utils.ERROR_LOGGING_LEVEL}}}, + {"exists": {"field": "issue_type"}}, + {"term": {"is_merged": False}} + ], + "must": [ + { + "bool": { + "should": [ + { + "wildcard": { + "issue_type": {"value": issue_type_pattern, "case_insensitive": True} + } + } + ] + } + } + ], + "should": [ + {"term": {"is_auto_analyzed": {"value": "false", "boost": 1.0}}}, + ] + } + } + } + + def execute_data_query(self, project_index_name: str, query: str) -> QueryResult: + errors = [] + error_count = 0 + query_result = [] + while error_count <= RETRY_COUNT: + try: + query_result = elasticsearch.helpers.scan( + self.es_client.es_client, query=self.get_messages_by_issue_type(query), index=project_index_name, + size=self.app_config.esChunkNumber) + break + except Exception as exc: + # Throttling, out of memory, connection error + LOGGER.exception(exc) + errors.append(utils.extract_exception(exc)) + sleep(RETRY_PAUSES[error_count] if error_count < len(RETRY_PAUSES) else RETRY_PAUSES[-1]) + error_count += 1 + if error_count >= RETRY_COUNT: + return QueryResult(result=[], error_count=error_count, errors=errors) + data = [] + message_launch_dict = set() + for r in query_result: + detected_message = r['_source'][DATA_FIELD] + if not detected_message.strip(): + continue + text_message_normalized = text_processing.normalize_message(detected_message) + issue_type = r["_source"]["issue_type"] + message_info = (text_message_normalized, r["_source"]["launch_id"], issue_type) + if message_info not in message_launch_dict: + data.append((detected_message, issue_type[:2], issue_type)) + message_launch_dict.add(message_info) + if len(data) >= self.search_cfg.MaxLogsForDefectTypeModel: + break + return QueryResult(result=data, error_count=error_count, errors=errors) + + def query_label(self, query: str, index: str, stat: Optional[dict[str, Any]]) -> QueryResult: + LOGGER.debug(f'Query to gather data 
{query}.') + time_querying = time() + found_data = self.execute_data_query(index, query) + time_spent = time() - time_querying + LOGGER.debug(f'Finished querying "{query}" for {time_spent:.2f} s') + if stat: + stat['time_spent'] = time_spent + stat['data_size'] = len(found_data.result) + return found_data + + def query_data(self, projects: list[int], + stat_data_storage: Optional[DefaultDict[str, dict[str, Any]]]) -> list[tuple[str, str, str]]: + data = [] + errors = [] + error_count = 0 + start_time = time() + for project in projects: + project_index_name = text_processing.unite_project_name(project, self.app_config.esProjectIndexPrefix) + for label in BASE_ISSUE_CLASS_INDEXES: + query = f'{label}???' + found_data = self.query_label( + query, project_index_name, stat_data_storage[label] if stat_data_storage else None) + errors.extend(found_data.errors) + error_count += found_data.error_count + data.extend(found_data.result) + query = f'{label}_*' + found_data = self.execute_data_query(project_index_name, query) + errors.extend(found_data.errors) + error_count += found_data.error_count + sub_labels = {l[2] for l in found_data.result} + for sub_label in sub_labels: + found_data = self.query_label( + sub_label, project_index_name, stat_data_storage[sub_label] if stat_data_storage else None) + errors.extend(found_data.errors) + error_count += found_data.error_count + data.extend(found_data.result) + + LOGGER.debug(f'Data gathered: {len(data)}') + if stat_data_storage: + stat_data_storage['all']['data_size'] = len(data) + stat_data_storage['all']['errors'] = errors + stat_data_storage['all']['errors_count'] = error_count + stat_data_storage['all']['time_spent'] = time() - start_time + return data + + @staticmethod + def get_info_template(project_info: TrainInfo, baseline_model: str, model_name: str, label: str) -> dict[str, Any]: + return {'method': 'training', 'sub_model_type': label, 'model_type': project_info.model_type.name, + 'baseline_model': [baseline_model], 'new_model': [model_name], + 'project_id': project_info.project, 'model_saved': 0, 'p_value': 1.0, 'data_size': 0, + 'data_proportion': 0.0, 'baseline_mean_metric': 0.0, 'new_model_mean_metric': 0.0, + 'bad_data_proportion': 0, 'metric_name': 'F1', 'errors': [], 'errors_count': 0, + 'time_spent': 0.0} + + def train_several_times(self, new_model: DefectTypeModel, label: str, data: list[tuple[str, str, str]], + random_states: Optional[list[int]] = None) -> tuple[list[float], list[float], bool, float]: + return train_several_times(new_model, label, data, random_states, self.baseline_model) + + def train(self, project_info: TrainInfo) -> tuple[int, dict[str, dict[str, Any]]]: + start_time = time() + model_name = f'{project_info.model_type.name}_model_{datetime.now().strftime("%Y-%m-%d")}' + baseline_model = os.path.basename(self.search_cfg.GlobalDefectTypeModelFolder) + new_model_folder = f'{project_info.model_type.name}_model/{model_name}/' + + LOGGER.info(f'Train "{ModelType.defect_type.name}" model using class: {self.model_class}') + new_model = self.model_class( + object_saving.create(self.app_config, project_info.project, new_model_folder), + n_estimators=self.search_cfg.DefectTypeModelNumEstimators) + + train_log_info = DefaultDict(lambda _, k: self.get_info_template(project_info, baseline_model, model_name, k)) + projects = [project_info.project] + if project_info.additional_projects: + projects.extend(project_info.additional_projects) + data = self.query_data(projects, train_log_info) + LOGGER.debug(f'Loaded data for model 
training {project_info.model_type.name}') + + unique_labels = {l[2] for l in data} + + data_proportion_min = 1.0 + p_value_max = 0.0 + all_bad_data = 1 + custom_models = [] + f1_chosen_models = [] + f1_baseline_models = [] + for label in unique_labels: + time_training = time() + LOGGER.info(f'Label to train the model {label}') + + (baseline_model_results, new_model_results, bad_data_proportion, + proportion_binary_labels) = self.train_several_times(new_model, label, data, TRAIN_DATA_RANDOM_STATES) + data_proportion_min = min(proportion_binary_labels, data_proportion_min) + + use_custom_model = False + if not bad_data_proportion: + LOGGER.debug(f'Baseline test results {baseline_model_results}') + LOGGER.debug(f'New model test results {new_model_results}') + f_value, p_value = stats.f_oneway(baseline_model_results, new_model_results) + if p_value is None: + p_value = 1.0 + train_log_info[label]['p_value'] = p_value + baseline_mean_f1 = np.mean(baseline_model_results) + mean_f1 = np.mean(new_model_results) + train_log_info[label]['baseline_mean_metric'] = baseline_mean_f1 + train_log_info[label]['new_model_mean_metric'] = mean_f1 + + if p_value < MIN_P_VALUE and mean_f1 > baseline_mean_f1 and mean_f1 >= 0.4: + p_value_max = max(p_value_max, p_value) + use_custom_model = True + all_bad_data = 0 + LOGGER.info( + f'Model training validation results: p-value={p_value:.3f}; mean F1 metric ' + f'baseline={baseline_mean_f1:.3f}; mean new model={mean_f1:.3f}.') + train_log_info[label]['bad_data_proportion'] = int(bad_data_proportion) + + if use_custom_model: + LOGGER.debug(f'Custom model {label} should be saved') + max_train_result_idx = int(np.argmax(new_model_results)) + best_random_state = TRAIN_DATA_RANDOM_STATES[max_train_result_idx] + + LOGGER.info(f'Perform final training with random state: {best_random_state}') + self.train_several_times(new_model, label, data, [best_random_state]) + + train_log_info[label]["model_saved"] = 1 + custom_models.append(label) + else: + train_log_info[label]["model_saved"] = 0 + copy_model_part_from_baseline(label, new_model, self.baseline_model) + if train_log_info[label]["baseline_mean_metric"] > 0.001: + f1_baseline_models.append(train_log_info[label]["baseline_mean_metric"]) + f1_chosen_models.append(train_log_info[label]["baseline_mean_metric"]) + train_log_info[label]["time_spent"] += (time() - time_training) + + LOGGER.debug(f'Custom models were for labels: {custom_models}') + if len(custom_models): + LOGGER.debug("The custom model should be saved") + train_log_info["all"]["model_saved"] = 1 + train_log_info["all"]["p_value"] = p_value_max + if self.model_chooser: + self.model_chooser.delete_old_model(project_info.model_type, project_info.project) + new_model.save_model() + + time_spent = time() - start_time + LOGGER.info("Finished for %d s", time_spent) + train_log_info["all"]["time_spent"] = time_spent + train_log_info["all"]["data_proportion"] = data_proportion_min + train_log_info["all"]["baseline_mean_metric"] = np.mean( + f1_baseline_models) if f1_baseline_models else 0.0 + train_log_info["all"]["new_model_mean_metric"] = np.mean( + f1_chosen_models) if f1_chosen_models else 0.0 + train_log_info["all"]["bad_data_proportion"] = all_bad_data + for label in train_log_info: + train_log_info[label]["gather_date"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + train_log_info[label]["module_version"] = [self.app_config.appVersion] + return len(data), train_log_info diff --git a/app/main.py b/app/main.py index a83c9c25..5f7b1fb9 100644 --- a/app/main.py +++ 
b/app/main.py @@ -13,7 +13,6 @@ # limitations under the License. import json -import logging import logging.config import os import threading @@ -23,11 +22,14 @@ from flask import Flask, Response, jsonify from flask_cors import CORS +from flask_wtf.csrf import CSRFProtect +from pika.adapters.blocking_connection import BlockingChannel from app.amqp import amqp_handler from app.amqp.amqp import AmqpClient -from app.commons import model_chooser +from app.commons import model_chooser, logging as my_logging from app.commons.esclient import EsClient +from app.commons.model.launch_objects import ApplicationConfig, SearchConfig from app.service import AnalyzerService from app.service import AutoAnalyzerService from app.service import CleanIndexService @@ -41,80 +43,81 @@ from app.service import SuggestService from app.utils import utils -APP_CONFIG = { - "esHost": os.getenv("ES_HOSTS", "http://elasticsearch:9200").strip("/").strip("\\"), - "esUser": os.getenv("ES_USER", "").strip(), - "esPassword": os.getenv("ES_PASSWORD", "").strip(), - "logLevel": os.getenv("LOGGING_LEVEL", "DEBUG").strip(), - "amqpUrl": os.getenv("AMQP_URL", "").strip("/").strip("\\") + "/" + os.getenv( +APP_CONFIG = ApplicationConfig( + esHost=os.getenv("ES_HOSTS", "http://elasticsearch:9200").strip("/").strip("\\"), + esUser=os.getenv("ES_USER", "").strip(), + esPassword=os.getenv("ES_PASSWORD", "").strip(), + logLevel=os.getenv("LOGGING_LEVEL", "DEBUG").strip(), + amqpUrl=os.getenv("AMQP_URL", "").strip("/").strip("\\") + "/" + os.getenv( "AMQP_VIRTUAL_HOST", "analyzer"), - "exchangeName": os.getenv("AMQP_EXCHANGE_NAME", "analyzer"), - "analyzerPriority": int(os.getenv("ANALYZER_PRIORITY", "1")), - "analyzerIndex": json.loads(os.getenv("ANALYZER_INDEX", "true").lower()), - "analyzerLogSearch": json.loads(os.getenv("ANALYZER_LOG_SEARCH", "true").lower()), - "analyzerSuggest": json.loads(os.getenv("ANALYZER_SUGGEST", "true").lower()), - "analyzerCluster": json.loads(os.getenv("ANALYZER_CLUSTER", "true").lower()), - "turnOffSslVerification": json.loads(os.getenv("ES_TURN_OFF_SSL_VERIFICATION", "false").lower()), - "esVerifyCerts": json.loads(os.getenv("ES_VERIFY_CERTS", "false").lower()), - "esUseSsl": json.loads(os.getenv("ES_USE_SSL", "false").lower()), - "esSslShowWarn": json.loads(os.getenv("ES_SSL_SHOW_WARN", "false").lower()), - "esCAcert": os.getenv("ES_CA_CERT", ""), - "esClientCert": os.getenv("ES_CLIENT_CERT", ""), - "esClientKey": os.getenv("ES_CLIENT_KEY", ""), - "minioHost": os.getenv("MINIO_SHORT_HOST", "minio:9000"), - "minioAccessKey": os.getenv("MINIO_ACCESS_KEY", "minio"), - "minioSecretKey": os.getenv("MINIO_SECRET_KEY", "minio123"), - "minioUseTls": json.loads(os.getenv("MINIO_USE_TLS", "false").lower()), - "appVersion": "", - "binaryStoreType": os.getenv("ANALYZER_BINSTORE_TYPE", - os.getenv("ANALYZER_BINARYSTORE_TYPE", "filesystem")), - "minioBucketPrefix": os.getenv("ANALYZER_BINSTORE_BUCKETPREFIX", - os.getenv("ANALYZER_BINARYSTORE_BUCKETPREFIX", "prj-")), - "minioRegion": os.getenv("ANALYZER_BINSTORE_MINIO_REGION", - os.getenv("ANALYZER_BINARYSTORE_MINIO_REGION", None)), - "instanceTaskType": os.getenv("INSTANCE_TASK_TYPE", "").strip(), - "filesystemDefaultPath": os.getenv("FILESYSTEM_DEFAULT_PATH", "storage").strip(), - "esChunkNumber": int(os.getenv("ES_CHUNK_NUMBER", "1000")), - "esChunkNumberUpdateClusters": int(os.getenv("ES_CHUNK_NUMBER_UPDATE_CLUSTERS", "500")), - "esProjectIndexPrefix": os.getenv("ES_PROJECT_INDEX_PREFIX", "").strip(), - "analyzerHttpPort": int(os.getenv("ANALYZER_HTTP_PORT", 
"5001")), - "analyzerPathToLog": os.getenv("ANALYZER_FILE_LOGGING_PATH", "/tmp/config.log") -} - -SEARCH_CONFIG = { - "MinShouldMatch": os.getenv("ES_MIN_SHOULD_MATCH", "80%"), - "BoostAA": float(os.getenv("ES_BOOST_AA", "-8.0")), - "BoostLaunch": float(os.getenv("ES_BOOST_LAUNCH", "4.0")), - "BoostTestCaseHash": float(os.getenv("ES_BOOST_TEST_CASE_HASH", "8.0")), - "MaxQueryTerms": int(os.getenv("ES_MAX_QUERY_TERMS", "50")), - "SearchLogsMinSimilarity": float(os.getenv("ES_LOGS_MIN_SHOULD_MATCH", "0.95")), - "MinWordLength": int(os.getenv("ES_MIN_WORD_LENGTH", "2")), - "TimeWeightDecay": float(os.getenv("ES_TIME_WEIGHT_DECAY", "0.95")), - "PatternLabelMinPercentToSuggest": float(os.getenv("PATTERN_LABEL_MIN_PERCENT", "0.9")), - "PatternLabelMinCountToSuggest": int(os.getenv("PATTERN_LABEL_MIN_COUNT", "5")), - "PatternMinCountToSuggest": int(os.getenv("PATTERN_MIN_COUNT", "10")), - "MaxLogsForDefectTypeModel": int(os.getenv("MAX_LOGS_FOR_DEFECT_TYPE_MODEL", "10000")), - "ProbabilityForCustomModelSuggestions": min( + exchangeName=os.getenv("AMQP_EXCHANGE_NAME", "analyzer"), + analyzerPriority=int(os.getenv("ANALYZER_PRIORITY", "1")), + analyzerIndex=json.loads(os.getenv("ANALYZER_INDEX", "true").lower()), + analyzerLogSearch=json.loads(os.getenv("ANALYZER_LOG_SEARCH", "true").lower()), + analyzerSuggest=json.loads(os.getenv("ANALYZER_SUGGEST", "true").lower()), + analyzerCluster=json.loads(os.getenv("ANALYZER_CLUSTER", "true").lower()), + turnOffSslVerification=json.loads(os.getenv("ES_TURN_OFF_SSL_VERIFICATION", "false").lower()), + esVerifyCerts=json.loads(os.getenv("ES_VERIFY_CERTS", "false").lower()), + esUseSsl=json.loads(os.getenv("ES_USE_SSL", "false").lower()), + esSslShowWarn=json.loads(os.getenv("ES_SSL_SHOW_WARN", "false").lower()), + esCAcert=os.getenv("ES_CA_CERT", ""), + esClientCert=os.getenv("ES_CLIENT_CERT", ""), + esClientKey=os.getenv("ES_CLIENT_KEY", ""), + minioHost=os.getenv("MINIO_SHORT_HOST", "minio:9000"), + minioAccessKey=os.getenv("MINIO_ACCESS_KEY", "minio"), + minioSecretKey=os.getenv("MINIO_SECRET_KEY", "minio123"), + minioUseTls=json.loads(os.getenv("MINIO_USE_TLS", "false").lower()), + appVersion="", + binaryStoreType=os.getenv("ANALYZER_BINSTORE_TYPE", + os.getenv("ANALYZER_BINARYSTORE_TYPE", "filesystem")), + minioBucketPrefix=os.getenv("ANALYZER_BINSTORE_BUCKETPREFIX", + os.getenv("ANALYZER_BINARYSTORE_BUCKETPREFIX", "prj-")), + minioRegion=os.getenv("ANALYZER_BINSTORE_MINIO_REGION", + os.getenv("ANALYZER_BINARYSTORE_MINIO_REGION", None)), + instanceTaskType=os.getenv("INSTANCE_TASK_TYPE", "").strip(), + filesystemDefaultPath=os.getenv("FILESYSTEM_DEFAULT_PATH", "storage").strip(), + esChunkNumber=int(os.getenv("ES_CHUNK_NUMBER", "1000")), + esChunkNumberUpdateClusters=int(os.getenv("ES_CHUNK_NUMBER_UPDATE_CLUSTERS", "500")), + esProjectIndexPrefix=os.getenv("ES_PROJECT_INDEX_PREFIX", "").strip(), + analyzerHttpPort=int(os.getenv("ANALYZER_HTTP_PORT", "5001")), + analyzerPathToLog=os.getenv("ANALYZER_FILE_LOGGING_PATH", "/tmp/config.log") +) + +SEARCH_CONFIG = SearchConfig( + SearchLogsMinSimilarity=float(os.getenv("ES_LOGS_MIN_SHOULD_MATCH", "0.95")), + MinShouldMatch=os.getenv("ES_MIN_SHOULD_MATCH", "80%"), + BoostAA=float(os.getenv("ES_BOOST_AA", "-8.0")), + BoostLaunch=float(os.getenv("ES_BOOST_LAUNCH", "4.0")), + BoostTestCaseHash=float(os.getenv("ES_BOOST_TEST_CASE_HASH", "8.0")), + MaxQueryTerms=int(os.getenv("ES_MAX_QUERY_TERMS", "50")), + MinWordLength=int(os.getenv("ES_MIN_WORD_LENGTH", "2")), + 
TimeWeightDecay=float(os.getenv("ES_TIME_WEIGHT_DECAY", "0.95")), + PatternLabelMinPercentToSuggest=float(os.getenv("PATTERN_LABEL_MIN_PERCENT", "0.9")), + PatternLabelMinCountToSuggest=int(os.getenv("PATTERN_LABEL_MIN_COUNT", "5")), + PatternMinCountToSuggest=int(os.getenv("PATTERN_MIN_COUNT", "10")), + MaxLogsForDefectTypeModel=int(os.getenv("MAX_LOGS_FOR_DEFECT_TYPE_MODEL", "10000")), + ProbabilityForCustomModelSuggestions=min( 0.8, float(os.getenv("PROB_CUSTOM_MODEL_SUGGESTIONS", "0.7"))), - "ProbabilityForCustomModelAutoAnalysis": min( + ProbabilityForCustomModelAutoAnalysis=min( 1.0, float(os.getenv("PROB_CUSTOM_MODEL_AUTO_ANALYSIS", "0.5"))), - "BoostModelFolder": "", - "SuggestBoostModelFolder": "", - "SimilarityWeightsFolder": "", - "GlobalDefectTypeModelFolder": "", - "RetrainSuggestBoostModelConfig": "", - "RetrainAutoBoostModelConfig": "", - "MaxSuggestionsNumber": int(os.getenv("MAX_SUGGESTIONS_NUMBER", "3")), - "AutoAnalysisTimeout": int(os.getenv("AUTO_ANALYSIS_TIMEOUT", "300")), - "MaxAutoAnalysisItemsToProcess": int( + BoostModelFolder="", + SuggestBoostModelFolder="", + SimilarityWeightsFolder="", + GlobalDefectTypeModelFolder="", + RetrainSuggestBoostModelConfig="", + RetrainAutoBoostModelConfig="", + MaxSuggestionsNumber=int(os.getenv("MAX_SUGGESTIONS_NUMBER", "3")), + AutoAnalysisTimeout=int(os.getenv("AUTO_ANALYSIS_TIMEOUT", "300")), + MaxAutoAnalysisItemsToProcess=int( os.getenv("ANALYZER_MAX_ITEMS_TO_PROCESS", os.getenv("MAX_AUTO_ANALYSIS_ITEMS_TO_PROCESS", "4000"))) -} +) def create_application(): """Creates a Flask application""" _application = Flask(__name__) CORS(_application) + CSRFProtect(_application) return _application @@ -125,75 +128,76 @@ def create_thread(func, args): return thread -def declare_exchange(channel, config): +def declare_exchange(channel: BlockingChannel, config: ApplicationConfig): """Declares exchange for rabbitmq""" - logger.info("ExchangeName: %s", config["exchangeName"]) + logger.info("ExchangeName: %s", config.exchangeName) try: - channel.exchange_declare(exchange=config["exchangeName"], exchange_type='direct', + channel.exchange_declare(exchange=config.exchangeName, exchange_type='direct', durable=False, auto_delete=True, internal=False, arguments={ - "analyzer": config["exchangeName"], - "analyzer_index": config["analyzerIndex"], - "analyzer_priority": config["analyzerPriority"], - "analyzer_log_search": config["analyzerLogSearch"], - "analyzer_suggest": config["analyzerSuggest"], - "analyzer_cluster": config["analyzerCluster"], - "version": config["appVersion"], }) + "analyzer": config.exchangeName, + "analyzer_index": config.analyzerIndex, + "analyzer_priority": config.analyzerPriority, + "analyzer_log_search": config.analyzerLogSearch, + "analyzer_suggest": config.analyzerSuggest, + "analyzer_cluster": config.analyzerCluster, + "version": config.appVersion + }) except Exception as err: logger.error("Failed to declare exchange") logger.error(err) return False - logger.info("Exchange '%s' has been declared", config["exchangeName"]) + logger.info("Exchange '%s' has been declared", config.exchangeName) return True -def init_amqp(_amqp_client): +def init_amqp(_amqp_client: AmqpClient): """Initialize rabbitmq queues, exchange and stars threads for queue messages processing""" with _amqp_client.connection.channel() as channel: try: declare_exchange(channel, APP_CONFIG) except Exception as err: - logger.error("Failed to declare amqp objects") + logger.error('Failed to declare amqp objects') logger.error(err) return _threads = [] 
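A trimmed, standalone version of the declare_exchange call above using pika's blocking API; the connection URL and the capability values in arguments are placeholders, not the project's real settings.

```python
# Declare the analyzer's direct exchange and advertise its capabilities, as done above.
import pika

params = pika.URLParameters("amqp://guest:guest@localhost:5672/analyzer")  # placeholder URL
with pika.BlockingConnection(params) as connection:
    channel = connection.channel()
    # The arguments dict mirrors the ApplicationConfig fields passed in declare_exchange
    channel.exchange_declare(
        exchange="analyzer", exchange_type="direct",
        durable=False, auto_delete=True, internal=False,
        arguments={"analyzer": "analyzer", "analyzer_index": True,
                   "analyzer_priority": 1, "version": "5.x"})
```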
_model_chooser = model_chooser.ModelChooser(APP_CONFIG, SEARCH_CONFIG) - if APP_CONFIG["instanceTaskType"] == "train": + if APP_CONFIG.instanceTaskType == 'train': _retraining_service = RetrainingService(_model_chooser, APP_CONFIG, SEARCH_CONFIG) - _threads.append(create_thread(AmqpClient(APP_CONFIG["amqpUrl"]).receive, - (APP_CONFIG["exchangeName"], "train_models", True, False, - lambda current_channel, method, props, body: - amqp_handler.handle_inner_amqp_request( - current_channel, method, props, body, - _retraining_service.train_models)))) + _threads.append( + create_thread( + AmqpClient(APP_CONFIG.amqpUrl).receive, + (APP_CONFIG.exchangeName, 'train_models', True, False, + lambda current_channel, method, props, body: amqp_handler.handle_inner_amqp_request( + current_channel, method, props, body, _retraining_service.train_models, + prepare_data_func=amqp_handler.prepare_train_info)))) else: - _es_client = EsClient(APP_CONFIG, SEARCH_CONFIG) + _es_client = EsClient(APP_CONFIG) _auto_analyzer_service = AutoAnalyzerService(_model_chooser, APP_CONFIG, SEARCH_CONFIG) _delete_index_service = DeleteIndexService(_model_chooser, APP_CONFIG, SEARCH_CONFIG) - _clean_index_service = CleanIndexService(APP_CONFIG, SEARCH_CONFIG) + _clean_index_service = CleanIndexService(APP_CONFIG) _analyzer_service = AnalyzerService(_model_chooser, SEARCH_CONFIG) _suggest_service = SuggestService(_model_chooser, APP_CONFIG, SEARCH_CONFIG) - _suggest_info_service = SuggestInfoService(APP_CONFIG, SEARCH_CONFIG) + _suggest_info_service = SuggestInfoService(APP_CONFIG) _search_service = SearchService(APP_CONFIG, SEARCH_CONFIG) _cluster_service = ClusterService(APP_CONFIG, SEARCH_CONFIG) - _namespace_finder_service = NamespaceFinderService(APP_CONFIG, SEARCH_CONFIG) + _namespace_finder_service = NamespaceFinderService(APP_CONFIG) _suggest_patterns_service = SuggestPatternsService(APP_CONFIG, SEARCH_CONFIG) - _threads.append(create_thread(AmqpClient(APP_CONFIG["amqpUrl"]).receive, - (APP_CONFIG["exchangeName"], "index", True, False, + _threads.append(create_thread(AmqpClient(APP_CONFIG.amqpUrl).receive, + (APP_CONFIG.exchangeName, 'index', True, False, lambda current_channel, method, props, body: - amqp_handler.handle_amqp_request(current_channel, method, props, body, - _es_client.index_logs, - prepare_response_data=amqp_handler. - prepare_index_response_data)))) - _threads.append(create_thread(AmqpClient(APP_CONFIG["amqpUrl"]).receive, - (APP_CONFIG["exchangeName"], "analyze", True, False, + amqp_handler.handle_amqp_request( + current_channel, method, props, body, _es_client.index_logs, + prepare_response_data=amqp_handler.prepare_index_response_data)))) + _threads.append(create_thread(AmqpClient(APP_CONFIG.amqpUrl).receive, + (APP_CONFIG.exchangeName, 'analyze', True, False, lambda current_channel, method, props, body: amqp_handler.handle_amqp_request(current_channel, method, props, body, _auto_analyzer_service.analyze_logs, prepare_response_data=amqp_handler. prepare_analyze_response_data)))) - _threads.append(create_thread(AmqpClient(APP_CONFIG["amqpUrl"]).receive, - (APP_CONFIG["exchangeName"], "delete", True, False, + _threads.append(create_thread(AmqpClient(APP_CONFIG.amqpUrl).receive, + (APP_CONFIG.exchangeName, 'delete', True, False, lambda current_channel, method, props, body: amqp_handler.handle_amqp_request(current_channel, method, props, body, _delete_index_service.delete_index, @@ -201,8 +205,8 @@ def init_amqp(_amqp_client): prepare_delete_index, prepare_response_data=amqp_handler. 
output_result)))) - _threads.append(create_thread(AmqpClient(APP_CONFIG["amqpUrl"]).receive, - (APP_CONFIG["exchangeName"], "clean", True, False, + _threads.append(create_thread(AmqpClient(APP_CONFIG.amqpUrl).receive, + (APP_CONFIG.exchangeName, 'clean', True, False, lambda current_channel, method, props, body: amqp_handler.handle_amqp_request(current_channel, method, props, body, _clean_index_service.delete_logs, @@ -210,8 +214,8 @@ def init_amqp(_amqp_client): prepare_clean_index, prepare_response_data=amqp_handler. output_result)))) - _threads.append(create_thread(AmqpClient(APP_CONFIG["amqpUrl"]).receive, - (APP_CONFIG["exchangeName"], "search", True, False, + _threads.append(create_thread(AmqpClient(APP_CONFIG.amqpUrl).receive, + (APP_CONFIG.exchangeName, 'search', True, False, lambda current_channel, method, props, body: amqp_handler.handle_amqp_request(current_channel, method, props, body, _search_service.search_logs, @@ -219,8 +223,8 @@ def init_amqp(_amqp_client): prepare_search_logs, prepare_response_data=amqp_handler. prepare_analyze_response_data)))) - _threads.append(create_thread(AmqpClient(APP_CONFIG["amqpUrl"]).receive, - (APP_CONFIG["exchangeName"], "suggest", True, False, + _threads.append(create_thread(AmqpClient(APP_CONFIG.amqpUrl).receive, + (APP_CONFIG.exchangeName, 'suggest', True, False, lambda current_channel, method, props, body: amqp_handler.handle_amqp_request(current_channel, method, props, body, _suggest_service.suggest_items, @@ -228,8 +232,8 @@ def init_amqp(_amqp_client): prepare_test_item_info, prepare_response_data=amqp_handler. prepare_analyze_response_data)))) - _threads.append(create_thread(AmqpClient(APP_CONFIG["amqpUrl"]).receive, - (APP_CONFIG["exchangeName"], "cluster", True, False, + _threads.append(create_thread(AmqpClient(APP_CONFIG.amqpUrl).receive, + (APP_CONFIG.exchangeName, 'cluster', True, False, lambda current_channel, method, props, body: amqp_handler.handle_amqp_request(current_channel, method, props, body, _cluster_service.find_clusters, @@ -237,82 +241,82 @@ def init_amqp(_amqp_client): prepare_launch_info, prepare_response_data=amqp_handler. 
prepare_index_response_data)))) - _threads.append(create_thread(AmqpClient(APP_CONFIG["amqpUrl"]).receive, - (APP_CONFIG["exchangeName"], "stats_info", True, False, + _threads.append(create_thread(AmqpClient(APP_CONFIG.amqpUrl).receive, + (APP_CONFIG.exchangeName, 'stats_info', True, False, lambda current_channel, method, props, body: amqp_handler.handle_inner_amqp_request(current_channel, method, props, body, _es_client.send_stats_info)))) - _threads.append(create_thread(AmqpClient(APP_CONFIG["amqpUrl"]).receive, - (APP_CONFIG["exchangeName"], "namespace_finder", True, False, + _threads.append(create_thread(AmqpClient(APP_CONFIG.amqpUrl).receive, + (APP_CONFIG.exchangeName, 'namespace_finder', True, False, lambda current_channel, method, props, body: amqp_handler.handle_amqp_request( current_channel, method, props, body, _namespace_finder_service.update_chosen_namespaces, publish_result=False)))) - _threads.append(create_thread(AmqpClient(APP_CONFIG["amqpUrl"]).receive, - (APP_CONFIG["exchangeName"], "suggest_patterns", True, False, + _threads.append(create_thread(AmqpClient(APP_CONFIG.amqpUrl).receive, + (APP_CONFIG.exchangeName, 'suggest_patterns', True, False, lambda current_channel, method, props, body: amqp_handler.handle_amqp_request( current_channel, method, props, body, _suggest_patterns_service.suggest_patterns, prepare_data_func=amqp_handler.prepare_delete_index, prepare_response_data=amqp_handler.prepare_index_response_data)))) - _threads.append(create_thread(AmqpClient(APP_CONFIG["amqpUrl"]).receive, - (APP_CONFIG["exchangeName"], "index_suggest_info", True, False, + _threads.append(create_thread(AmqpClient(APP_CONFIG.amqpUrl).receive, + (APP_CONFIG.exchangeName, 'index_suggest_info', True, False, lambda current_channel, method, props, body: amqp_handler.handle_amqp_request( current_channel, method, props, body, _suggest_info_service.index_suggest_info, prepare_data_func=amqp_handler.prepare_suggest_info_list, prepare_response_data=amqp_handler.prepare_index_response_data)))) - _threads.append(create_thread(AmqpClient(APP_CONFIG["amqpUrl"]).receive, - (APP_CONFIG["exchangeName"], "remove_suggest_info", True, False, + _threads.append(create_thread(AmqpClient(APP_CONFIG.amqpUrl).receive, + (APP_CONFIG.exchangeName, 'remove_suggest_info', True, False, lambda current_channel, method, props, body: amqp_handler.handle_amqp_request( current_channel, method, props, body, _suggest_info_service.remove_suggest_info, prepare_data_func=amqp_handler.prepare_delete_index, prepare_response_data=amqp_handler.output_result)))) - _threads.append(create_thread(AmqpClient(APP_CONFIG["amqpUrl"]).receive, - (APP_CONFIG["exchangeName"], "update_suggest_info", True, False, + _threads.append(create_thread(AmqpClient(APP_CONFIG.amqpUrl).receive, + (APP_CONFIG.exchangeName, 'update_suggest_info', True, False, lambda current_channel, method, props, body: amqp_handler.handle_amqp_request( current_channel, method, props, body, _suggest_info_service.update_suggest_info, prepare_data_func=lambda x: x)))) - _threads.append(create_thread(AmqpClient(APP_CONFIG["amqpUrl"]).receive, - (APP_CONFIG["exchangeName"], "remove_models", True, False, + _threads.append(create_thread(AmqpClient(APP_CONFIG.amqpUrl).receive, + (APP_CONFIG.exchangeName, 'remove_models', True, False, lambda current_channel, method, props, body: amqp_handler.handle_amqp_request(current_channel, method, props, body, _analyzer_service.remove_models, prepare_data_func=lambda x: x, prepare_response_data=amqp_handler. 
output_result)))) - _threads.append(create_thread(AmqpClient(APP_CONFIG["amqpUrl"]).receive, - (APP_CONFIG["exchangeName"], "get_model_info", True, False, + _threads.append(create_thread(AmqpClient(APP_CONFIG.amqpUrl).receive, + (APP_CONFIG.exchangeName, 'get_model_info', True, False, lambda current_channel, method, props, body: amqp_handler.handle_amqp_request(current_channel, method, props, body, _analyzer_service.get_model_info, prepare_data_func=lambda x: x)))) - _threads.append(create_thread(AmqpClient(APP_CONFIG["amqpUrl"]).receive, - (APP_CONFIG["exchangeName"], "defect_update", True, False, + _threads.append(create_thread(AmqpClient(APP_CONFIG.amqpUrl).receive, + (APP_CONFIG.exchangeName, 'defect_update', True, False, lambda current_channel, method, props, body: amqp_handler.handle_amqp_request(current_channel, method, props, body, _es_client.defect_update, prepare_data_func=lambda x: x, prepare_response_data=amqp_handler. prepare_search_response_data)))) - _threads.append(create_thread(AmqpClient(APP_CONFIG["amqpUrl"]).receive, - (APP_CONFIG["exchangeName"], "item_remove", True, False, + _threads.append(create_thread(AmqpClient(APP_CONFIG.amqpUrl).receive, + (APP_CONFIG.exchangeName, 'item_remove', True, False, lambda current_channel, method, props, body: amqp_handler.handle_amqp_request( current_channel, method, props, body, _clean_index_service.delete_test_items, prepare_data_func=lambda x: x, prepare_response_data=amqp_handler.output_result)))) - _threads.append(create_thread(AmqpClient(APP_CONFIG["amqpUrl"]).receive, - (APP_CONFIG["exchangeName"], "launch_remove", True, False, + _threads.append(create_thread(AmqpClient(APP_CONFIG.amqpUrl).receive, + (APP_CONFIG.exchangeName, 'launch_remove', True, False, lambda current_channel, method, props, body: amqp_handler.handle_amqp_request(current_channel, method, props, body, _clean_index_service.delete_launches, @@ -321,10 +325,10 @@ def init_amqp(_amqp_client): output_result)))) _threads.append( create_thread( - AmqpClient(APP_CONFIG["amqpUrl"]).receive, + AmqpClient(APP_CONFIG.amqpUrl).receive, ( - APP_CONFIG["exchangeName"], - "remove_by_launch_start_time", + APP_CONFIG.exchangeName, + 'remove_by_launch_start_time', True, False, lambda current_channel, method, props, body: amqp_handler.handle_amqp_request( @@ -341,10 +345,10 @@ def init_amqp(_amqp_client): ) _threads.append( create_thread( - AmqpClient(APP_CONFIG["amqpUrl"]).receive, + AmqpClient(APP_CONFIG.amqpUrl).receive, ( - APP_CONFIG["exchangeName"], - "remove_by_log_time", + APP_CONFIG.exchangeName, + 'remove_by_log_time', True, False, lambda current_channel, method, props, body: amqp_handler.handle_amqp_request( @@ -365,35 +369,39 @@ def init_amqp(_amqp_client): def read_version(): """Reads the application build version""" - version_filename = "VERSION" + version_filename = 'VERSION' if os.path.exists(version_filename): - with open(version_filename, "r") as file: + with open(version_filename, 'r') as file: return file.read().strip() - return "" + return '' def read_model_settings(): """Reads paths to models""" - model_settings = utils.read_json_file("res", "model_settings.json", to_json=True) - SEARCH_CONFIG["BoostModelFolder"] = model_settings["BOOST_MODEL_FOLDER"] - SEARCH_CONFIG["SuggestBoostModelFolder"] = model_settings["SUGGEST_BOOST_MODEL_FOLDER"] - SEARCH_CONFIG["SimilarityWeightsFolder"] = model_settings["SIMILARITY_WEIGHTS_FOLDER"] - SEARCH_CONFIG["GlobalDefectTypeModelFolder"] = model_settings["GLOBAL_DEFECT_TYPE_MODEL_FOLDER"] - 
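The repeated `_threads.append(create_thread(AmqpClient(...).receive, (...)))` blocks above all follow one pattern: a dedicated connection and daemon thread per queue, with a callback that hands the decoded message body to a service method. A hedged, generic sketch of that pattern; `receive` is assumed to block and consume, and the meaning of the two boolean flags is defined by AmqpClient.receive, not shown in this diff.

```python
# Generic per-queue consumer wiring, simplified from the pattern above.
import json
import threading


def start_consumer(receive, exchange: str, queue: str, callback) -> threading.Thread:
    """Spawn a daemon thread running receive(exchange, queue, flag1, flag2, callback)."""
    thread = threading.Thread(target=receive, args=(exchange, queue, True, False, callback))
    thread.daemon = True
    thread.start()
    return thread


def make_callback(service_method):
    """Wrap a service method so it can be used as a pika-style on-message callback."""
    def on_message(channel, method, props, body):
        return service_method(json.loads(body))
    return on_message
```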
SEARCH_CONFIG["RetrainSuggestBoostModelConfig"] = model_settings["RETRAIN_SUGGEST_BOOST_MODEL_CONFIG"] - SEARCH_CONFIG["RetrainAutoBoostModelConfig"] = model_settings["RETRAIN_AUTO_BOOST_MODEL_CONFIG"] + model_settings = utils.read_json_file('res', 'model_settings.json', to_json=True) + if not model_settings or not isinstance(model_settings, dict): + raise RuntimeError('Failed to read model settings') + + SEARCH_CONFIG.BoostModelFolder = model_settings['BOOST_MODEL_FOLDER'].strip().rstrip('/').rstrip('\\') + SEARCH_CONFIG.SuggestBoostModelFolder = model_settings[ + 'SUGGEST_BOOST_MODEL_FOLDER'].strip().rstrip('/').rstrip('\\') + SEARCH_CONFIG.SimilarityWeightsFolder = model_settings[ + 'SIMILARITY_WEIGHTS_FOLDER'].strip().rstrip('/').rstrip('\\') + SEARCH_CONFIG.GlobalDefectTypeModelFolder = model_settings[ + 'GLOBAL_DEFECT_TYPE_MODEL_FOLDER'].strip().rstrip('/').rstrip('\\') log_file_path = 'res/logging.conf' -logging.config.fileConfig(log_file_path, defaults={'logfilename': APP_CONFIG["analyzerPathToLog"]}) -if APP_CONFIG["logLevel"].lower() == "debug": +logging.config.fileConfig(log_file_path, defaults={'logfilename': APP_CONFIG.analyzerPathToLog}) +if APP_CONFIG.logLevel.lower() == 'debug': logging.disable(logging.NOTSET) -elif APP_CONFIG["logLevel"].lower() == "info": +elif APP_CONFIG.logLevel.lower() == 'info': logging.disable(logging.DEBUG) else: logging.disable(logging.INFO) -logger = logging.getLogger("analyzerApp") -APP_CONFIG["appVersion"] = read_version() -es_client = EsClient(APP_CONFIG, SEARCH_CONFIG) +logger = my_logging.getLogger('analyzerApp') +APP_CONFIG.appVersion = read_version() +es_client = EsClient(APP_CONFIG) read_model_settings() application = create_application() @@ -404,11 +412,11 @@ def read_model_settings(): def get_health_status(): status = "" if not es_client.is_healthy(): - status += "Elasticsearch is not healthy;" + status += 'Elasticsearch is not healthy;' if status: - logger.error("Analyzer health check status failed: %s", status) - return Response(json.dumps({"status": status}), status=503, mimetype='application/json') - return jsonify({"status": "healthy"}) + logger.error('Analyzer health check status failed: %s', status) + return Response(json.dumps({'status': status}), status=503, mimetype='application/json') + return jsonify({'status': 'healthy'}) # noinspection PyUnusedLocal @@ -420,7 +428,7 @@ def handler(signal_received, frame): def start_http_server(): application.logger.setLevel(logging.INFO) logger.info("Started http server") - application.run(host='0.0.0.0', port=APP_CONFIG["analyzerHttpPort"], use_reloader=False) + application.run(host='0.0.0.0', port=APP_CONFIG.analyzerHttpPort, use_reloader=False) signal(SIGINT, handler) @@ -429,10 +437,10 @@ def start_http_server(): try: logger.info("Starting waiting for AMQP connection") try: - amqp_client = AmqpClient(APP_CONFIG["amqpUrl"]) + amqp_client = AmqpClient(APP_CONFIG.amqpUrl) except Exception as exc: logger.error("Amqp connection was not established") - logger.error(exc) + logger.exception(exc) time.sleep(10) continue threads = init_amqp(amqp_client) @@ -440,7 +448,7 @@ def start_http_server(): break except Exception as exc: logger.error("The analyzer has failed") - logger.error(exc) + logger.exception(exc) if __name__ == '__main__': logger.info("Program started") diff --git a/app/service/__init__.py b/app/service/__init__.py index 72a32351..317d6032 100644 --- a/app/service/__init__.py +++ b/app/service/__init__.py @@ -24,7 +24,6 @@ from app.service.suggest_patterns_service import 
SuggestPatternsService from app.service.suggest_service import SuggestService - __all__ = ['AnalyzerService', 'AutoAnalyzerService', 'CleanIndexService', 'ClusterService', 'DeleteIndexService', 'NamespaceFinderService', 'RetrainingService', 'SearchService', 'SuggestInfoService', 'SuggestPatternsService', 'SuggestService'] diff --git a/app/service/analyzer_service.py b/app/service/analyzer_service.py index 9b2aa723..5b20e39f 100644 --- a/app/service/analyzer_service.py +++ b/app/service/analyzer_service.py @@ -12,122 +12,121 @@ # See the License for the specific language governing permissions and # limitations under the License. -import logging import re -from typing import Any -from app.boosting_decision_making import weighted_similarity_calculator +from app.commons import logging +from app.commons.model.launch_objects import SearchConfig, Launch, TestItemInfo, AnalyzerConf +from app.commons.model.ml import ModelInfo from app.commons.log_merger import LogMerger -from app.commons.log_preparation import LogPreparation +from app.commons.log_requests import LogRequests +from app.commons.model_chooser import ModelChooser from app.utils import utils logger = logging.getLogger("analyzerApp.analyzerService") +def _add_launch_name_boost(query: dict, launch_name: str, launch_boost: float) -> None: + should = utils.create_path(query, ('query', 'bool', 'should'), []) + should.append({'term': {'launch_name': {'value': launch_name, 'boost': launch_boost}}}) + + +def _add_launch_id_boost(query: dict, launch_id: int, launch_boost: float) -> None: + should = utils.create_path(query, ('query', 'bool', 'should'), []) + should.append({'term': {'launch_id': {'value': launch_id, 'boost': launch_boost}}}) + + +def _add_launch_name_and_id_boost(query: dict, launch_name: str, launch_id: int, launch_boost: float) -> None: + _add_launch_id_boost(query, launch_id, launch_boost) + _add_launch_name_boost(query, launch_name, launch_boost) + + +def add_constraints_for_launches_into_query(query: dict, launch: Launch, launch_boost: float) -> dict: + previous_launch_id = getattr(launch, 'previousLaunchId', 0) or 0 + previous_launch_id = int(previous_launch_id) + analyzer_mode = launch.analyzerConfig.analyzerMode + launch_name = launch.launchName + launch_id = launch.launchId + if analyzer_mode == 'LAUNCH_NAME': + # Previous launches with the same name + must = utils.create_path(query, ('query', 'bool', 'must'), []) + must_not = utils.create_path(query, ('query', 'bool', 'must_not'), []) + must.append({'term': {'launch_name': launch_name}}) + must_not.append({'term': {'launch_id': launch_id}}) + elif analyzer_mode == 'CURRENT_AND_THE_SAME_NAME': + # All launches with the same name + must = utils.create_path(query, ('query', 'bool', 'must'), []) + must.append({'term': {'launch_name': launch_name}}) + _add_launch_id_boost(query, launch_id, launch_boost) + elif analyzer_mode == 'CURRENT_LAUNCH': + # Just current launch + must = utils.create_path(query, ('query', 'bool', 'must'), []) + must.append({'term': {'launch_id': launch_id}}) + elif analyzer_mode == 'PREVIOUS_LAUNCH': + # Just previous launch + must = utils.create_path(query, ('query', 'bool', 'must'), []) + must.append({'term': {'launch_id': previous_launch_id}}) + elif analyzer_mode == 'ALL': + # All previous launches + must_not = utils.create_path(query, ('query', 'bool', 'must_not'), []) + must_not.append({'term': {'launch_id': launch_id}}) + else: + # Boost launches with the same name and ID, but do not ignore any + _add_launch_name_and_id_boost(query, launch_name, 
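The module-level boost helpers above rely on utils.create_path to grow the nested Elasticsearch bool query; its behavior can be inferred from this usage and from the instance method removed further down in this diff. A self-contained equivalent for reference, with illustrative launch values:

```python
# Reference implementation of the create_path helper used by the boost functions above.
from typing import Any


def create_path(query: dict, path: tuple[str, ...], value: Any) -> Any:
    """Walk (and create) nested dict keys, returning the leaf so clauses can be appended."""
    node = query
    for key in path[:-1]:
        node = node.setdefault(key, {})
    return node.setdefault(path[-1], value)


query: dict = {}
create_path(query, ('query', 'bool', 'should'), []).append(
    {'term': {'launch_name': {'value': 'smoke', 'boost': 4.0}}})  # illustrative values
print(query)  # {'query': {'bool': {'should': [{'term': {...}}]}}}
```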
launch_id, launch_boost) + return query + + +def add_constraints_for_launches_into_query_suggest(query: dict, test_item_info: TestItemInfo, + launch_boost: float) -> dict: + previous_launch_id = getattr(test_item_info, 'previousLaunchId', 0) or 0 + previous_launch_id = int(previous_launch_id) + analyzer_mode = test_item_info.analyzerConfig.analyzerMode + launch_name = test_item_info.launchName + launch_id = test_item_info.launchId + if analyzer_mode in {'LAUNCH_NAME', 'ALL'}: + # Previous launches with the same name + _add_launch_name_boost(query, launch_name, launch_boost) + should = utils.create_path(query, ('query', 'bool', 'should'), []) + should.append({'term': {'launch_id': {'value': launch_id, 'boost': 1 / launch_boost}}}) + elif analyzer_mode == 'PREVIOUS_LAUNCH': + # Just previous launch + if previous_launch_id: + _add_launch_id_boost(query, previous_launch_id, launch_boost) + else: + # For: + # * CURRENT_LAUNCH + # * CURRENT_AND_THE_SAME_NAME + # Boost launches with the same name, but do not ignore any + _add_launch_name_and_id_boost(query, launch_name, launch_id, launch_boost) + return query + + class AnalyzerService: + search_cfg: SearchConfig launch_boost: float - - def __init__(self, model_chooser, search_cfg=None): - self.search_cfg = search_cfg or {} - self.launch_boost = abs(self.search_cfg['BoostLaunch']) - self.log_preparation = LogPreparation() + log_requests: LogRequests + log_merger: LogMerger + model_chooser: ModelChooser + + def __init__(self, model_chooser: ModelChooser, search_cfg: SearchConfig): + self.search_cfg = search_cfg + self.launch_boost = abs(self.search_cfg.BoostLaunch) + self.log_requests = LogRequests() self.log_merger = LogMerger() self.model_chooser = model_chooser - self.weighted_log_similarity_calculator = None - if self.search_cfg["SimilarityWeightsFolder"].strip(): - self.weighted_log_similarity_calculator = weighted_similarity_calculator. 
\ - WeightedSimilarityCalculator(folder=self.search_cfg["SimilarityWeightsFolder"]) - def find_min_should_match_threshold(self, analyzer_config): + def find_min_should_match_threshold(self, analyzer_config: AnalyzerConf): return analyzer_config.minShouldMatch if analyzer_config.minShouldMatch > 0 else \ - int(re.search(r"\d+", self.search_cfg["MinShouldMatch"]).group(0)) - - def create_path(self, query: dict, path: tuple[str, ...], value: Any) -> Any: - path_length = len(path) - last_element = path[path_length - 1] - current_node = query - for i in range(path_length - 1): - element = path[i] - if element not in current_node: - current_node[element] = {} - current_node = current_node[element] - if last_element not in current_node: - current_node[last_element] = value - return current_node[last_element] - - def _add_launch_name_boost(self, query: dict, launch_name: str) -> None: - should = self.create_path(query, ('query', 'bool', 'should'), []) - should.append({'term': {'launch_name': {'value': launch_name, 'boost': self.launch_boost}}}) - - def _add_launch_id_boost(self, query: dict, launch_id: int) -> None: - should = self.create_path(query, ('query', 'bool', 'should'), []) - should.append({'term': {'launch_id': {'value': launch_id, 'boost': self.launch_boost}}}) - - def _add_launch_name_and_id_boost(self, query: dict, launch_name: str, launch_id: int): - self._add_launch_id_boost(query, launch_id) - self._add_launch_name_boost(query, launch_name) - - def add_constraints_for_launches_into_query(self, query: dict, launch) -> dict: - previous_launch_id = getattr(launch, 'previousLaunchId', 0) or 0 - previous_launch_id = int(previous_launch_id) - analyzer_mode = launch.analyzerConfig.analyzerMode - launch_name = launch.launchName - launch_id = launch.launchId - if analyzer_mode == 'LAUNCH_NAME': - # Previous launches with the same name - must = self.create_path(query, ('query', 'bool', 'must'), []) - must_not = self.create_path(query, ('query', 'bool', 'must_not'), []) - must.append({'term': {'launch_name': launch_name}}) - must_not.append({'term': {'launch_id': launch_id}}) - elif analyzer_mode == 'CURRENT_AND_THE_SAME_NAME': - # All launches with the same name - must = self.create_path(query, ('query', 'bool', 'must'), []) - must.append({'term': {'launch_name': launch_name}}) - self._add_launch_id_boost(query, launch_id) - elif analyzer_mode == 'CURRENT_LAUNCH': - # Just current launch - must = self.create_path(query, ('query', 'bool', 'must'), []) - must.append({'term': {'launch_id': launch_id}}) - elif analyzer_mode == 'PREVIOUS_LAUNCH': - # Just previous launch - must = self.create_path(query, ('query', 'bool', 'must'), []) - must.append({'term': {'launch_id': previous_launch_id}}) - elif analyzer_mode == 'ALL': - # All previous launches - must_not = self.create_path(query, ('query', 'bool', 'must_not'), []) - must_not.append({'term': {'launch_id': launch_id}}) - else: - # Boost launches with the same name and ID, but do not ignore any - self._add_launch_name_and_id_boost(query, launch_name, launch_id) - return query - - def add_constraints_for_launches_into_query_suggest(self, query: dict, test_item_info) -> dict: - previous_launch_id = getattr(test_item_info, 'previousLaunchId', 0) or 0 - previous_launch_id = int(previous_launch_id) - analyzer_mode = test_item_info.analyzerConfig.analyzerMode - launch_name = test_item_info.launchName - launch_id = test_item_info.launchId - launch_boost = abs(self.search_cfg['BoostLaunch']) - if analyzer_mode in {'LAUNCH_NAME', 'ALL'}: - # Previous 
launches with the same name - self._add_launch_name_boost(query, launch_name) - should = self.create_path(query, ('query', 'bool', 'should'), []) - should.append({'term': {'launch_id': {'value': launch_id, 'boost': 1 / launch_boost}}}) - elif analyzer_mode == 'PREVIOUS_LAUNCH': - # Just previous launch - if previous_launch_id: - self._add_launch_id_boost(query, previous_launch_id) - else: - # For: - # * CURRENT_LAUNCH - # * CURRENT_AND_THE_SAME_NAME - # Boost launches with the same name, but do not ignore any - self._add_launch_name_and_id_boost(query, launch_name, launch_id) - return query + int(re.search(r"\d+", self.search_cfg.MinShouldMatch).group(0)) + + def add_constraints_for_launches_into_query(self, query: dict, launch: Launch) -> dict: + return add_constraints_for_launches_into_query(query, launch, self.launch_boost) + + def add_constraints_for_launches_into_query_suggest(self, query: dict, test_item_info: TestItemInfo) -> dict: + return add_constraints_for_launches_into_query_suggest(query, test_item_info, self.launch_boost) def build_more_like_this_query(self, - min_should_match, log_message, - field_name="message", boost=1.0, + min_should_match: str, log_message, + field_name: str = "message", boost: float = 1.0, override_min_should_match=None): """Build more like this query""" return utils.build_more_like_this_query( @@ -136,17 +135,18 @@ def build_more_like_this_query(self, field_name=field_name, boost=boost, override_min_should_match=override_min_should_match, - max_query_terms=self.search_cfg["MaxQueryTerms"] + max_query_terms=self.search_cfg.MaxQueryTerms ) - def prepare_restrictions_by_issue_type(self, filter_no_defect=True): + @staticmethod + def prepare_restrictions_by_issue_type(filter_no_defect=True): if filter_no_defect: return [ {"wildcard": {"issue_type": "ti*"}}, {"wildcard": {"issue_type": "nd*"}}] return [{"term": {"issue_type": "ti001"}}] - def build_common_query(self, log, size=10, filter_no_defect=True): + def build_common_query(self, log, size=10, filter_no_defect=True) -> dict: issue_type_conditions = self.prepare_restrictions_by_issue_type( filter_no_defect=filter_no_defect) return {"size": size, @@ -161,15 +161,15 @@ def build_common_query(self, log, size=10, filter_no_defect=True): "should": [ {"term": {"test_case_hash": { "value": log["_source"]["test_case_hash"], - "boost": abs(self.search_cfg["BoostTestCaseHash"])}}}, + "boost": abs(self.search_cfg.BoostTestCaseHash)}}}, {"term": {"is_auto_analyzed": { - "value": str(self.search_cfg["BoostAA"] > 0).lower(), - "boost": abs(self.search_cfg["BoostAA"]), }}}, + "value": str(self.search_cfg.BoostAA > 0).lower(), + "boost": abs(self.search_cfg.BoostAA), }}}, ] } }} - def add_query_with_start_time_decay(self, main_query, start_time): + def add_query_with_start_time_decay(self, main_query: dict, start_time: int) -> dict: return { "size": main_query["size"], "sort": main_query["sort"], @@ -183,7 +183,7 @@ def add_query_with_start_time_decay(self, main_query, start_time): "origin": start_time, "scale": "7d", "offset": "1d", - "decay": self.search_cfg["TimeWeightDecay"] + "decay": self.search_cfg.TimeWeightDecay } } }, @@ -196,32 +196,30 @@ def add_query_with_start_time_decay(self, main_query, start_time): } } - def remove_models(self, model_info): + def remove_models(self, model_info: ModelInfo): try: logger.info("Started removing %s models from project %d", - model_info["model_type"], model_info["project"]) + model_info.model_type.name, model_info.project) deleted_models = self.model_chooser.delete_old_model( 
- model_name=model_info["model_type"] + "_model", - project_id=model_info["project"]) + model_info.model_type, model_info.project) logger.info("Finished removing %s models from project %d", - model_info["model_type"], model_info["project"]) + model_info.model_type.name, model_info.project) return deleted_models except Exception as err: logger.error("Error while removing models.") - logger.error(err) + logger.exception(err) return 0 - def get_model_info(self, model_info): + def get_model_info(self, model_info: ModelInfo): try: logger.info("Started getting info for %s model from project %d", - model_info["model_type"], model_info["project"]) + model_info.model_type.name, model_info.project) model_folder = self.model_chooser.get_model_info( - model_name=model_info["model_type"] + "_model", - project_id=model_info["project"]) + model_info.model_type, model_info.project) logger.info("Finished getting info for %s model from project %d", - model_info["model_type"], model_info["project"]) + model_info.model_type.name, model_info.project) return {"model_folder": model_folder} except Exception as err: logger.error("Error while getting info for models.") - logger.error(err) + logger.exception(err) return "" diff --git a/app/service/auto_analyzer_service.py b/app/service/auto_analyzer_service.py index 573c5029..69a3d7c2 100644 --- a/app/service/auto_analyzer_service.py +++ b/app/service/auto_analyzer_service.py @@ -12,67 +12,87 @@ # See the License for the specific language governing permissions and # limitations under the License. -from app.utils import utils, text_processing -from app.commons.esclient import EsClient -from app.commons.launch_objects import AnalysisResult, BatchLogInfo, AnalysisCandidate, SuggestAnalysisResult -from app.boosting_decision_making import boosting_featurizer -from app.service.analyzer_service import AnalyzerService -from app.amqp.amqp import AmqpClient -from app.commons.similarity_calculator import SimilarityCalculator -from app.commons.namespace_finder import NamespaceFinder import json -import logging -from time import time, sleep from datetime import datetime from queue import Queue from threading import Thread +from time import time, sleep + +from app.amqp.amqp import AmqpClient +from app.commons import logging +from app.commons import object_saving +from app.commons.esclient import EsClient +from app.commons.log_requests import LogRequests +from app.commons.model.launch_objects import AnalysisResult, BatchLogInfo, AnalysisCandidate, SuggestAnalysisResult, \ + SearchConfig, ApplicationConfig, Launch +from app.commons.model.ml import ModelType +from app.commons.model_chooser import ModelChooser +from app.commons.namespace_finder import NamespaceFinder +from app.commons.similarity_calculator import SimilarityCalculator +from app.machine_learning import boosting_featurizer +from app.machine_learning.models import WeightedSimilarityCalculator, BoostingDecisionMaker, DefectTypeModel +from app.service.analyzer_service import AnalyzerService +from app.utils import utils, text_processing logger = logging.getLogger("analyzerApp.autoAnalyzerService") EARLY_FINISH = False +SPECIAL_FIELDS_BOOST_SCORES = [ + ("detected_message_without_params_extended", 2.0), + ("only_numbers", 2.0), ("potential_status_codes", 8.0), + ("found_tests_and_methods", 2), ("test_item_name", 2.0) +] class AutoAnalyzerService(AnalyzerService): - + app_config: ApplicationConfig + search_cfg: SearchConfig es_client: EsClient namespace_finder: NamespaceFinder + similarity_model: WeightedSimilarityCalculator 
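SPECIAL_FIELDS_BOOST_SCORES above pairs extra log fields with boost factors that are fed into per-field more_like_this clauses. The helper below is only an approximation of the Elasticsearch JSON produced by utils.build_more_like_this_query, with made-up text; the real helper also applies word-length and other settings from SearchConfig.

```python
# Approximate shape of a boosted per-field more_like_this clause, as applied above.
def more_like_this(field: str, text: str, boost: float, min_should_match: str = "1") -> dict:
    return {"more_like_this": {
        "fields": [field],
        "like": text,
        "min_term_freq": 1,
        "min_doc_freq": 1,
        "minimum_should_match": min_should_match,
        "max_query_terms": 50,
        "boost": boost,
    }}


clause = more_like_this("potential_status_codes", "404 500", boost=8.0)
```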
- def __init__(self, model_chooser, app_config=None, search_cfg=None, es_client: EsClient = None): - self.app_config = app_config or {} - self.search_cfg = search_cfg or {} + def __init__(self, model_chooser: ModelChooser, app_config: ApplicationConfig, search_cfg: SearchConfig, + es_client: EsClient = None): + self.app_config = app_config + self.search_cfg = search_cfg super().__init__(model_chooser, search_cfg=self.search_cfg) - self.es_client = es_client or EsClient(app_config=self.app_config, search_cfg=self.search_cfg) + self.es_client = es_client or EsClient(app_config=self.app_config) self.namespace_finder = NamespaceFinder(app_config) + weights_folder = self.search_cfg.SimilarityWeightsFolder + if not weights_folder: + raise ValueError('SimilarityWeightsFolder is not set') + if weights_folder: + self.similarity_model = (WeightedSimilarityCalculator(object_saving.create_filesystem(weights_folder))) + self.similarity_model.load_model() def get_config_for_boosting(self, analyzer_config): min_should_match = self.find_min_should_match_threshold(analyzer_config) / 100 return { - "max_query_terms": self.search_cfg["MaxQueryTerms"], + "max_query_terms": self.search_cfg.MaxQueryTerms, "min_should_match": min_should_match, - "min_word_length": self.search_cfg["MinWordLength"], + "min_word_length": self.search_cfg.MinWordLength, "filter_min_should_match_any": [], "filter_min_should_match": self.choose_fields_to_filter_strict( analyzer_config.numberOfLogLines, min_should_match), "number_of_log_lines": analyzer_config.numberOfLogLines, "filter_by_test_case_hash": True, - "boosting_model": self.search_cfg["BoostModelFolder"], + "boosting_model": self.search_cfg.BoostModelFolder, "filter_by_all_logs_should_be_similar": analyzer_config.allMessagesShouldMatch, - "time_weight_decay": self.search_cfg["TimeWeightDecay"] + "time_weight_decay": self.search_cfg.TimeWeightDecay } def choose_fields_to_filter_strict(self, log_lines, min_should_match): fields = [ - "detected_message", "message", "potential_status_codes"]\ + "detected_message", "message", "potential_status_codes"] \ if log_lines == -1 else ["message", "potential_status_codes"] if min_should_match > 0.99: fields.append("found_tests_and_methods") return fields - def get_min_should_match_setting(self, launch): - return "{}%".format(launch.analyzerConfig.minShouldMatch)\ - if launch.analyzerConfig.minShouldMatch > 0\ - else self.search_cfg["MinShouldMatch"] + def get_min_should_match_setting(self, launch: Launch) -> str: + return "{}%".format(launch.analyzerConfig.minShouldMatch) \ + if launch.analyzerConfig.minShouldMatch > 0 else self.search_cfg.MinShouldMatch - def build_analyze_query(self, launch, log, size=10): + def build_analyze_query(self, launch: Launch, log: dict, size=10): """Build analyze query""" min_should_match = self.get_min_should_match_setting(launch) @@ -83,7 +103,7 @@ def build_analyze_query(self, launch, log, size=10): log_lines = launch.analyzerConfig.numberOfLogLines query["query"]["bool"]["filter"].append({"term": {"is_merged": False}}) if log_lines == -1: - must = self.create_path(query, ('query', 'bool', 'must'), []) + must = utils.create_path(query, ('query', 'bool', 'must'), []) must.append(self.build_more_like_this_query(min_should_match, log["_source"]["detected_message"], field_name="detected_message", @@ -97,7 +117,7 @@ def build_analyze_query(self, launch, log, size=10): else: query["query"]["bool"]["must_not"].append({"wildcard": {"stacktrace": "*"}}) else: - must = self.create_path(query, ('query', 'bool', 
'must'), []) + must = utils.create_path(query, ('query', 'bool', 'must'), []) must.append(self.build_more_like_this_query(min_should_match, log["_source"]["message"], field_name="message", @@ -119,7 +139,7 @@ def build_analyze_query(self, launch, log, size=10): else: query["query"]["bool"]["filter"].append({"term": {"is_merged": True}}) query["query"]["bool"]["must_not"].append({"wildcard": {"message": "*"}}) - must = self.create_path(query, ("query", "bool", "must"), []) + must = utils.create_path(query, ("query", "bool", "must"), []) must.append(self.build_more_like_this_query(min_should_match, log["_source"]["merged_small_logs"], field_name="merged_small_logs", @@ -132,12 +152,9 @@ def build_analyze_query(self, launch, log, size=10): field_name="found_exceptions", boost=8.0, override_min_should_match="1")) - for field, boost_score in [ - ("detected_message_without_params_extended", 2.0), - ("only_numbers", 2.0), ("potential_status_codes", 8.0), - ("found_tests_and_methods", 2), ("test_item_name", 2.0)]: + for field, boost_score in SPECIAL_FIELDS_BOOST_SCORES: if log["_source"][field].strip(): - should = self.create_path(query, ('query', 'bool', 'should'), []) + should = utils.create_path(query, ('query', 'bool', 'should'), []) should.append( self.build_more_like_this_query("1", log["_source"][field], @@ -147,7 +164,7 @@ def build_analyze_query(self, launch, log, size=10): return self.add_query_with_start_time_decay(query, log["_source"]["start_time"]) - def build_query_with_no_defect(self, launch, log, size=10): + def build_query_with_no_defect(self, launch: Launch, log: dict, size=10): min_should_match = self.get_min_should_match_setting(launch) query = { "size": size, @@ -189,7 +206,7 @@ def build_query_with_no_defect(self, launch, log, size=10): field_name="found_exceptions", boost=8.0, override_min_should_match="1")) - utils.append_potential_status_codes(query, log, max_query_terms=self.search_cfg["MaxQueryTerms"]) + utils.append_potential_status_codes(query, log, max_query_terms=self.search_cfg.MaxQueryTerms) return self.add_query_with_start_time_decay(query, log["_source"]["start_time"]) def leave_only_similar_logs(self, candidates_with_no_defect, boosting_config): @@ -202,7 +219,7 @@ def leave_only_similar_logs(self, candidates_with_no_defect, boosting_config): new_search_res = [] _similarity_calculator = SimilarityCalculator( boosting_config, - weighted_similarity_calculator=self.weighted_log_similarity_calculator) + similarity_model=self.similarity_model) if no_defect_candidate_exists: _similarity_calculator.find_similarity( [(log_info, search_res)], @@ -290,7 +307,7 @@ def _send_result_to_queue(self, test_item_dict, batches, batch_logs): candidatesWithNoDefect=candidates_with_no_defect )) - def _query_elasticsearch(self, launches, max_batch_size=30): + def _query_elasticsearch(self, launches: list[Launch], max_batch_size=30): t_start = time() batches = [] batch_logs = [] @@ -301,39 +318,39 @@ def _query_elasticsearch(self, launches, max_batch_size=30): test_items_number_to_process = 0 try: for launch in launches: - index_name = text_processing.unite_project_name( - str(launch.project), self.app_config["esProjectIndexPrefix"]) + index_name = text_processing.unite_project_name(launch.project, self.app_config.esProjectIndexPrefix) if not self.es_client.index_exists(index_name): continue - if test_items_number_to_process >= self.search_cfg["MaxAutoAnalysisItemsToProcess"]: + if test_items_number_to_process >= self.search_cfg.MaxAutoAnalysisItemsToProcess: logger.info("Only first 
%d test items were taken", - self.search_cfg["MaxAutoAnalysisItemsToProcess"]) + self.search_cfg.MaxAutoAnalysisItemsToProcess) break if EARLY_FINISH: logger.info("Early finish from analyzer before timeout") break for test_item in launch.testItems: - if test_items_number_to_process >= self.search_cfg["MaxAutoAnalysisItemsToProcess"]: + if test_items_number_to_process >= self.search_cfg.MaxAutoAnalysisItemsToProcess: logger.info("Only first %d test items were taken", - self.search_cfg["MaxAutoAnalysisItemsToProcess"]) + self.search_cfg.MaxAutoAnalysisItemsToProcess) break if EARLY_FINISH: logger.info("Early finish from analyzer before timeout") break unique_logs = text_processing.leave_only_unique_logs(test_item.logs) - prepared_logs = [self.log_preparation._prepare_log(launch, test_item, log, index_name) + prepared_logs = [LogRequests._prepare_log(launch, test_item, log, index_name) for log in unique_logs if log.logLevel >= utils.ERROR_LOGGING_LEVEL] results, _ = self.log_merger.decompose_logs_merged_and_without_duplicates(prepared_logs) for log in results: message = log["_source"]["message"].strip() merged_logs = log["_source"]["merged_small_logs"].strip() - if log["_source"]["log_level"] < utils.ERROR_LOGGING_LEVEL or\ + if log["_source"]["log_level"] < utils.ERROR_LOGGING_LEVEL or \ (not message and not merged_logs): continue for query_type, query in [ - ("without no defect", self.build_analyze_query(launch, log)), - ("with no defect", self.build_query_with_no_defect(launch, log))]: + ("without no defect", self.build_analyze_query(launch, log)), + ("with no defect", self.build_query_with_no_defect(launch, log)) + ]: full_query = "{}\n{}".format( json.dumps({"index": index_name}), json.dumps(query)) batches.append(full_query) @@ -364,35 +381,37 @@ def _query_elasticsearch(self, launches, max_batch_size=30): if len(batches) > 0: self._send_result_to_queue(test_item_dict, batches, batch_logs) - except Exception as err: + except Exception as exc: logger.error("Error in ES query") - logger.exception(err) + logger.exception(exc) self.finished_queue.put("Finished") logger.info("Es queries finished %.2f s.", time() - t_start) @utils.ignore_warnings - def analyze_logs(self, launches): + def analyze_logs(self, launches: list[Launch]): global EARLY_FINISH cnt_launches = len(launches) logger.info("Started analysis for %d launches", cnt_launches) logger.info("ES Url %s", text_processing.remove_credentials_from_url(self.es_client.host)) self.queue = Queue() self.finished_queue = Queue() - defect_type_model_to_use = {} - es_query_thread = Thread(target=self._query_elasticsearch, args=(launches, )) + # noinspection PyTypeChecker + defect_type_model_to_use: dict[int, DefectTypeModel] = {} + es_query_thread = Thread(target=self._query_elasticsearch, args=(launches,)) es_query_thread.daemon = True es_query_thread.start() analyzed_results_for_index = [] + t_start = time() + results = [] try: - results = [] - t_start = time() del launches cnt_items_to_process = 0 results_to_share = {} chosen_namespaces = {} while self.finished_queue.empty() or not self.queue.empty(): - if (self.search_cfg["AutoAnalysisTimeout"] - (time() - t_start)) <= 5: # check whether we are running out of time # noqa + if (self.search_cfg.AutoAnalysisTimeout - ( + time() - t_start)) <= 5: # check whether we are running out of time # noqa EARLY_FINISH = True break if self.queue.empty(): @@ -416,7 +435,7 @@ def analyze_logs(self, launches): "min_should_match": self.find_min_should_match_threshold( analyzer_candidates.analyzerConfig), 
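_query_elasticsearch above batches the per-log queries in the Elasticsearch multi-search (_msearch) wire format: a one-line `{"index": ...}` header followed by the query JSON for every entry. A small helper showing just that framing, with placeholder index and query values:

```python
# Build an _msearch request body: header line + query line per entry, newline separated.
import json


def msearch_body(pairs: list[tuple[str, dict]]) -> str:
    lines = []
    for index_name, query in pairs:
        lines.append(json.dumps({"index": index_name}))
        lines.append(json.dumps(query))
    return "\n".join(lines) + "\n"


print(msearch_body([("rp_123", {"size": 10, "query": {"match_all": {}}})]))
```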
"model_info": set(), - "module_version": [self.app_config["appVersion"]], + "module_version": [self.app_config.appVersion], "errors": [], "errors_count": 0} @@ -430,13 +449,14 @@ def analyze_logs(self, launches): chosen_namespaces[project_id] = self.namespace_finder.get_chosen_namespaces( project_id) boosting_config["chosen_namespaces"] = chosen_namespaces[project_id] - _boosting_decision_maker = self.model_chooser.choose_model( - project_id, "auto_analysis_model/", - custom_model_prob=self.search_cfg["ProbabilityForCustomModelAutoAnalysis"]) - features_dict_objects = _boosting_decision_maker.features_dict_with_saved_objects + # noinspection PyTypeChecker + _boosting_decision_maker: BoostingDecisionMaker = self.model_chooser.choose_model( + project_id, ModelType.auto_analysis, + custom_model_prob=self.search_cfg.ProbabilityForCustomModelAutoAnalysis) if project_id not in defect_type_model_to_use: + # noinspection PyTypeChecker defect_type_model_to_use[project_id] = self.model_chooser.choose_model( - project_id, "defect_type_model/") + project_id, ModelType.defect_type) relevant_with_no_defect_candidate = self.find_relevant_with_no_defect( analyzer_candidates.candidatesWithNoDefect, boosting_config) @@ -452,28 +472,28 @@ def analyze_logs(self, launches): boosting_data_gatherer = boosting_featurizer.BoostingFeaturizer( candidates, boosting_config, - feature_ids=_boosting_decision_maker.get_feature_ids(), - weighted_log_similarity_calculator=self.weighted_log_similarity_calculator, - features_dict_with_saved_objects=features_dict_objects) + feature_ids=_boosting_decision_maker.feature_ids, + weighted_log_similarity_calculator=self.similarity_model) boosting_data_gatherer.set_defect_type_model(defect_type_model_to_use[project_id]) feature_data, issue_type_names = boosting_data_gatherer.gather_features_info() - model_info_tags = boosting_data_gatherer.get_used_model_info() +\ - _boosting_decision_maker.get_model_info() + model_info_tags = (boosting_data_gatherer.get_used_model_info() + + _boosting_decision_maker.get_model_info()) results_to_share[launch_id]["model_info"].update(model_info_tags) if len(feature_data) > 0: - predicted_labels, predicted_labels_probability =\ - _boosting_decision_maker.predict(feature_data) + # noinspection PyUnresolvedReferences + predicted_labels, predicted_labels_probability = _boosting_decision_maker.predict( + feature_data) - scores_by_issue_type = boosting_data_gatherer.scores_by_issue_type + scores_by_issue_type = boosting_data_gatherer.find_most_relevant_by_type() for i in range(len(issue_type_names)): logger.debug( "Most relevant item with issue type %s has id %s", issue_type_names[i], - boosting_data_gatherer. 
- scores_by_issue_type[issue_type_names[i]]["mrHit"]["_id"]) + boosting_data_gatherer + .find_most_relevant_by_type()[issue_type_names[i]]["mrHit"]["_id"]) logger.debug( "Issue type %s has label %d and probability %.3f for features %s", issue_type_names[i], @@ -485,14 +505,14 @@ def analyze_logs(self, launches): predicted_labels, predicted_labels_probability, issue_type_names, - boosting_data_gatherer.scores_by_issue_type) + boosting_data_gatherer.find_most_relevant_by_type()) if predicted_issue_type: chosen_type = scores_by_issue_type[predicted_issue_type] relevant_item = chosen_type["mrHit"]["_source"]["test_item"] - analysis_result = AnalysisResult(testItem=analyzer_candidates.testItemId, - issueType=predicted_issue_type, - relevantItem=relevant_item) + analysis_result = AnalysisResult( + testItem=analyzer_candidates.testItemId, issueType=predicted_issue_type, + relevantItem=relevant_item) relevant_log_id = utils.extract_real_id(chosen_type["mrHit"]["_id"]) test_item_log_id = utils.extract_real_id(chosen_type["compared_log"]["_id"]) analyzed_results_for_index.append(SuggestAnalysisResult( @@ -509,7 +529,7 @@ def analyze_logs(self, launches): matchScore=round(prob * 100, 2), esScore=round(chosen_type["mrHit"]["_score"], 2), esPosition=chosen_type["mrHit"]["es_pos"], - modelFeatureNames=";".join(_boosting_decision_maker.get_feature_names()), + modelFeatureNames=";".join([str(i) for i in _boosting_decision_maker.feature_ids]), modelFeatureValues=";".join( [str(feature) for feature in feature_data[global_idx]]), modelInfo=";".join(model_info_tags), @@ -537,23 +557,22 @@ def analyze_logs(self, launches): if not found_result: results_to_share[launch_id]["not_found"] += 1 results_to_share[launch_id]["processed_time"] += (time() - t_start_item) - except Exception as err: - logger.error(err) + except Exception as exc: + logger.exception(exc) if launch_id in results_to_share: - results_to_share[launch_id]["errors"].append( - utils.extract_exception(err)) + results_to_share[launch_id]["errors"].append(utils.extract_exception(exc)) results_to_share[launch_id]["errors_count"] += 1 - if "amqpUrl" in self.app_config and self.app_config["amqpUrl"].strip() and analyzed_results_for_index: - AmqpClient(self.app_config["amqpUrl"]).send_to_inner_queue( - self.app_config["exchangeName"], "index_suggest_info", + if self.app_config.amqpUrl and analyzed_results_for_index: + amqp_client = AmqpClient(self.app_config.amqpUrl) + amqp_client.send_to_inner_queue( + self.app_config.exchangeName, 'index_suggest_info', json.dumps([_info.dict() for _info in analyzed_results_for_index])) for launch_id in results_to_share: - results_to_share[launch_id]["model_info"] = list( - results_to_share[launch_id]["model_info"]) - AmqpClient(self.app_config["amqpUrl"]).send_to_inner_queue( - self.app_config["exchangeName"], "stats_info", json.dumps(results_to_share)) - except Exception as err: - logger.error(err) + results_to_share[launch_id]['model_info'] = list(results_to_share[launch_id]['model_info']) + amqp_client.send_to_inner_queue( + self.app_config.exchangeName, 'stats_info', json.dumps(results_to_share)) + except Exception as exc: + logger.exception(exc) es_query_thread.join() EARLY_FINISH = False self.queue = Queue() diff --git a/app/service/clean_index_service.py b/app/service/clean_index_service.py index c28185cb..ad773dea 100644 --- a/app/service/clean_index_service.py +++ b/app/service/clean_index_service.py @@ -12,24 +12,25 @@ # See the License for the specific language governing permissions and # limitations under 
the License. -import logging -from app.utils import utils from time import time + +from app.commons import logging from app.commons.esclient import EsClient -from app.commons.launch_objects import CleanIndexStrIds -from app.service import suggest_info_service +from app.commons.model.launch_objects import CleanIndexStrIds, ApplicationConfig +from app.service.suggest_info_service import SuggestInfoService +from app.utils import utils logger = logging.getLogger("analyzerApp.cleanIndexService") class CleanIndexService: + es_client: EsClient + suggest_info_service: SuggestInfoService - def __init__(self, app_config=None, search_cfg=None): - self.app_config = app_config or {} - self.search_cfg = search_cfg or {} - self.es_client = EsClient(app_config=self.app_config, search_cfg=self.search_cfg) - self.suggest_info_service = suggest_info_service.SuggestInfoService( - app_config=self.app_config, search_cfg=self.search_cfg) + def __init__(self, app_config: ApplicationConfig): + self.app_config = app_config + self.es_client = EsClient(app_config=self.app_config) + self.suggest_info_service = SuggestInfoService(app_config=app_config) @utils.ignore_warnings def delete_logs(self, clean_index): diff --git a/app/service/cluster_service.py b/app/service/cluster_service.py index 802dee91..8da7ffc4 100644 --- a/app/service/cluster_service.py +++ b/app/service/cluster_service.py @@ -12,36 +12,71 @@ # See the License for the specific language governing permissions and # limitations under the License. -from app.commons.esclient import EsClient -from app.commons import clusterizer -from app.utils import utils, text_processing -from app.commons.launch_objects import ClusterResult, ClusterInfo -from app.commons.log_preparation import LogPreparation -from app.commons.log_merger import LogMerger -from sklearn.feature_extraction.text import CountVectorizer -import numpy as np -from app.amqp.amqp import AmqpClient +import hashlib import json -import logging -from time import time +from collections import defaultdict from datetime import datetime -import hashlib +from time import time +from typing import Any + +import numpy as np +from sklearn.feature_extraction.text import CountVectorizer + +from app.amqp.amqp import AmqpClient +from app.commons import clusterizer, logging +from app.commons.esclient import EsClient +from app.commons.log_merger import LogMerger +from app.commons.log_requests import LogRequests +from app.commons.model.launch_objects import (ClusterResult, ClusterInfo, SearchConfig, ApplicationConfig, + LaunchInfoForClustering) +from app.utils import utils, text_processing logger = logging.getLogger("analyzerApp.clusterService") class ClusterService: + app_config: ApplicationConfig + search_cfg: SearchConfig + es_client: EsClient + log_requests: LogRequests + log_merger: LogMerger - def __init__(self, app_config=None, search_cfg=None): - self.app_config = app_config or {} - self.search_cfg = search_cfg or {} - self.es_client = EsClient(app_config=self.app_config, search_cfg=self.search_cfg) - self.log_preparation = LogPreparation() + def __init__(self, app_config: ApplicationConfig, search_cfg: SearchConfig): + self.app_config = app_config + self.search_cfg = search_cfg + self.es_client = EsClient(app_config=self.app_config) + self.log_requests = LogRequests() self.log_merger = LogMerger() - def build_search_similar_items_query(self, queried_log, message, - launch_info, - min_should_match="95%"): + def add_query_with_start_time_decay(self, main_query: dict[str, Any]) -> dict[str, Any]: + return { + 
"size": main_query["size"], + "query": { + "function_score": { + "query": main_query["query"], + "functions": [ + { + "exp": { + "start_time": { + "origin": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + "scale": "7d", + "offset": "1d", + "decay": self.search_cfg.TimeWeightDecay + } + } + }, + { + "script_score": {"script": {"source": "0.2"}} + }], + "score_mode": "max", + "boost_mode": "multiply" + } + } + } + + def build_search_similar_items_query( + self, queried_log: dict[str, Any], message: str, launch_info: LaunchInfoForClustering, + min_should_match: str = "95%") -> dict[str, Any]: """Build search query""" query = { "_source": ["whole_message", "test_item", "is_merged", @@ -65,8 +100,11 @@ def build_search_similar_items_query(self, queried_log, message, min_should_match, message, field_name="whole_message", boost=1.0, override_min_should_match=None, - max_query_terms=self.search_cfg["MaxQueryTerms"]) - ]}}} + max_query_terms=self.search_cfg.MaxQueryTerms) + ] + } + } + } if launch_info.forUpdate: query["query"]["bool"]["should"].append( {"term": {"launch_id": queried_log["_source"]["launch_id"]}}) @@ -86,41 +124,16 @@ def build_search_similar_items_query(self, queried_log, message, queried_log["_source"]["found_exceptions"], field_name="found_exceptions", boost=1.0, override_min_should_match="1", - max_query_terms=self.search_cfg["MaxQueryTerms"])) - utils.append_potential_status_codes(query, queried_log, boost=1.0, - max_query_terms=self.search_cfg["MaxQueryTerms"]) + max_query_terms=self.search_cfg.MaxQueryTerms + ) + ) + utils.append_potential_status_codes( + query, queried_log, boost=1.0, max_query_terms=self.search_cfg.MaxQueryTerms) return self.add_query_with_start_time_decay(query) - def add_query_with_start_time_decay(self, main_query): - return { - "size": main_query["size"], - "query": { - "function_score": { - "query": main_query["query"], - "functions": [ - { - "exp": { - "start_time": { - "origin": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), - "scale": "7d", - "offset": "1d", - "decay": self.search_cfg["TimeWeightDecay"] - } - } - }, - { - "script_score": {"script": {"source": "0.2"}} - }], - "score_mode": "max", - "boost_mode": "multiply" - } - } - } - - def find_similar_items_from_es( - self, groups, log_dict, - log_messages, log_ids, launch_info, - additional_results, unique_errors_min_should_match): + def find_similar_items_from_es(self, groups: dict[int, list[int]], log_dict: dict[int, dict[str, Any]], + log_messages: list[str], log_ids: set[str], launch_info: LaunchInfoForClustering, + unique_errors_min_should_match: float) -> dict[int, ClusterInfo]: new_clusters = {} _clusterizer = clusterizer.Clusterizer() for global_group in groups: @@ -129,14 +142,9 @@ def find_similar_items_from_es( log_messages[first_item_ind], unique_errors_min_should_match) query = self.build_search_similar_items_query( - log_dict[first_item_ind], - log_messages[first_item_ind], - launch_info, - min_should_match=text_processing.prepare_es_min_should_match( - min_should_match)) - search_results = self.es_client.es_client.search( - index=log_dict[first_item_ind]["_index"], - body=query) + log_dict[first_item_ind], log_messages[first_item_ind], launch_info, + min_should_match=text_processing.prepare_es_min_should_match(min_should_match)) + search_results = self.es_client.es_client.search(index=log_dict[first_item_ind]["_index"], body=query) log_messages_part = [log_messages[first_item_ind]] log_dict_part = {0: log_dict[first_item_ind]} ind = 1 @@ -203,6 +211,7 @@ def 
find_similar_items_from_es( break if new_group: new_clusters[global_group] = new_group + additional_results = {} for group in new_clusters: if group in additional_results: additional_results[group].logIds.extend(new_clusters[group].logIds) @@ -211,7 +220,46 @@ def find_similar_items_from_es( additional_results[group] = new_clusters[group] return additional_results - def calculate_hash(self, group_ids, log_dict, log_messages, launch_info): + def regroup_by_error_and_status_codes( + self, log_messages: list[str], log_dict: dict[int, dict[str, Any]]) -> dict[tuple[str, str], list[int]]: + regroupped_by_error = defaultdict(list) + for i in range(len(log_messages)): + found_exceptions = " ".join( + sorted(log_dict[i]["_source"]["found_exceptions"].split())) + potential_status_codes = " ".join( + sorted(log_dict[i]["_source"]["potential_status_codes"].split())) + group_key = (found_exceptions, potential_status_codes) + regroupped_by_error[group_key].append(i) + return regroupped_by_error + + def cluster_messages_with_grouping_by_error( + self, log_messages: list[str], log_dict: dict[int, dict[str, Any]], + unique_errors_min_should_match: float) -> dict[int, list[int]]: + regroupped_by_error = self.regroup_by_error_and_status_codes( + log_messages, log_dict) + _clusterizer = clusterizer.Clusterizer() + all_groups = {} + start_group_id = 0 + for group in regroupped_by_error.values(): + log_messages_part = [] + log_messages_idx_dict = {} + for i, idx in enumerate(group): + log_messages_part.append(log_messages[idx]) + log_messages_idx_dict[i] = idx + groups = _clusterizer.find_clusters(log_messages_part, threshold=unique_errors_min_should_match) + max_group_id = max(groups.keys()) + for group_id in groups: + global_idx = start_group_id + group_id + if global_idx not in all_groups: + all_groups[global_idx] = [] + for i in groups[group_id]: + all_groups[global_idx].append(log_messages_idx_dict[i]) + start_group_id = start_group_id + max_group_id + 1 + return all_groups + + def calculate_hash( + self, group_ids: list[int], log_dict: dict[int, dict[str, Any]], log_messages: list[str], + launch_info: LaunchInfoForClustering) -> tuple[int, str]: group_logs = [] log_message = "" for i in range(min(100, len(group_ids))): @@ -239,8 +287,10 @@ def calculate_hash(self, group_ids, log_dict, log_messages, launch_info): return hash_message, log_message def gather_cluster_results( - self, groups, additional_results, log_dict, log_messages, - log_ids_for_merged_logs, launch_info): + self, groups: dict[int, list[int]], additional_results: dict[int, ClusterInfo], + log_dict: dict[int, dict[str, Any]], log_messages: list[str], + log_ids_for_merged_logs: dict[str, list[int]], + launch_info: LaunchInfoForClustering) -> tuple[list[ClusterInfo], int, dict[str, tuple[int, str]]]: merged_logs_to_update = {} clusters_found = {} cluster_message_by_id = {} @@ -288,50 +338,10 @@ def gather_cluster_results( itemIds=list(set(clusters_found[cluster_id][1])))) return results_to_return, len(results_to_return), merged_logs_to_update - def regroup_by_error_ans_status_codes(self, log_messages, log_dict): - regroupped_by_error = {} - for i in range(len(log_messages)): - found_exceptions = " ".join( - sorted(log_dict[i]["_source"]["found_exceptions"].split())) - potential_status_codes = " ".join( - sorted(log_dict[i]["_source"]["potential_status_codes"].split())) - group_key = (found_exceptions, potential_status_codes) - if group_key not in regroupped_by_error: - regroupped_by_error[group_key] = [] - 
regroupped_by_error[group_key].append(i) - return regroupped_by_error - - def cluster_messages_with_groupping_by_error(self, log_messages, log_dict, - unique_errors_min_should_match): - regroupped_by_error = self.regroup_by_error_ans_status_codes( - log_messages, log_dict) - _clusterizer = clusterizer.Clusterizer() - all_groups = {} - start_group_id = 0 - for group_key in regroupped_by_error: - log_messages_part = [] - log_messages_idx_dict = {} - for i, idx in enumerate(regroupped_by_error[group_key]): - log_messages_part.append(log_messages[idx]) - log_messages_idx_dict[i] = idx - groups = _clusterizer.find_clusters( - log_messages_part, - threshold=unique_errors_min_should_match) - max_group_id = max(groups.keys()) - for group_id in groups: - global_idx = start_group_id + group_id - if global_idx not in all_groups: - all_groups[global_idx] = [] - for i in groups[group_id]: - all_groups[global_idx].append(log_messages_idx_dict[i]) - start_group_id = start_group_id + max_group_id + 1 - return all_groups - @utils.ignore_warnings - def find_clusters(self, launch_info): + def find_clusters(self, launch_info: LaunchInfoForClustering): logger.info("Started clusterizing logs") - index_name = text_processing.unite_project_name( - str(launch_info.project), self.app_config["esProjectIndexPrefix"]) + index_name = text_processing.unite_project_name(launch_info.project, self.app_config.esProjectIndexPrefix) if not self.es_client.index_exists(index_name): logger.info("Project %s doesn't exist", index_name) logger.info("Finished clustering log with 0 clusters.") @@ -344,30 +354,23 @@ def find_clusters(self, launch_info): errors_count = 0 cluster_num = 0 clusters = [] - log_ids = [] + log_ids = {} try: - unique_errors_min_should_match = launch_info.launch.analyzerConfig.uniqueErrorsMinShouldMatch / 100 # noqa - log_messages, log_dict, log_ids_for_merged_logs = self.log_preparation.prepare_logs_for_clustering( # noqa - launch_info.launch, launch_info.numberOfLogLines, - launch_info.cleanNumbers, index_name) + unique_errors_min_should_match = launch_info.launch.analyzerConfig.uniqueErrorsMinShouldMatch / 100.0 # noqa + log_messages, log_dict, log_ids_for_merged_logs = self.log_requests.prepare_logs_for_clustering( # noqa + launch_info.launch, launch_info.numberOfLogLines, launch_info.cleanNumbers, index_name) log_ids = set([str(log["_id"]) for log in log_dict.values()]) - - groups = self.cluster_messages_with_groupping_by_error( - log_messages, log_dict, - unique_errors_min_should_match) + groups = self.cluster_messages_with_grouping_by_error( + log_messages, log_dict, unique_errors_min_should_match) logger.debug("Groups: %s", groups) additional_results = self.find_similar_items_from_es( - groups, log_dict, log_messages, - log_ids, launch_info, - {}, unique_errors_min_should_match) + groups, log_dict, log_messages, log_ids, launch_info, unique_errors_min_should_match) clusters, cluster_num, merged_logs_to_update = self.gather_cluster_results( - groups, additional_results, log_dict, log_messages, - log_ids_for_merged_logs, launch_info) + groups, additional_results, log_dict, log_messages, log_ids_for_merged_logs, launch_info) if clusters: bodies = [] for result in clusters: - logger.debug("Cluster Id: %s, Cluster message: %s", - result.clusterId, result.clusterMessage) + logger.debug("Cluster Id: %s, Cluster message: %s", result.clusterId, result.clusterMessage) logger.debug("Cluster Ids: %s", result.logIds) for log_id in result.logIds: bodies.append({ @@ -387,10 +390,10 @@ def find_clusters(self, 
launch_info): "cluster_message": cluster_message, "cluster_with_numbers": not launch_info.cleanNumbers}}) self.es_client._bulk_index( - bodies, refresh=False, chunk_size=self.app_config["esChunkNumberUpdateClusters"]) - except Exception as err: - logger.error(err) - errors_found.append(utils.extract_exception(err)) + bodies, refresh=False, chunk_size=self.app_config.esChunkNumberUpdateClusters) + except Exception as exc: + logger.exception(exc) + errors_found.append(utils.extract_exception(exc)) errors_count += 1 results_to_share = {launch_info.launch.launchId: { @@ -400,17 +403,25 @@ def find_clusters(self, launch_info): "project_id": launch_info.project, "method": "find_clusters", "gather_date": datetime.now().strftime("%Y-%m-%d"), "gather_datetime": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), - "module_version": [self.app_config["appVersion"]], + "module_version": [self.app_config.appVersion], "model_info": [], "errors": errors_found, "errors_count": errors_count}} - if "amqpUrl" in self.app_config and self.app_config["amqpUrl"].strip(): - AmqpClient(self.app_config["amqpUrl"]).send_to_inner_queue( - self.app_config["exchangeName"], "stats_info", json.dumps(results_to_share)) + if self.app_config.amqpUrl.strip(): + AmqpClient(self.app_config.amqpUrl).send_to_inner_queue( + self.app_config.exchangeName, 'stats_info', json.dumps(results_to_share)) logger.debug("Stats info %s", results_to_share) logger.info("Processed the launch. It took %.2f sec.", time() - t_start) logger.info("Finished clustering for the launch with %d clusters.", cluster_num) + for cluster in clusters: + # Set original messages for clusters to show in UI + log_ids = set(cluster.logIds) + for test_item in launch_info.launch.testItems: + for log in test_item.logs: + if log.logId in log_ids: + cluster.clusterMessage = log.message + break return ClusterResult( project=launch_info.project, launchId=launch_info.launch.launchId, diff --git a/app/service/delete_index_service.py b/app/service/delete_index_service.py index 0faf6542..54651ec2 100644 --- a/app/service/delete_index_service.py +++ b/app/service/delete_index_service.py @@ -12,34 +12,40 @@ # See the License for the specific language governing permissions and # limitations under the License. 
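For reference, the cluster_messages_with_grouping_by_error flow added to cluster_service.py above reduces to a two-stage procedure: bucket log indices by their (found_exceptions, potential_status_codes) pair, then cluster each bucket separately while keeping group ids globally unique. The sketch below is illustrative only; simple_clusterizer is a hypothetical stand-in for the real app.commons.clusterizer.Clusterizer.find_clusters.

from collections import defaultdict
from typing import Any


def simple_clusterizer(messages: list[str], threshold: float) -> dict[int, list[int]]:
    # Naive stand-in: identical messages fall into the same cluster; the real
    # Clusterizer groups by text similarity using the given threshold.
    buckets: dict[str, list[int]] = defaultdict(list)
    for idx, message in enumerate(messages):
        buckets[message].append(idx)
    return dict(enumerate(buckets.values()))


def cluster_by_error_then_text(log_messages: list[str], log_dict: dict[int, dict[str, Any]],
                               threshold: float) -> dict[int, list[int]]:
    # Stage 1: regroup indices by sorted exception names and potential status codes.
    groups_by_error: dict[tuple[str, str], list[int]] = defaultdict(list)
    for i in range(len(log_messages)):
        source = log_dict[i]["_source"]
        key = (" ".join(sorted(source["found_exceptions"].split())),
               " ".join(sorted(source["potential_status_codes"].split())))
        groups_by_error[key].append(i)
    # Stage 2: cluster inside each bucket and renumber the groups globally.
    all_groups: dict[int, list[int]] = {}
    start_group_id = 0
    for indices in groups_by_error.values():
        local_groups = simple_clusterizer([log_messages[i] for i in indices], threshold)
        for local_id, members in local_groups.items():
            all_groups[start_group_id + local_id] = [indices[m] for m in members]
        start_group_id += max(local_groups.keys()) + 1
    return all_groups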
-import logging - -from app.utils import utils, text_processing from time import time -from app.commons import namespace_finder + +from app.commons import logging, namespace_finder, trigger_manager from app.commons.esclient import EsClient -from app.commons import trigger_manager +from app.commons.model.launch_objects import SearchConfig, ApplicationConfig +from app.utils import utils, text_processing +from app.commons.model_chooser import ModelChooser logger = logging.getLogger("analyzerApp.deleteIndexService") class DeleteIndexService: + app_config: ApplicationConfig + search_cfg: SearchConfig + namespace_finder: namespace_finder.NamespaceFinder + trigger_manager: trigger_manager.TriggerManager + es_client: EsClient + model_chooser: ModelChooser - def __init__(self, model_chooser, app_config=None, search_cfg=None): - self.app_config = app_config or {} - self.search_cfg = search_cfg or {} + def __init__(self, model_chooser: ModelChooser, app_config: ApplicationConfig, search_cfg: SearchConfig): + self.app_config = app_config + self.search_cfg = search_cfg self.namespace_finder = namespace_finder.NamespaceFinder(self.app_config) - self.trigger_manager = trigger_manager.TriggerManager( - model_chooser, app_config=self.app_config, search_cfg=self.search_cfg) - self.es_client = EsClient(app_config=self.app_config, search_cfg=self.search_cfg) + self.trigger_manager = trigger_manager.TriggerManager(model_chooser, app_config=self.app_config, + search_cfg=self.search_cfg) + self.es_client = EsClient(app_config=self.app_config) self.model_chooser = model_chooser @utils.ignore_warnings - def delete_index(self, index_name): + def delete_index(self, index_name: int) -> int: logger.info("Started deleting index") t_start = time() is_index_deleted = self.es_client.delete_index(text_processing.unite_project_name( - str(index_name), self.app_config["esProjectIndexPrefix"])) + index_name, self.app_config.esProjectIndexPrefix)) self.namespace_finder.remove_namespaces(index_name) self.trigger_manager.delete_triggers(index_name) self.model_chooser.delete_all_custom_models(index_name) diff --git a/app/service/namespace_finder_service.py b/app/service/namespace_finder_service.py index 715dc13e..f0e27929 100644 --- a/app/service/namespace_finder_service.py +++ b/app/service/namespace_finder_service.py @@ -12,30 +12,30 @@ # See the License for the specific language governing permissions and # limitations under the License. 
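A pattern repeated across these service changes is replacing dict-style configuration access (self.app_config["amqpUrl"], self.search_cfg["MaxQueryTerms"]) with attribute access on typed config objects from app.commons.model.launch_objects. A minimal sketch of what such a model could look like, assuming a Pydantic BaseModel (field names are taken from usages in this diff; the defaults are placeholders, not the project's real values):

from pydantic import BaseModel


class ApplicationConfig(BaseModel):
    amqpUrl: str = ""
    exchangeName: str = ""
    esProjectIndexPrefix: str = ""
    appVersion: str = ""
    esChunkNumber: int = 1000
    esChunkNumberUpdateClusters: int = 500


app_config = ApplicationConfig(amqpUrl="amqp://rabbitmq:5672", exchangeName="analyzer")
# Attribute access replaces the old dict lookups and truthiness checks:
if app_config.amqpUrl:
    print(app_config.exchangeName)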
-import logging -from app.utils import utils from time import time -from app.commons import namespace_finder -from app.commons.log_preparation import LogPreparation + +from app.commons import logging, namespace_finder +from app.commons.model.launch_objects import ApplicationConfig, Launch +from app.commons.log_requests import LogRequests +from app.utils import utils logger = logging.getLogger("analyzerApp.namespaceFinderService") class NamespaceFinderService: + namespace_finder: namespace_finder.NamespaceFinder + log_requests: LogRequests - def __init__(self, app_config=None, search_cfg=None): - self.app_config = app_config or {} - self.search_cfg = search_cfg or {} - self.namespace_finder = namespace_finder.NamespaceFinder(self.app_config) - self.log_preparation = LogPreparation() + def __init__(self, app_config: ApplicationConfig): + self.namespace_finder = namespace_finder.NamespaceFinder(app_config) + self.log_requests = LogRequests() @utils.ignore_warnings - def update_chosen_namespaces(self, launches): + def update_chosen_namespaces(self, launches: list[Launch]): logger.info("Started updating chosen namespaces") t_start = time() - log_words, project_id = self.log_preparation.prepare_log_words(launches) - logger.debug("Project id %s", project_id) + log_words, project_id = LogRequests.prepare_log_words(launches) + logger.debug(f'Project id {project_id}') if project_id is not None: - self.namespace_finder.update_namespaces( - project_id, log_words) - logger.info("Finished updating chosen namespaces %.2f s", time() - t_start) + self.namespace_finder.update_namespaces(project_id, log_words) + logger.info('Finished updating chosen namespaces %.2f s', time() - t_start) diff --git a/app/service/retraining_service.py b/app/service/retraining_service.py index 3b907067..9f51f109 100644 --- a/app/service/retraining_service.py +++ b/app/service/retraining_service.py @@ -12,47 +12,42 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import logging import json -from app.utils import utils from time import time -from app.commons.esclient import EsClient -from app.commons import trigger_manager + from app.amqp.amqp import AmqpClient +from app.commons.model.ml import TrainInfo +from app.commons import logging, trigger_manager +from app.commons.model.launch_objects import SearchConfig, ApplicationConfig +from app.commons.model_chooser import ModelChooser +from app.utils import utils logger = logging.getLogger("analyzerApp.retrainingService") class RetrainingService: + app_config: ApplicationConfig + search_cfg: SearchConfig + trigger_manager: trigger_manager.TriggerManager - def __init__(self, model_chooser, app_config=None, search_cfg=None): - self.app_config = app_config or {} - self.search_cfg = search_cfg or {} - self.trigger_manager = trigger_manager.TriggerManager( - model_chooser, app_config=self.app_config, search_cfg=self.search_cfg) - self.es_client = EsClient(app_config=self.app_config, search_cfg=self.search_cfg) + def __init__(self, model_chooser: ModelChooser, app_config: ApplicationConfig, search_cfg: SearchConfig): + self.app_config = app_config + self.search_cfg = search_cfg + self.trigger_manager = trigger_manager.TriggerManager(model_chooser, app_config=self.app_config, + search_cfg=self.search_cfg) @utils.ignore_warnings - def train_models(self, train_info): - logger.info("Started training") + def train_models(self, train_info: TrainInfo) -> None: + assert self.trigger_manager.does_trigger_exist(train_info.model_type) + logger.info('Started training') t_start = time() - assert self.trigger_manager.does_trigger_exist(train_info["model_type"]) - - _retraining_triggering, _retraining = self.trigger_manager.get_trigger_info(train_info["model_type"]) - is_model_trained = 0 + _retraining_triggering, _retraining = self.trigger_manager.get_trigger_info(train_info.model_type) if _retraining_triggering.should_model_training_be_triggered(train_info): - logger.debug("Should be trained ", train_info) - try: - gathered_data, training_log_info = _retraining.train(train_info) - _retraining_triggering.clean_triggering_info(train_info, gathered_data) - logger.debug(training_log_info) - if "amqpUrl" in self.app_config and self.app_config["amqpUrl"].strip(): - AmqpClient(self.app_config["amqpUrl"]).send_to_inner_queue( - self.app_config["exchangeName"], "stats_info", json.dumps(training_log_info)) - is_model_trained = 1 - except Exception as exc: - logger.error("Training finished with errors") - logger.exception(exc) - is_model_trained = 0 + logger.debug(f'Should be trained: {train_info.json()}') + gathered_data, training_log_info = _retraining.train(train_info) + _retraining_triggering.clean_triggering_info(train_info.project, gathered_data) + logger.debug(training_log_info) + if self.app_config.amqpUrl: + AmqpClient(self.app_config.amqpUrl).send_to_inner_queue( + self.app_config.exchangeName, 'stats_info', json.dumps(training_log_info)) logger.info("Finished training %.2f s", time() - t_start) - return is_model_trained diff --git a/app/service/search_service.py b/app/service/search_service.py index 3861247f..ede9f862 100644 --- a/app/service/search_service.py +++ b/app/service/search_service.py @@ -12,40 +12,48 @@ # See the License for the specific language governing permissions and # limitations under the License. 
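The reworked RetrainingService.train_models above reduces to: verify a trigger exists for the model type, ask the triggering object whether training should run, train, clean the accumulated triggering info, and publish the training log to the stats queue. The sketch below only mirrors that call order; TriggeringStub and RetrainerStub are hypothetical stand-ins for the pair returned by trigger_manager.get_trigger_info().

import json
from time import time


class TriggeringStub:
    def should_model_training_be_triggered(self, train_info: dict) -> bool:
        return train_info.get("gathered_metric_total", 0) >= 100

    def clean_triggering_info(self, project: int, gathered_data: list) -> None:
        print(f"cleaned triggering info for project {project}")


class RetrainerStub:
    def train(self, train_info: dict) -> tuple[list, dict]:
        return [], {"project_id": train_info["project"], "model_type": train_info["model_type"]}


def train_models(train_info: dict, amqp_url: str = "") -> None:
    triggering, retraining = TriggeringStub(), RetrainerStub()
    t_start = time()
    if triggering.should_model_training_be_triggered(train_info):
        gathered_data, training_log_info = retraining.train(train_info)
        triggering.clean_triggering_info(train_info["project"], gathered_data)
        if amqp_url:
            # In the service this payload goes to the 'stats_info' queue via AmqpClient.
            print(json.dumps(training_log_info))
    print(f"Finished training {time() - t_start:.2f} s")


train_models({"model_type": "suggestion", "project": 34, "gathered_metric_total": 120})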
-from app.commons.esclient import EsClient -from app.utils import utils, text_processing -from app.commons.launch_objects import SearchLogInfo, Log -from app.commons.log_preparation import LogPreparation -from app.commons.log_merger import LogMerger -from app.boosting_decision_making import weighted_similarity_calculator -from app.commons import similarity_calculator +from time import time + import elasticsearch import elasticsearch.helpers -import logging -from time import time + +from app.commons import logging, similarity_calculator, object_saving +from app.commons.esclient import EsClient +from app.commons.model.launch_objects import SearchLogInfo, Log, SearchConfig, ApplicationConfig +from app.commons.log_merger import LogMerger +from app.commons.log_requests import LogRequests, create_log_template +from app.machine_learning.models.weighted_similarity_calculator import WeightedSimilarityCalculator +from app.utils import utils, text_processing logger = logging.getLogger("analyzerApp.searchService") class SearchService: + app_config: ApplicationConfig + search_cfg: SearchConfig + es_client: EsClient + log_requests: LogRequests + log_merger: LogMerger + similarity_model: WeightedSimilarityCalculator - def __init__(self, app_config=None, search_cfg=None): - self.app_config = app_config or {} - self.search_cfg = search_cfg or {} - self.es_client = EsClient(app_config=self.app_config, search_cfg=self.search_cfg) - self.log_preparation = LogPreparation() + def __init__(self, app_config: ApplicationConfig, search_cfg: SearchConfig): + self.app_config = app_config + self.search_cfg = search_cfg + self.es_client = EsClient(app_config=self.app_config) + self.log_requests = LogRequests() self.log_merger = LogMerger() - self.weighted_log_similarity_calculator = None - if self.search_cfg["SimilarityWeightsFolder"].strip(): - self.weighted_log_similarity_calculator = weighted_similarity_calculator.\ - WeightedSimilarityCalculator(folder=self.search_cfg["SimilarityWeightsFolder"]) + if not self.search_cfg.SimilarityWeightsFolder: + raise ValueError('SimilarityWeightsFolder is not set') + self.similarity_model = ( + WeightedSimilarityCalculator(object_saving.create_filesystem(self.search_cfg.SimilarityWeightsFolder))) + self.similarity_model.load_model() def build_search_query(self, search_req, queried_log, search_min_should_match="95%"): """Build search query""" query = { "_source": ["message", "test_item", "detected_message", "stacktrace", "potential_status_codes", "merged_small_logs"], - "size": self.app_config["esChunkNumber"], + "size": self.app_config.esChunkNumber, "query": { "bool": { "filter": [ @@ -77,7 +85,7 @@ def build_search_query(self, search_req, queried_log, search_min_should_match="9 queried_log["_source"]["message"], field_name="message", boost=1.0, override_min_should_match=None, - max_query_terms=self.search_cfg["MaxQueryTerms"])) + max_query_terms=self.search_cfg.MaxQueryTerms)) else: query["query"]["bool"]["filter"].append({"term": {"is_merged": True}}) query["query"]["bool"]["must_not"].append({"wildcard": {"message": "*"}}) @@ -87,7 +95,7 @@ def build_search_query(self, search_req, queried_log, search_min_should_match="9 queried_log["_source"]["merged_small_logs"], field_name="merged_small_logs", boost=1.0, override_min_should_match=None, - max_query_terms=self.search_cfg["MaxQueryTerms"])) + max_query_terms=self.search_cfg.MaxQueryTerms)) if queried_log["_source"]["found_exceptions"].strip(): query["query"]["bool"]["must"].append( utils.build_more_like_this_query( @@ -95,9 
+103,9 @@ def build_search_query(self, search_req, queried_log, search_min_should_match="9 queried_log["_source"]["found_exceptions"], field_name="found_exceptions", boost=1.0, override_min_should_match="1", - max_query_terms=self.search_cfg["MaxQueryTerms"])) + max_query_terms=self.search_cfg.MaxQueryTerms)) utils.append_potential_status_codes(query, queried_log, boost=1.0, - max_query_terms=self.search_cfg["MaxQueryTerms"]) + max_query_terms=self.search_cfg.MaxQueryTerms) return query def find_log_ids_for_test_items_with_merged_logs(self, test_item_ids, index_name, batch_size=1000): @@ -153,11 +161,9 @@ def prepare_messages_for_queries(self, search_req): if not message.strip(): continue - queried_log = self.log_preparation._create_log_template() - queried_log = self.log_preparation._fill_log_fields( - queried_log, - Log(logId=global_id, message=message), - search_req.logLines) + queried_log = create_log_template() + queried_log = LogRequests._fill_log_fields(queried_log, Log(logId=global_id, message=message), + search_req.logLines) msg_words = " ".join(text_processing.split_words(queried_log["_source"]["message"])) if not msg_words.strip() or msg_words in searched_logs: @@ -192,8 +198,7 @@ def search_logs(self, search_req): logger.info(f'Started searching for test item with id: {search_req.itemId}') logger.debug(f'Started searching by request: {search_req.json()}') logger.info("ES Url %s", text_processing.remove_credentials_from_url(self.es_client.host)) - index_name = text_processing.unite_project_name( - str(search_req.projectId), self.app_config["esProjectIndexPrefix"]) + index_name = text_processing.unite_project_name(search_req.projectId, self.app_config.esProjectIndexPrefix) t_start = time() if not self.es_client.index_exists(index_name): return [] @@ -216,11 +221,11 @@ def search_logs(self, search_req): _similarity_calculator = similarity_calculator.SimilarityCalculator( { - "max_query_terms": self.search_cfg["MaxQueryTerms"], - "min_word_length": self.search_cfg["MinWordLength"], + "max_query_terms": self.search_cfg.MaxQueryTerms, + "min_word_length": self.search_cfg.MinWordLength, "number_of_log_lines": search_req.logLines }, - weighted_similarity_calculator=self.weighted_log_similarity_calculator) + similarity_model=self.similarity_model) _similarity_calculator.find_similarity( [(queried_log, search_results)], ["message", "potential_status_codes", "merged_small_logs"]) @@ -242,8 +247,7 @@ def search_logs(self, search_req): log_id_extracted = utils.extract_real_id(log_id) is_merged = log_id != str(log_id_extracted) test_item_id = int(test_item_info[log_id]) - match_score = max(round(similarity_percent, 2), - round(global_search_min_should_match, 2)) + match_score = max(round(similarity_percent, 2), round(global_search_min_should_match, 2)) similar_log_ids[(log_id_extracted, test_item_id, is_merged)] = SearchLogInfo( logId=log_id_extracted, testItemId=test_item_id, diff --git a/app/service/suggest_info_service.py b/app/service/suggest_info_service.py index 8a3cd525..6e819291 100644 --- a/app/service/suggest_info_service.py +++ b/app/service/suggest_info_service.py @@ -13,7 +13,6 @@ # limitations under the License. 
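SearchService.build_search_query above relies on utils.build_more_like_this_query with per-field min_should_match, boost and MaxQueryTerms settings. The helper's real output is not part of this diff; the snippet below is a hedged reconstruction of a typical Elasticsearch more_like_this clause with those parameters, for illustration only.

from typing import Any


def build_more_like_this_query(min_should_match: str, text: str, field_name: str = "message",
                               boost: float = 1.0, max_query_terms: int = 50) -> dict[str, Any]:
    # Illustrative Elasticsearch DSL; the project's helper may set additional options.
    return {
        "more_like_this": {
            "fields": [field_name],
            "like": text,
            "minimum_should_match": min_should_match,
            "min_term_freq": 1,
            "min_doc_freq": 1,
            "max_query_terms": max_query_terms,
            "boost": boost,
        }
    }


clause = build_more_like_this_query("95%", "NullPointerException at Foo.bar()", max_query_terms=50)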
import json -import logging from datetime import datetime from time import time @@ -21,23 +20,29 @@ import elasticsearch.helpers from app.amqp.amqp import AmqpClient +from app.commons import logging from app.commons.esclient import EsClient -from app.commons.triggering_training.retraining_triggering import GATHERED_METRIC_TOTAL +from app.commons.model.launch_objects import ApplicationConfig +from app.commons.model.ml import TrainInfo, ModelType from app.utils import utils, text_processing logger = logging.getLogger("analyzerApp.suggestInfoService") class SuggestInfoService: - """This service saves `SuggestAnalysisResult` entities to {project_id}_suggest ES/OS index. + """This service saves and manage `SuggestAnalysisResult` entities to {project_id}_suggest ES/OS index. This is necessary for further use in custom model training. """ - def __init__(self, app_config=None, search_cfg=None): - self.app_config = app_config or {} - self.search_cfg = search_cfg or {} - self.es_client = EsClient(app_config=self.app_config, search_cfg=self.search_cfg) + app_config: ApplicationConfig + es_client: EsClient + rp_suggest_index_template: str + rp_suggest_metrics_index_template: str + + def __init__(self, app_config: ApplicationConfig): + self.app_config = app_config + self.es_client = EsClient(app_config=self.app_config) self.rp_suggest_index_template = "rp_suggestions_info" self.rp_suggest_metrics_index_template = "rp_suggestions_info_metrics" @@ -58,17 +63,16 @@ def index_suggest_info(self, suggest_info_list): obj_info = json.loads(obj.json()) obj_info["savedDate"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") obj_info["modelInfo"] = [obj.strip() for obj in obj_info["modelInfo"].split(";") if obj.strip()] - obj_info["module_version"] = [self.app_config["appVersion"]] + obj_info["module_version"] = [self.app_config.appVersion] if obj_info["testItem"] not in metrics_data_by_test_item: metrics_data_by_test_item[obj_info["testItem"]] = [] metrics_data_by_test_item[obj_info["testItem"]].append(obj_info) project_index_name = self.build_index_name(obj_info["project"]) project_index_name = text_processing.unite_project_name( - project_index_name, self.app_config["esProjectIndexPrefix"]) + project_index_name, self.app_config.esProjectIndexPrefix) if project_index_name not in project_index_names: self.es_client.create_index_for_stats_info( - self.rp_suggest_index_template, - override_index_name=project_index_name) + self.rp_suggest_index_template, override_index_name=project_index_name) project_index_names.add(project_index_name) bodies.append({ "_index": project_index_name, @@ -107,13 +111,13 @@ def remove_suggest_info(self, project_id): logger.info("Removing suggest_info index") project_index_name = self.build_index_name(project_id) project_index_name = text_processing.unite_project_name( - project_index_name, self.app_config["esProjectIndexPrefix"]) + project_index_name, self.app_config.esProjectIndexPrefix) return self.es_client.delete_index(project_index_name) def build_suggest_info_ids_query(self, log_ids): return { "_source": ["testItem"], - "size": self.app_config["esChunkNumber"], + "size": self.app_config.esChunkNumber, "query": { "bool": { "should": [ @@ -140,15 +144,14 @@ def build_suggest_info_ids_query_by_launch_ids(self, launch_ids): def clean_suggest_info_logs(self, clean_index): """Delete logs from elasticsearch""" index_name = self.build_index_name(clean_index.project) - index_name = text_processing.unite_project_name( - index_name, self.app_config["esProjectIndexPrefix"]) + index_name = 
text_processing.unite_project_name(index_name, self.app_config.esProjectIndexPrefix) logger.info("Delete logs %s for the index %s", clean_index.ids, index_name) t_start = time() if not self.es_client.index_exists(index_name, print_error=False): logger.info("Didn't find index '%s'", index_name) return 0 - sugggest_log_ids = set() + suggest_log_ids = set() try: search_query = self.build_suggest_info_ids_query( clean_index.ids) @@ -156,12 +159,12 @@ def clean_suggest_info_logs(self, clean_index): query=search_query, index=index_name, scroll="5m"): - sugggest_log_ids.add(res["_id"]) - except Exception as err: + suggest_log_ids.add(res["_id"]) + except Exception as exc: logger.error("Couldn't find logs with specified ids") - logger.error(err) + logger.exception(exc) bodies = [] - for _id in sugggest_log_ids: + for _id in suggest_log_ids: bodies.append({ "_op_type": "delete", "_id": _id, @@ -175,8 +178,7 @@ def clean_suggest_info_logs(self, clean_index): def clean_suggest_info_logs_by_test_item(self, remove_items_info): """Delete logs from elasticsearch""" index_name = self.build_index_name(remove_items_info["project"]) - index_name = text_processing.unite_project_name( - index_name, self.app_config["esProjectIndexPrefix"]) + index_name = text_processing.unite_project_name(index_name, self.app_config.esProjectIndexPrefix) logger.info("Delete test items %s for the index %s", remove_items_info["itemsToDelete"], index_name) t_start = time() @@ -192,9 +194,7 @@ def clean_suggest_info_logs_by_launch_id(self, launch_remove_info): project = launch_remove_info["project"] launch_ids = launch_remove_info["launch_ids"] index_name = self.build_index_name(project) - index_name = text_processing.unite_project_name( - index_name, self.app_config["esProjectIndexPrefix"] - ) + index_name = text_processing.unite_project_name(index_name, self.app_config.esProjectIndexPrefix) logger.info("Delete launches %s for the index %s", launch_ids, index_name) t_start = time() deleted_logs = self.es_client.delete_by_query( @@ -213,7 +213,7 @@ def clean_suggest_info_logs_by_launch_id(self, launch_remove_info): def build_query_for_getting_suggest_info(self, test_item_ids): return { "_source": ["testItem", "issueType"], - "size": self.app_config["esChunkNumber"], + "size": self.app_config.esChunkNumber, "query": { "bool": { "must": [ @@ -231,7 +231,7 @@ def update_suggest_info(self, defect_update_info): defect_update_info["itemsToUpdate"] = { int(key_): val for key_, val in defect_update_info["itemsToUpdate"].items()} index_name = self.build_index_name(defect_update_info["project"]) - index_name = text_processing.unite_project_name(index_name, self.app_config["esProjectIndexPrefix"]) + index_name = text_processing.unite_project_name(index_name, self.app_config.esProjectIndexPrefix) if not self.es_client.index_exists(index_name): return 0 batch_size = 1000 @@ -261,14 +261,12 @@ def update_suggest_info(self, defect_update_info): }) result = self.es_client._bulk_index(log_update_queries) try: - if "amqpUrl" in self.app_config and self.app_config["amqpUrl"].strip(): - for model_type in ["suggestion", "auto_analysis"]: - AmqpClient(self.app_config["amqpUrl"]).send_to_inner_queue( - self.app_config["exchangeName"], "train_models", json.dumps({ - "model_type": model_type, - "project_id": defect_update_info["project"], - GATHERED_METRIC_TOTAL: result.took - })) + if self.app_config.amqpUrl: + for model_type in [ModelType.suggestion, ModelType.auto_analysis]: + AmqpClient(self.app_config.amqpUrl).send_to_inner_queue( + 
self.app_config.exchangeName, 'train_models', + TrainInfo(model_type=model_type, project=defect_update_info['project'], + gathered_metric_total=result.took).json()) except Exception as exc: logger.exception(exc) logger.info("Finished updating suggest info for %.2f sec.", time() - t_start) diff --git a/app/service/suggest_patterns_service.py b/app/service/suggest_patterns_service.py index 9366fd70..4287822f 100644 --- a/app/service/suggest_patterns_service.py +++ b/app/service/suggest_patterns_service.py @@ -12,24 +12,28 @@ # See the License for the specific language governing permissions and # limitations under the License. -import logging +from time import time -from app.utils import utils, text_processing import elasticsearch import elasticsearch.helpers -from time import time + +from app.commons import logging from app.commons.esclient import EsClient -from app.commons.launch_objects import SuggestPattern, SuggestPatternLabel +from app.commons.model.launch_objects import SuggestPattern, SuggestPatternLabel, SearchConfig, ApplicationConfig +from app.utils import utils, text_processing logger = logging.getLogger("analyzerApp.suggestPatternsService") class SuggestPatternsService: + app_config: ApplicationConfig + search_cfg: SearchConfig + es_client: EsClient - def __init__(self, app_config=None, search_cfg=None): - self.app_config = app_config or {} - self.search_cfg = search_cfg or {} - self.es_client = EsClient(app_config=self.app_config, search_cfg=self.search_cfg) + def __init__(self, app_config: ApplicationConfig, search_cfg: SearchConfig): + self.app_config = app_config + self.search_cfg = search_cfg + self.es_client = EsClient(app_config=self.app_config) def query_data(self, project, label): data = [] @@ -39,7 +43,7 @@ def query_data(self, project, label): query={ "_source": ["detected_message", "issue_type"], "sort": {"start_time": "desc"}, - "size": self.app_config["esChunkNumber"], + "size": self.app_config.esChunkNumber, "query": { "bool": { "must": [ @@ -63,35 +67,34 @@ def query_data(self, project, label): return data def get_patterns_with_labels(self, exceptions_with_labels): - min_count = self.search_cfg["PatternLabelMinCountToSuggest"] - min_percent = self.search_cfg["PatternLabelMinPercentToSuggest"] - suggestedPatternsWithLabels = [] + min_count = self.search_cfg.PatternLabelMinCountToSuggest + min_percent = self.search_cfg.PatternLabelMinPercentToSuggest + suggested_patterns_with_labels = [] for exception in exceptions_with_labels: sum_all = sum(exceptions_with_labels[exception].values()) for issue_type in exceptions_with_labels[exception]: percent_for_label = round(exceptions_with_labels[exception][issue_type] / sum_all, 2) count_for_exception_with_label = exceptions_with_labels[exception][issue_type] if percent_for_label >= min_percent and count_for_exception_with_label >= min_count: - suggestedPatternsWithLabels.append(SuggestPatternLabel( + suggested_patterns_with_labels.append(SuggestPatternLabel( pattern=exception, totalCount=sum_all, percentTestItemsWithLabel=percent_for_label, label=issue_type)) - return suggestedPatternsWithLabels + return suggested_patterns_with_labels def get_patterns_without_labels(self, all_exceptions): - suggestedPatternsWithoutLabels = [] + suggested_patterns_without_labels = [] for exception in all_exceptions: - if all_exceptions[exception] >= self.search_cfg["PatternMinCountToSuggest"]: - suggestedPatternsWithoutLabels.append(SuggestPatternLabel( + if all_exceptions[exception] >= self.search_cfg.PatternMinCountToSuggest: + 
suggested_patterns_without_labels.append(SuggestPatternLabel( pattern=exception, totalCount=all_exceptions[exception])) - return suggestedPatternsWithoutLabels + return suggested_patterns_without_labels @utils.ignore_warnings def suggest_patterns(self, project_id): - index_name = text_processing.unite_project_name( - str(project_id), self.app_config["esProjectIndexPrefix"]) + index_name = text_processing.unite_project_name(project_id, self.app_config.esProjectIndexPrefix) logger.info("Started suggesting patterns for project '%s'", index_name) t_start = time() found_data = [] @@ -116,9 +119,9 @@ def suggest_patterns(self, project_id): if label not in exceptions_with_labels[exception]: exceptions_with_labels[exception][label] = 0 exceptions_with_labels[exception][label] += 1 - suggestedPatternsWithLabels = self.get_patterns_with_labels(exceptions_with_labels) - suggestedPatternsWithoutLabels = self.get_patterns_without_labels(all_exceptions) + suggested_patterns_with_labels = self.get_patterns_with_labels(exceptions_with_labels) + suggested_patterns_without_labels = self.get_patterns_without_labels(all_exceptions) logger.info("Finished suggesting patterns %.2f s", time() - t_start) return SuggestPattern( - suggestionsWithLabels=suggestedPatternsWithLabels, - suggestionsWithoutLabels=suggestedPatternsWithoutLabels) + suggestionsWithLabels=suggested_patterns_with_labels, + suggestionsWithoutLabels=suggested_patterns_without_labels) diff --git a/app/service/suggest_service.py b/app/service/suggest_service.py index 46685f39..e3523f25 100644 --- a/app/service/suggest_service.py +++ b/app/service/suggest_service.py @@ -13,7 +13,6 @@ # limitations under the License. import json -import logging from datetime import datetime from functools import reduce from time import time @@ -21,12 +20,16 @@ import elasticsearch.helpers from app.amqp.amqp import AmqpClient -from app.boosting_decision_making.suggest_boosting_featurizer import SuggestBoostingFeaturizer -from app.commons import similarity_calculator +from app.commons import logging, similarity_calculator, object_saving from app.commons.esclient import EsClient -from app.commons.launch_objects import SuggestAnalysisResult +from app.commons.log_requests import LogRequests +from app.commons.model.launch_objects import SuggestAnalysisResult, SearchConfig, ApplicationConfig, TestItemInfo, \ + AnalyzerConf +from app.commons.model.ml import ModelType, TrainInfo +from app.commons.model_chooser import ModelChooser from app.commons.namespace_finder import NamespaceFinder -from app.commons.triggering_training.retraining_triggering import GATHERED_METRIC_TOTAL +from app.machine_learning.models import WeightedSimilarityCalculator, BoostingDecisionMaker +from app.machine_learning.suggest_boosting_featurizer import SuggestBoostingFeaturizer from app.service.analyzer_service import AnalyzerService from app.utils import utils, text_processing @@ -42,34 +45,43 @@ class SuggestService(AnalyzerService): """The service serves suggestion lists in Make Decision modal.""" + app_config: ApplicationConfig + search_cfg: SearchConfig es_client: EsClient namespace_finder: NamespaceFinder + similarity_model: WeightedSimilarityCalculator - def __init__(self, model_chooser, app_config=None, search_cfg=None, es_client: EsClient = None): - self.app_config = app_config or {} - self.search_cfg = search_cfg or {} + def __init__(self, model_chooser: ModelChooser, app_config: ApplicationConfig, search_cfg: SearchConfig, + es_client: EsClient = None): + self.app_config = app_config + 
self.search_cfg = search_cfg super().__init__(model_chooser, search_cfg=self.search_cfg) - self.es_client = es_client or EsClient(app_config=self.app_config, search_cfg=self.search_cfg) + self.es_client = es_client or EsClient(app_config=self.app_config) self.suggest_threshold = 0.4 - self.rp_suggest_index_template = "rp_suggestions_info" - self.rp_suggest_metrics_index_template = "rp_suggestions_info_metrics" + self.rp_suggest_index_template = 'rp_suggestions_info' + self.rp_suggest_metrics_index_template = 'rp_suggestions_info_metrics' self.namespace_finder = NamespaceFinder(app_config) - - def get_config_for_boosting_suggests(self, analyzerConfig): + weights_folder = self.search_cfg.SimilarityWeightsFolder + if not weights_folder: + raise ValueError('SimilarityWeightsFolder is not set') + if weights_folder: + self.similarity_model = WeightedSimilarityCalculator(object_saving.create_filesystem(weights_folder)) + self.similarity_model.load_model() + + def get_config_for_boosting_suggests(self, analyzer_config: AnalyzerConf) -> dict: return { - "max_query_terms": self.search_cfg["MaxQueryTerms"], + "max_query_terms": self.search_cfg.MaxQueryTerms, "min_should_match": 0.4, - "min_word_length": self.search_cfg["MinWordLength"], + "min_word_length": self.search_cfg.MinWordLength, "filter_min_should_match": [], - "filter_min_should_match_any": self.choose_fields_to_filter_suggests( - analyzerConfig.numberOfLogLines), - "number_of_log_lines": analyzerConfig.numberOfLogLines, + "filter_min_should_match_any": self.choose_fields_to_filter_suggests(analyzer_config.numberOfLogLines), + "number_of_log_lines": analyzer_config.numberOfLogLines, "filter_by_test_case_hash": True, - "boosting_model": self.search_cfg["SuggestBoostModelFolder"], - "time_weight_decay": self.search_cfg["TimeWeightDecay"] + "boosting_model": self.search_cfg.SuggestBoostModelFolder, + "time_weight_decay": self.search_cfg.TimeWeightDecay } - def choose_fields_to_filter_suggests(self, log_lines_num): + def choose_fields_to_filter_suggests(self, log_lines_num: int) -> list[str]: if log_lines_num == -1: return [ "detected_message_extended", @@ -78,12 +90,12 @@ def choose_fields_to_filter_suggests(self, log_lines_num): return ["message_extended", "message_without_params_extended", "message_without_params_and_brackets"] - def build_suggest_query(self, test_item_info, log, size=10, - message_field="message", det_mes_field="detected_message", - stacktrace_field="stacktrace"): + def build_suggest_query(self, test_item_info: TestItemInfo, log: dict, size: int = 10, + message_field: str = "message", det_mes_field: str = "detected_message", + stacktrace_field: str = "stacktrace"): min_should_match = "{}%".format(test_item_info.analyzerConfig.minShouldMatch) \ if test_item_info.analyzerConfig.minShouldMatch > 0 \ - else self.search_cfg["MinShouldMatch"] + else self.search_cfg.MinShouldMatch log_lines = test_item_info.analyzerConfig.numberOfLogLines query = self.build_common_query(log, size=size, filter_no_defect=False) @@ -92,7 +104,7 @@ def build_suggest_query(self, test_item_info, log, size=10, if log["_source"]["message"].strip(): query["query"]["bool"]["filter"].append({"term": {"is_merged": False}}) if log_lines == -1: - must = self.create_path(query, ('query', 'bool', 'must'), []) + must = utils.create_path(query, ('query', 'bool', 'must'), []) must.append(self.build_more_like_this_query("60%", log["_source"][det_mes_field], field_name=det_mes_field, @@ -105,7 +117,7 @@ def build_suggest_query(self, test_item_info, log, size=10, else: 
query["query"]["bool"]["must_not"].append({"wildcard": {stacktrace_field: "*"}}) else: - must = self.create_path(query, ('query', 'bool', 'must'), []) + must = utils.create_path(query, ('query', 'bool', 'must'), []) must.append(self.build_more_like_this_query("60%", log["_source"][message_field], field_name=message_field, @@ -123,13 +135,13 @@ def build_suggest_query(self, test_item_info, log, size=10, else: query["query"]["bool"]["filter"].append({"term": {"is_merged": True}}) query["query"]["bool"]["must_not"].append({"wildcard": {"message": "*"}}) - must = self.create_path(query, ('query', 'bool', 'must'), []) + must = utils.create_path(query, ('query', 'bool', 'must'), []) must.append(self.build_more_like_this_query(min_should_match, log["_source"]["merged_small_logs"], field_name="merged_small_logs", boost=2.0)) - utils.append_potential_status_codes(query, log, max_query_terms=self.search_cfg["MaxQueryTerms"]) + utils.append_potential_status_codes(query, log, max_query_terms=self.search_cfg.MaxQueryTerms) for field, boost_score in SPECIAL_FIELDS_BOOST_SCORES: if log["_source"][field].strip(): @@ -142,10 +154,9 @@ def build_suggest_query(self, test_item_info, log, size=10, return self.add_query_with_start_time_decay(query, log["_source"]["start_time"]) - def query_es_for_suggested_items(self, test_item_info, logs): + def query_es_for_suggested_items(self, test_item_info: TestItemInfo, logs: list[dict]): full_results = [] - index_name = text_processing.unite_project_name( - str(test_item_info.project), self.app_config["esProjectIndexPrefix"]) + index_name = text_processing.unite_project_name(test_item_info.project, self.app_config.esProjectIndexPrefix) for log in logs: message = log["_source"]["message"].strip() @@ -181,12 +192,12 @@ def query_es_for_suggested_items(self, test_item_info, logs): def deduplicate_results(self, gathered_results, scores_by_test_items, test_item_ids): _similarity_calculator = similarity_calculator.SimilarityCalculator( { - "max_query_terms": self.search_cfg["MaxQueryTerms"], - "min_word_length": self.search_cfg["MinWordLength"], + "max_query_terms": self.search_cfg.MaxQueryTerms, + "min_word_length": self.search_cfg.MinWordLength, "min_should_match": "98%", "number_of_log_lines": -1 }, - weighted_similarity_calculator=self.weighted_log_similarity_calculator) + similarity_model=self.similarity_model) all_pairs_to_check = [] for i in range(len(gathered_results)): for j in range(i + 1, len(gathered_results)): @@ -237,9 +248,7 @@ def sort_results(self, scores_by_test_items, test_item_ids, predicted_labels_pro gathered_results = sorted(gathered_results, key=lambda x: (x[1], x[2]), reverse=True) return self.deduplicate_results(gathered_results, scores_by_test_items, test_item_ids) - def prepare_not_found_object_info( - self, test_item_info, - processed_time, model_feature_names, model_info): + def prepare_not_found_object_info(self, test_item_info, processed_time, model_feature_names: str, model_info): return { # reciprocalRank is not filled for not found results not to count in the metrics dashboard "project": test_item_info.project, "testItem": test_item_info.testItemId, @@ -261,7 +270,7 @@ def prepare_not_found_object_info( "processedTime": processed_time, "notFoundResults": 100, "savedDate": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), - "module_version": [self.app_config["appVersion"]], + "module_version": [self.app_config.appVersion], "methodName": "suggestion", "clusterId": test_item_info.clusterId } @@ -295,7 +304,7 @@ def 
get_query_for_logs_by_test_item(self, test_item_id): } } - def query_logs_for_cluster(self, test_item_info, index_name): + def query_logs_for_cluster(self, test_item_info: TestItemInfo, index_name: str) -> tuple[list[dict], int]: test_item_id = None test_items = self.es_client.es_client.search( index_name, body=self.get_query_for_test_item_in_cluster(test_item_info)) @@ -305,9 +314,8 @@ def query_logs_for_cluster(self, test_item_info, index_name): if test_item_id is None: return [], 0 logs = [] - for log in elasticsearch.helpers.scan(self.es_client.es_client, - query=self.get_query_for_logs_by_test_item(test_item_id), - index=index_name): + for log in elasticsearch.helpers.scan( + self.es_client.es_client, query=self.get_query_for_logs_by_test_item(test_item_id), index=index_name): # clean test item info not to boost by it log["_source"]["test_item"] = 0 log["_source"]["test_case_hash"] = 0 @@ -316,24 +324,22 @@ def query_logs_for_cluster(self, test_item_info, index_name): logs.append(log) return logs, test_item_id - def prepare_logs_for_suggestions(self, test_item_info, index_name): + def prepare_logs_for_suggestions(self, test_item_info: TestItemInfo, index_name: str) -> tuple[list[dict], int]: test_item_id_for_suggest = test_item_info.testItemId if test_item_info.clusterId != 0: prepared_logs, test_item_id_for_suggest = self.query_logs_for_cluster(test_item_info, index_name) else: unique_logs = text_processing.leave_only_unique_logs(test_item_info.logs) - prepared_logs = [self.log_preparation._prepare_log_for_suggests(test_item_info, log, index_name) + prepared_logs = [LogRequests._prepare_log_for_suggests(test_item_info, log, index_name) for log in unique_logs if log.logLevel >= utils.ERROR_LOGGING_LEVEL] logs, _ = self.log_merger.decompose_logs_merged_and_without_duplicates(prepared_logs) return logs, test_item_id_for_suggest - @utils.ignore_warnings - def suggest_items(self, test_item_info): + def suggest_items(self, test_item_info: TestItemInfo): logger.info(f'Started suggesting for test item with id: {test_item_info.testItemId}') logger.debug(f'Started suggesting items by request: {test_item_info.json()}') logger.info("ES Url %s", text_processing.remove_credentials_from_url(self.es_client.host)) - index_name = text_processing.unite_project_name( - str(test_item_info.project), self.app_config["esProjectIndexPrefix"]) + index_name = text_processing.unite_project_name(test_item_info.project, self.app_config.esProjectIndexPrefix) if not self.es_client.index_exists(index_name): logger.info("Project %s doesn't exist", index_name) logger.info("Finished suggesting for test item with 0 results.") @@ -355,29 +361,27 @@ def suggest_items(self, test_item_info): logger.debug(f'Items for suggestions by FTS (KNN): {json.dumps(searched_res)}') boosting_config = self.get_config_for_boosting_suggests(test_item_info.analyzerConfig) - boosting_config["chosen_namespaces"] = self.namespace_finder.get_chosen_namespaces( - test_item_info.project) - _suggest_decision_maker_to_use = self.model_chooser.choose_model( - test_item_info.project, "suggestion_model/", - custom_model_prob=self.search_cfg["ProbabilityForCustomModelSuggestions"]) - features_dict_objects = _suggest_decision_maker_to_use.features_dict_with_saved_objects + boosting_config["chosen_namespaces"] = self.namespace_finder.get_chosen_namespaces(test_item_info.project) + # noinspection PyTypeChecker + _suggest_decision_maker_to_use: BoostingDecisionMaker = self.model_chooser.choose_model( + test_item_info.project, ModelType.suggestion, + 
custom_model_prob=self.search_cfg.ProbabilityForCustomModelSuggestions) _boosting_data_gatherer = SuggestBoostingFeaturizer( searched_res, boosting_config, - feature_ids=_suggest_decision_maker_to_use.get_feature_ids(), - weighted_log_similarity_calculator=self.weighted_log_similarity_calculator, - features_dict_with_saved_objects=features_dict_objects) - _boosting_data_gatherer.set_defect_type_model(self.model_chooser.choose_model( - test_item_info.project, "defect_type_model/")) + feature_ids=_suggest_decision_maker_to_use.feature_ids, + weighted_log_similarity_calculator=self.similarity_model) + # noinspection PyTypeChecker + _boosting_data_gatherer.set_defect_type_model( + self.model_chooser.choose_model(test_item_info.project, ModelType.defect_type)) feature_data, test_item_ids = _boosting_data_gatherer.gather_features_info() - scores_by_test_items = _boosting_data_gatherer.scores_by_issue_type - model_info_tags = (_boosting_data_gatherer.get_used_model_info() + - _suggest_decision_maker_to_use.get_model_info()) - feature_names = ";".join(_suggest_decision_maker_to_use.get_feature_names()) + scores_by_test_items = _boosting_data_gatherer.find_most_relevant_by_type() + model_info_tags = (_boosting_data_gatherer.get_used_model_info() + + _suggest_decision_maker_to_use.get_model_info()) + feature_names = ";".join([str(i) for i in _suggest_decision_maker_to_use.feature_ids]) if feature_data: - predicted_labels, predicted_labels_probability = _suggest_decision_maker_to_use.predict( - feature_data) + predicted_labels, predicted_labels_probability = _suggest_decision_maker_to_use.predict(feature_data) sorted_results = self.sort_results( scores_by_test_items, test_item_ids, predicted_labels_probability) @@ -385,11 +389,11 @@ def suggest_items(self, test_item_info): for idx, prob, _ in sorted_results: test_item_id = test_item_ids[idx] issue_type = scores_by_test_items[test_item_id]["mrHit"]["_source"]["issue_type"] - logger.debug("Test item id %d with issue type %s has probability %.2f", + logger.debug("Test item id %s with issue type %s has probability %.2f", test_item_id, issue_type, prob) processed_time = time() - t_start global_idx = 0 - for idx, prob, _ in sorted_results[:self.search_cfg["MaxSuggestionsNumber"]]: + for idx, prob, _ in sorted_results[:self.search_cfg.MaxSuggestionsNumber]: if prob >= self.suggest_threshold: test_item_id = test_item_ids[idx] issue_type = scores_by_test_items[test_item_id]["mrHit"]["_source"]["issue_type"] @@ -414,8 +418,7 @@ def suggest_items(self, test_item_info): esScore=round(scores_by_test_items[test_item_id]["mrHit"]["_score"], 2), esPosition=scores_by_test_items[test_item_id]["mrHit"]["es_pos"], modelFeatureNames=feature_names, - modelFeatureValues=";".join( - [str(feature) for feature in feature_data[idx]]), + modelFeatureValues=";".join([str(feature) for feature in feature_data[idx]]), modelInfo=";".join(model_info_tags), resultPosition=global_idx, usedLogLines=test_item_info.analyzerConfig.numberOfLogLines, @@ -442,7 +445,7 @@ def suggest_items(self, test_item_info): "gather_datetime": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "number_of_log_lines": test_item_info.analyzerConfig.numberOfLogLines, "model_info": model_info_tags, - "module_version": [self.app_config["appVersion"]], + "module_version": [self.app_config.appVersion], "min_should_match": self.find_min_should_match_threshold( test_item_info.analyzerConfig), "errors": errors_found, @@ -452,22 +455,19 @@ def suggest_items(self, test_item_info): self.es_client._bulk_index([{ "_index": 
self.rp_suggest_metrics_index_template, "_source": self.prepare_not_found_object_info( - test_item_info, time() - t_start, - feature_names, - model_info_tags) + test_item_info, time() - t_start, feature_names, model_info_tags) }]) try: - if "amqpUrl" in self.app_config and self.app_config["amqpUrl"].strip(): - AmqpClient(self.app_config["amqpUrl"]).send_to_inner_queue( - self.app_config["exchangeName"], "stats_info", json.dumps(results_to_share)) + if self.app_config.amqpUrl: + amqp_client = AmqpClient(self.app_config.amqpUrl) + amqp_client.send_to_inner_queue( + self.app_config.exchangeName, "stats_info", json.dumps(results_to_share)) if results: - for model_type in ["suggestion", "auto_analysis"]: - AmqpClient(self.app_config["amqpUrl"]).send_to_inner_queue( - self.app_config["exchangeName"], "train_models", json.dumps({ - "model_type": model_type, - "project_id": test_item_info.project, - GATHERED_METRIC_TOTAL: len(results) - })) + for model_type in [ModelType.suggestion, ModelType.auto_analysis]: + AmqpClient(self.app_config.amqpUrl).send_to_inner_queue( + self.app_config.exchangeName, 'train_models', + TrainInfo(model_type=model_type, project=test_item_info.project, + gathered_metric_total=len(results)).json()) except Exception as exc: logger.exception(exc) logger.debug("Stats info %s", results_to_share) diff --git a/app/utils/__init__.py b/app/utils/__init__.py index e69de29b..f86b6d2f 100644 --- a/app/utils/__init__.py +++ b/app/utils/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2023 EPAM Systems +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/app/utils/defaultdict.py b/app/utils/defaultdict.py new file mode 100644 index 00000000..49bd6b10 --- /dev/null +++ b/app/utils/defaultdict.py @@ -0,0 +1,48 @@ +# Copyright 2024 EPAM Systems +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
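At the end of suggest_items above, the service now publishes typed TrainInfo messages (one per model type) to the 'train_models' queue instead of hand-built dicts. The real TrainInfo and ModelType live in app.commons.model.ml; the definitions below are assumptions reconstructed from the fields used in this diff, purely to show the message shape.

import enum

from pydantic import BaseModel


class ModelType(str, enum.Enum):
    suggestion = "suggestion"
    auto_analysis = "auto_analysis"


class TrainInfo(BaseModel):
    model_type: ModelType
    project: int
    gathered_metric_total: int


def build_train_messages(project: int, results_count: int) -> list[str]:
    # One 'train_models' message per model type, mirroring the loop in suggest_items.
    return [
        TrainInfo(model_type=model_type, project=project, gathered_metric_total=results_count).json()
        for model_type in (ModelType.suggestion, ModelType.auto_analysis)
    ]


print(build_train_messages(project=34, results_count=2))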
+ +from collections import defaultdict as _defaultdict +from typing import TypeVar, Callable, Optional + +_KT = TypeVar("_KT") +_RT = TypeVar("_RT") + + +class DefaultDict(_defaultdict): + _checked_keys: set[_KT] + _default_factory: Optional[Callable[['DefaultDict', _KT], _RT]] + + def __init__(self, default_factory: Optional[Callable[['DefaultDict', _KT], _RT]] = None, **kwargs): + super().__init__(**kwargs) + self._default_factory = default_factory + self._checked_keys = set() + + def __missing__(self, key: _KT) -> _RT: + if self._default_factory is None: + raise KeyError(key) + self[key] = value = self._default_factory(self, key) + return value + + def __contains__(self, item): + if item in self.keys(): + return True + if item in self._checked_keys: + return False + self._checked_keys.add(item) + try: + # noinspection PyStatementEffect + self[item] + return True + except KeyError: + return False diff --git a/app/utils/log_preparation.py b/app/utils/log_preparation.py new file mode 100644 index 00000000..09ced684 --- /dev/null +++ b/app/utils/log_preparation.py @@ -0,0 +1,70 @@ +# Copyright 2024 EPAM Systems +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from app.utils import text_processing + + +def basic_prepare(message: str) -> str: + cleaned_message = message.strip() + # Sometimes log level goes first + cleaned_message = text_processing.remove_starting_log_level(cleaned_message) + cleaned_message = text_processing.remove_starting_datetime(cleaned_message) + cleaned_message = text_processing.remove_starting_log_level(cleaned_message) + cleaned_message = text_processing.remove_starting_thread_id(cleaned_message) + cleaned_message = text_processing.remove_starting_thread_name(cleaned_message) + # Sometimes log level goes after thread name + cleaned_message = text_processing.remove_starting_log_level(cleaned_message) + + # This should go right after starting garbage clean-up + cleaned_message = text_processing.unify_line_endings(cleaned_message) + + cleaned_message = text_processing.remove_markdown_mode(cleaned_message) + cleaned_message = text_processing.replace_code_separators(cleaned_message) + cleaned_message = text_processing.remove_webdriver_auxiliary_info(cleaned_message) + cleaned_message = text_processing.replace_tabs_for_newlines(cleaned_message) + cleaned_message = text_processing.fix_big_encoded_urls(cleaned_message) + cleaned_message = text_processing.remove_generated_parts(cleaned_message) + cleaned_message = text_processing.remove_guid_uuids_from_text(cleaned_message) + cleaned_message = text_processing.remove_access_tokens(cleaned_message) + cleaned_message = text_processing.clean_html(cleaned_message) + cleaned_message = text_processing.delete_empty_lines(cleaned_message) + cleaned_message = text_processing.leave_only_unique_lines(cleaned_message) + return cleaned_message + + +def prepare_message(clean_message: str, number_of_lines: int, test_and_methods: set[str]) -> str: + message = text_processing.first_lines(clean_message, number_of_lines) + message = 
text_processing.replace_text_pieces(message, test_and_methods) + message = text_processing.delete_empty_lines(text_processing.remove_numbers(message)) + return message + + +def prepare_message_no_params(message: str) -> str: + message_without_params = text_processing.remove_numbers(message) + message_without_params = text_processing.clean_from_params(message_without_params) + return message_without_params + + +def prepare_exception_message_and_stacktrace(clean_message: str) -> tuple[str, str]: + exception_message, stacktrace = text_processing.detect_log_description_and_stacktrace(clean_message) + stacktrace = text_processing.clean_from_brackets(stacktrace) + stacktrace = text_processing.remove_numbers(stacktrace) + return exception_message, stacktrace + + +def prepare_exception_message_no_params(exception_message: str) -> str: + result = text_processing.remove_numbers(exception_message) + result = text_processing.clean_from_params(result) + result = text_processing.unify_spaces(result) + return result diff --git a/app/utils/text_processing.py b/app/utils/text_processing.py index 515f912b..2c7c446b 100644 --- a/app/utils/text_processing.py +++ b/app/utils/text_processing.py @@ -12,51 +12,119 @@ # See the License for the specific language governing permissions and # limitations under the License. -import logging import re import string import urllib.parse -from typing import List +from typing import Iterable from urllib.parse import urlparse import nltk -from dateutil.parser import parse + +from app.commons.model.launch_objects import Log + +try: + from app.commons import logging +except ImportError: + import logging logger = logging.getLogger("analyzerApp.utils.textProcessing") -STOPWORDS = set(nltk.corpus.stopwords.words("english")) +STOPWORDS = nltk.corpus.stopwords.words("english") +STOPWORDS_ALL = set(STOPWORDS) FILE_EXTENSIONS = ["java", "php", "cpp", "cs", "c", "h", "js", "swift", "rb", "py", "scala"] +def create_punctuation_map(split_urls) -> dict[str, str]: + translate_map = {} + for punct in string.punctuation + "<>{}[];=()'\"": + if punct != '.' and (split_urls or punct not in ['/', '\\']): + translate_map[punct] = ' ' + return translate_map + + +PUNCTUATION_MAP_NO_SPLIT_URLS = create_punctuation_map(False) +PUNCTUATION_MAP_SPLIT_URLS = create_punctuation_map(True) + + +def replace_patterns(text: str, patterns: Iterable[tuple[re.Pattern, str]]) -> str: + """Removes starting patterns from the text.""" + result = text + for p, repl in patterns: + result = p.sub(repl, result) + return result + + +def remove_patterns(text: str, patterns: Iterable[re.Pattern]) -> str: + """Removes starting patterns from the text.""" + return replace_patterns(text, map(lambda p: (p, ''), patterns)) + + +EU_DATE: str = r'\d+-\d+-\d+' +EU_TIME: str = r'\d+:\d+:\d+(?:[.,]\d+)?' 
+US_DATE: str = r'\d+/\d+/\d+' +US_TIME: str = EU_TIME + +EU_DATETIME: str = fr'{EU_DATE}\s+{EU_TIME}' +US_DATETIME: str = fr'{US_DATE}\s+{US_TIME}' + +DELIM: str = r'(?:\s*-\s*)|(?:\s*\|\s*)' + +DATETIME_PATTERNS: Iterable[re.Pattern] = [ + re.compile(fr'^{EU_DATETIME}(?:{DELIM})?\s*'), + re.compile(fr'^{US_DATETIME}(?:{DELIM})?\s*'), + re.compile(fr'^{EU_TIME}(?:{DELIM})?\s*'), + re.compile(fr'^\[{EU_TIME}](?:{DELIM})?\s*'), + re.compile(fr'^\[{EU_DATETIME}](?:{DELIM})?\s*') +] + + def remove_starting_datetime(text: str) -> str: - """Removes datetime at the beginning of the text""" - log_date = "" - idx_text_start = 0 - tokenized_text = text.split(" ") - for idx, str_part in enumerate(tokenized_text): - try: - parsed_info = re.sub(r"[\[\]{},;!#\"$%&'()*<=>?@^_`|~]", "", log_date + " " + str_part) - parse(parsed_info) - log_date = parsed_info - log_date = log_date.strip() - except Exception as e: # noqa - idx_text_start = idx - break - log_date = log_date.replace("'", "").replace("\"", "") - found_regex_log_date = re.search(r"\d{1,7}", log_date) - if found_regex_log_date and found_regex_log_date.group(0) == log_date: - idx_text_start = 0 - - return " ".join(tokenized_text[idx_text_start:]) - - -def filter_empty_lines(log_lines: List[str]) -> List[str]: + """Removes datetime at the beginning of the text.""" + return remove_patterns(text, DATETIME_PATTERNS) + + +LOG_LEVEL: str = r'(?:TRACE|DEBUG|INFO|WARN|ERROR|FATAL)\s?' +LOG_LEVEL_PATTERNS: Iterable[re.Pattern] = [ + re.compile(fr'^{LOG_LEVEL}(?:{DELIM})?\s+'), + re.compile(fr'^\[{LOG_LEVEL}](?:{DELIM})?\s+'), + re.compile(fr'^\({LOG_LEVEL}\)(?:{DELIM})?\s+'), +] + + +def remove_starting_log_level(text: str) -> str: + """ Removes log level at the beginning of the text.""" + return remove_patterns(text, LOG_LEVEL_PATTERNS) + + +THREAD_ID_PATTERN: str = r'\d+\s+-+\s*' +THREAD_ID_PATTERNS: Iterable[re.Pattern] = [ + re.compile(fr'^{THREAD_ID_PATTERN}(?:{DELIM})?\s+'), +] + + +def remove_starting_thread_id(text: str) -> str: + """Removes thread id at the beginning of the text.""" + return remove_patterns(text, THREAD_ID_PATTERNS) + + +THREAD_NAME_PATTERN: str = r'\[[^\]]*]' +THREAD_NAME_PATTERNS: Iterable[re.Pattern] = [ + re.compile(fr'^{THREAD_NAME_PATTERN}(?:{DELIM})?\s+') +] + + +def remove_starting_thread_name(text: str) -> str: + """Removes thread name at the beginning of the text.""" + return remove_patterns(text, THREAD_NAME_PATTERNS) + + +def filter_empty_lines(log_lines: list[str]) -> list[str]: return [line for line in log_lines if line.strip()] def delete_empty_lines(log: str) -> str: """Delete empty lines""" - return "\n".join(filter_empty_lines(log.split("\n"))) + return '\n'.join(filter_empty_lines(log.split('\n'))) def calculate_line_number(text): @@ -83,13 +151,13 @@ def is_starting_message_pattern(text): return False -def get_found_exceptions(text, to_lower=False): +def get_found_exceptions(text: str, to_lower: bool = False) -> str: """Extract exception and errors from logs""" unique_exceptions = set() found_exceptions = [] for word in split_words(text, to_lower=to_lower): for key_word in ["error", "exception", "failure"]: - if re.search(r"[^\s]{3,}%s(\s|$)" % key_word, word.lower()) is not None: + if re.search(r"\S{3,}%s(\s|$)" % key_word, word.lower()) is not None: if word not in unique_exceptions: found_exceptions.append(word) unique_exceptions.add(word) @@ -125,36 +193,34 @@ def detect_log_parts_python(message, default_log_number=1): return "\n".join(detected_message_lines), "\n".join(stacktrace_lines) -def 
is_line_from_stacktrace(text): - """Deletes line numbers in the stacktrace""" +def is_line_from_stacktrace(text: str) -> bool: + """Detects if the line is a stacktrace part""" if is_starting_message_pattern(text): return False - text = re.sub(r"(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}):(\d+)", "", text) - res = re.sub(r"(?<=:)\d+(?=\)?\]?(\n|\r\n|$))", " ", text) + res = re.sub(r"(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}):(\d+)", "", text) + res = re.sub(r"(?<=:)\d+(?=\)?]?(\n|$))", " ", res) if res != text: return True - res = re.sub(r"((?<=line )|(?<=line))\s*\d+\s*((?=, in)|(?=,in)|(?=\n)|(?=\r\n)|(?=$))", + res = re.sub(r"((?<=line )|(?<=line))\s*\d+\s*((?=, in)|(?=,in)|(?=\n)|(?=$))", " ", res, flags=re.I) if res != text: return True res = re.sub("|".join([r"\.%s(?!\.)\b" % ext for ext in FILE_EXTENSIONS]), " ", res, flags=re.I) if res != text: return True - result = re.search(r"^\s*at\s+.*\(.*?\)[\s]*$", res) + result = re.search(r"^\s*at\s+.*\(.*?\)\s*$", res) if result and result.group(0) == res: return True else: - result = re.search(r"^\s*\w+([\.\/]\s*\w+)+\s*\(.*?\)[\s]*$", res) + result = re.search(r"^\s*\w+([./]\s*\w+)+\s*\(.*?\)\s*$", res) if result and result.group(0) == res: return True return False -def detect_log_description_and_stacktrace(message): +def detect_log_description_and_stacktrace(message: str) -> tuple[str, str]: """Split a log into a log message and stacktrace""" - message = remove_starting_datetime(message) - message = delete_empty_lines(message) if calculate_line_number(message) > 2: if is_python_log(message): return detect_log_parts_python(message) @@ -197,10 +263,23 @@ def detect_log_description_and_stacktrace_light(message): return message, "" -def clean_from_brackets(text): - for pattern in [r"\[[\s\S]+\]", r"\{[\s\S]+?\}", r"\([\s\S]+?\)"]: - text = re.sub(pattern, "", text) - return text +SQR_BRCKTS = r'\[[^]]*]' +RND_BRCKTS = r'\([^)]*\)' +CRL_BRCKTS = r'\{[^}]*}' +BRCKTS_TXT = re.compile(fr'{SQR_BRCKTS}|{RND_BRCKTS}|{CRL_BRCKTS}') + + +def clean_from_brackets(text: str) -> str: + """Removes all brackets and text inside them from the given text.""" + return BRCKTS_TXT.sub('', text) + + +SPECIAL_CHARACTER_PATTERN = re.compile(r'[/?&=#@:.*!$%^+~\\|,;<>\[\]{}()`"\'_]') + + +def clean_special_chars(text: str) -> str: + """Removes all special characters in the given text.""" + return SPECIAL_CHARACTER_PATTERN.sub(' ', text) def get_potential_status_codes(text): @@ -208,12 +287,14 @@ def get_potential_status_codes(text): potential_codes_list = [] for line in text.split("\n"): line = clean_from_brackets(line) - patterns_to_check = [r"\bcode[^\w\d\.]+(\d+)[^\d]*(\d*)|\bcode[^\w\d\.]+(\d+?)$", - r"\w+_code[^\w\d\.]+(\d+)[^\d]*(\d*)|\w+_code[^\w\d\.]+(\d+?)$", - r"\bstatus[^\w\d\.]+(\d+)[^\d]*(\d*)|\bstatus[^\w\d\.]+(\d+?)$", - r"\w+_status[^\w\d\.]+(\d+)[^\d]*(\d*)|\w+_status[^\w\d\.]+(\d+?)$"] + patterns_to_check = [ + re.compile(r"\bcode[^\w.]+(\d+)\D*(\d*)|\bcode[^\w.]+(\d+?)$", flags=re.IGNORECASE), + re.compile(r"\w+_code[^\w.]+(\d+)\D*(\d*)|\w+_code[^\w.]+(\d+?)$", flags=re.IGNORECASE), + re.compile(r"\bstatus[^\w.]+(\d+)\D*(\d*)|\bstatus[^\w.]+(\d+?)$", flags=re.IGNORECASE), + re.compile(r"\w+_status[^\w.]+(\d+)\D*(\d*)|\w+_status[^\w.]+(\d+?)$", flags=re.IGNORECASE) + ] for pattern in patterns_to_check: - result = re.search(pattern, line, flags=re.IGNORECASE) + result = pattern.search(line) for i in range(1, 4): try: found_code = result.group(i) @@ -226,18 +307,25 @@ def get_potential_status_codes(text): return potential_codes_list -def sanitize_text(text): 
+NUMBER_PATTERN = re.compile(r'\b\d+\b') +NUMBER_PART_PATTERN = re.compile(r'\d+') +NUMBER_TAG = "SPECIALNUMBER" + + +def remove_numbers(text: str) -> str: """Sanitize text by deleting all numbers""" - return re.sub(r"\d+", "", text) + result = NUMBER_PATTERN.sub(NUMBER_TAG, text) + result = NUMBER_PART_PATTERN.sub('', result) + return result -def first_lines(log_str, n_lines): - """Take n first lines""" - return "\n".join((log_str.split("\n")[:n_lines])) if n_lines >= 0 else log_str +def first_lines(log_str: str, n_lines: int) -> str: + """Take n first lines.""" + return '\n'.join((log_str.split('\n')[:n_lines])) if n_lines >= 0 else log_str -def prepare_message_for_clustering(message, number_of_log_lines, clean_numbers, - leave_log_structure=False): +def prepare_message_for_clustering(message: str, number_of_log_lines: int, clean_numbers: bool, + leave_log_structure: bool = False) -> str: potential_status_codes = get_potential_status_codes(message) message = remove_starting_datetime(message) if clean_numbers: @@ -245,10 +333,10 @@ def prepare_message_for_clustering(message, number_of_log_lines, clean_numbers, for idx, code in enumerate(potential_status_codes): replaced_code = "#&#" * (idx + 1) status_codes_replaced[replaced_code] = code - message = re.sub(r"\b%s\b" % code, replaced_code, message) - message = sanitize_text(message) + message = re.sub(fr"\b{code}\b", replaced_code, message) + message = remove_numbers(message) for code_replaced in sorted(status_codes_replaced.keys(), reverse=True): - message = re.sub(r"%s" % code_replaced, str(code), message) + message = re.sub(code_replaced, str(status_codes_replaced[code_replaced]), message) message = delete_empty_lines(message) message = first_lines(message, number_of_log_lines) if leave_log_structure: @@ -259,14 +347,16 @@ def prepare_message_for_clustering(message, number_of_log_lines, clean_numbers, return " ".join(words) +REGEX_STYLE_TAG = re.compile('[\\s\\S]*?') +REGEX_SCRIPT_TAG = re.compile('[\\s\\S]*?') +REGEX_HTML_TAGS = re.compile('<.*?>|&([a-z0-9]+|#[0-9]{1,6}|#x[0-9a-f]{1,6});') + + def clean_text_from_html_tags(message): """Removes style and script tags together with inner text and removes html tags""" - regex_style_tag = re.compile('[\\s\\S]*?') - message = re.sub(regex_style_tag, " ", message) - regex_script_tag = re.compile('[\\s\\S]*?') - message = re.sub(regex_script_tag, " ", message) - regex_html_tags = re.compile('<.*?>|&([a-z0-9]+|#[0-9]{1,6}|#x[0-9a-f]{1,6});') - message = re.sub(regex_html_tags, " ", message) + message = re.sub(REGEX_STYLE_TAG, " ", message) + message = re.sub(REGEX_SCRIPT_TAG, " ", message) + message = re.sub(REGEX_HTML_TAGS, " ", message) return message @@ -297,32 +387,41 @@ def clean_html(message): return delete_empty_lines("\n".join(all_lines)) -def split_words(text, min_word_length=0, only_unique=True, split_urls=True, to_lower=True): +def split_words(text: str, min_word_length: int = 0, only_unique: bool = True, split_urls: bool = True, + to_lower: bool = True) -> list[str]: + if not text: + return [] all_unique_words = set() all_words = [] - translate_map = {} - for punct in string.punctuation + "<>{}[];=()'\"": - if punct != "." 
and (split_urls or punct not in ["/", "\\"]): - translate_map[punct] = " " - text = text.translate(text.maketrans(translate_map)).strip().strip(".") - for word_part in text.split(): - word_part = word_part.strip().strip(".") - for w in word_part.split(): - if to_lower: - w = w.lower() - if w != "" and len(w) >= min_word_length: - if w in STOPWORDS: + + if split_urls: + result = text.translate(text.maketrans(PUNCTUATION_MAP_SPLIT_URLS)) + else: + result = text.translate(text.maketrans(PUNCTUATION_MAP_NO_SPLIT_URLS)) + result = result.strip().strip('.') + if to_lower: + result = result.lower() + for w in result.split(): + w = w.strip().strip('.') + if w != "" and len(w) >= min_word_length: + if w in STOPWORDS_ALL: + continue + if only_unique: + if w in all_unique_words: continue - if not only_unique or w not in all_unique_words: - all_unique_words.add(w) - all_words.append(w) + all_unique_words.add(w) + all_words.append(w) return all_words -def find_only_numbers(detected_message_with_numbers): +def normalize_message(message: str) -> str: + return " ".join(sorted(split_words(message, to_lower=True))) + + +def find_only_numbers(detected_message_with_numbers: str) -> str: """Removes all non digit symbols and concatenates unique numbers""" - detected_message_only_numbers = re.sub(r"[^\d \._]", "", detected_message_with_numbers) - return " ".join(split_words(detected_message_only_numbers, only_unique=True)) + detected_message_only_numbers = re.sub(r'[^\d ._]', '', detected_message_with_numbers) + return " ".join(split_words(detected_message_only_numbers)) def enrich_text_with_method_and_classes(text): @@ -330,7 +429,7 @@ def enrich_text_with_method_and_classes(text): for line in text.split("\n"): new_line = line found_values = [] - for w in split_words(line, min_word_length=0, only_unique=True, split_urls=True, to_lower=False): + for w in split_words(line, split_urls=True, to_lower=False): if len(w.split(".")) > 2: last_word = w.split(".")[-1] if len(last_word) > 3: @@ -341,15 +440,15 @@ def enrich_text_with_method_and_classes(text): for i in [2, 1]: full_path = full_path + " " + ".".join(words[-i:]) full_path = full_path + " " - new_line = re.sub(r"\b(? str: + result = text.replace("-", " ").replace("_", " ") all_words = [] - words = split_words(text, to_lower=False, only_unique=False) + words = split_words(result, to_lower=False, only_unique=False) for w in words: if "." 
not in w: all_words.extend([s.strip() for s in re.split("([A-Z][^A-Z]+)", w) if s.strip()]) @@ -361,10 +460,10 @@ def preprocess_test_item_name(text): return " ".join(all_words) -def find_test_methods_in_text(text): +def find_test_methods_in_text(text: str) -> set[str]: test_methods = set() for m in re.findall( - r"([^ \(\)\/\\\\:]+(Test|Step)[s]*\.[^ \(\)\/\\\\:]+)|([^ \(\)\/\\\\:]+\.spec\.js)", text): + r"([^ ()/\\:]+(Test|Step)s*\.[^ ()/\\:]+)|([^ ()/\\:]+\.spec\.js)", text): if m[0].strip(): test_methods.add(m[0].strip()) if m[2].strip(): @@ -377,7 +476,7 @@ def find_test_methods_in_text(text): return final_test_methods -def preprocess_found_test_methods(text): +def preprocess_found_test_methods(text: str) -> str: all_words = [] words = split_words(text, to_lower=False, only_unique=False) for w in words: @@ -390,56 +489,75 @@ def preprocess_found_test_methods(text): def compress(text): """compress sentence to consist of only unique words""" - return " ".join(split_words(text, only_unique=True)) + return " ".join(split_words(text)) def preprocess_words(text): all_words = [] - for w in re.finditer(r"[\w\._]+", text): - word_normalized = re.sub(r"^[\w]\.", "", w.group(0)) + for w in re.finditer(r"[\w._]+", text): + word_normalized = re.sub(r"^\w\.", "", w.group(0)) word = word_normalized.replace("_", "") if len(word) >= 3: all_words.append(word.lower()) split_parts = word_normalized.split("_") - split_words = [] + split_words_list = [] if len(split_parts) > 2: for idx in range(len(split_parts)): if idx != len(split_parts) - 1: - split_words.append("".join(split_parts[idx:idx + 2]).lower()) - all_words.extend(split_words) + split_words_list.append("".join(split_parts[idx:idx + 2]).lower()) + all_words.extend(split_words_list) if "." not in word_normalized: - split_words = [] + split_words_list = [] split_parts = [s.strip() for s in re.split("([A-Z][^A-Z]+)", word) if s.strip()] if len(split_parts) > 2: for idx in range(len(split_parts)): if idx != len(split_parts) - 1: if len("".join(split_parts[idx:idx + 2]).lower()) > 3: - split_words.append("".join(split_parts[idx:idx + 2]).lower()) - all_words.extend(split_words) + split_words_list.append("".join(split_parts[idx:idx + 2]).lower()) + all_words.extend(split_words_list) return all_words -def remove_guid_uids_from_text(text): - for pattern in [ - r"[0-9a-fA-F]{16,48}|[0-9a-fA-F]{10,48}\.\.\.", - r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}", - r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-\w+" - ]: - strings_to_replace = set() - for m in re.findall(pattern, text): - if not m.isdigit() and m.strip(): - strings_to_replace.add(m) - for _str in sorted(strings_to_replace, key=lambda x: (len(x), x), reverse=True): - text = text.replace(_str, " ") - return text +UUID = r'[0-9a-fA-F]{8}-?[0-9a-fA-F]{4}-?[0-9a-fA-F]{4}-?[0-9a-fA-F]{4}-?[0-9a-fA-F]{12}' +TRUNCATED_UUID = r'[0-9a-fA-F]{16,48}|[0-9a-fA-F]{10,48}\.\.\.' 
+NAMED_UUID = r'[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-(\w+)' +UUID_TAG = "SPECIALUUID" +GUID_UUID_PATTERNS: Iterable[tuple[re.Pattern, str]] = [ + (re.compile(fr'\b{UUID}\b'), UUID_TAG), + (re.compile(fr'\b{TRUNCATED_UUID}\b'), UUID_TAG), + (re.compile(fr'\b{NAMED_UUID}\b'), fr'{UUID_TAG} \1'), +] + + +def remove_guid_uuids_from_text(text: str) -> str: + return replace_patterns(text, GUID_UUID_PATTERNS) + +def replace_tabs_for_newlines(message: str) -> str: + return message.replace('\t', '\n') -def replace_tabs_for_newlines(message): - return message.replace("\t", "\n") +HORIZONTAL_WHITESPACE = (r' \t\u00A0\u1680\u180E\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A' + r'\u202F\u205F\u3000') +LINE_ENDING_PATTERN = re.compile(fr'[{HORIZONTAL_WHITESPACE}]*\r?\n') -def unify_line_endings(message): - return message.replace("\r\n", "\n") + +def unify_line_endings(message: str) -> str: + return LINE_ENDING_PATTERN.sub(r'\n', message) + + +SPACE_PATTERN = re.compile(fr'[{HORIZONTAL_WHITESPACE}]+') +NEWLINE_SPACE_PATTERN = re.compile(fr'[{HORIZONTAL_WHITESPACE}]*\n[{HORIZONTAL_WHITESPACE}]*') +SPACE_REPLACEMENT = ' ' +NEWLINE_SPACE_REPLACEMENT = '\n' +SPACE_PATTERNS: Iterable[tuple[re.Pattern, str]] = [ + (SPACE_PATTERN, SPACE_REPLACEMENT), + (NEWLINE_SPACE_PATTERN, NEWLINE_SPACE_REPLACEMENT), +] + + +def unify_spaces(message: str) -> str: + return replace_patterns(message, SPACE_PATTERNS) def fix_big_encoded_urls(message): @@ -450,7 +568,7 @@ def fix_big_encoded_urls(message): except: # noqa pass if new_message != message: - return re.sub(r"[\(\)\{\}#%]", " ", new_message) + return re.sub(r"[(){}#%]", " ", new_message) return message @@ -472,44 +590,24 @@ def has_more_lines_pattern(line): return False +INNER_CLASS_EXTERNAL_PATTERN = re.compile(r'\b((?:[a-zA-Z0-9_-]+/|\\)+)([a-zA-Z0-9_-]+)\$([a-zA-Z0-9_-]+\.class)\b') +INNER_CLASS_INTERNAL_PATTERN = re.compile(r'(?<=[.$])([a-zA-Z0-9_-]+)\$(?=[a-zA-Z0-9_-]+[.$(@])') +GENERATED_LINE_PATTERN = re.compile((r'\s*(?:at\s*)?(?:[a-zA-Z0-9_-]+\.)+(?:[a-zA-Z0-9_-]+\$\$)+[0-9a-f]+\.' + r'(?:[a-zA-Z0-9_-]+\$|\.)*[a-zA-Z0-9_-]+\(\).*')) +CLASS_NAME_WITH_MEMORY_REFERENCE_PATTERN = re.compile(r'\b((?:[a-zA-Z0-9_-]+\.)+)([a-zA-Z0-9_-]+)@[0-9a-f]+\b') +TRUNCATED_STACKTRACE_PATTERN = re.compile(r'\s*\.\.\. 
\d+ more.*') +STACKTRACE_PATTERNS: Iterable[tuple[re.Pattern, str]] = [ + (GENERATED_LINE_PATTERN, r''), + (INNER_CLASS_EXTERNAL_PATTERN, r'\1\2.\3'), + (INNER_CLASS_INTERNAL_PATTERN, r'\1.'), + (CLASS_NAME_WITH_MEMORY_REFERENCE_PATTERN, r'\1\2'), + (TRUNCATED_STACKTRACE_PATTERN, r''), +] + + def remove_generated_parts(message): """Removes lines with '' keyword and removes parts, like $ab24b, @c321e from words""" - all_lines = [] - for line in message.split("\n"): - if "" in line.lower(): - continue - if has_stacktrace_keywords(line) or has_more_lines_pattern(line): - continue - for symbol in [r"\$", "@"]: - all_found_parts = set() - for m in re.finditer(r"%s+(.+?)\b" % symbol, line): - try: - found_part = m.group(1).strip().strip(symbol).strip() - if found_part != "": - all_found_parts.add((found_part, m.group(0).strip())) - except Exception as err: - logger.error(err) - sorted_parts = sorted(list(all_found_parts), key=lambda x: len(x[1]), reverse=True) - for found_part in sorted_parts: - whole_found_part = found_part[1].replace("$", r"\$") - found_part = found_part[0] - part_to_replace = "" - if re.search(r"\d", found_part): - part_with_numbers_in_the_end = re.search(r"[a-zA-z]{5,}\d+", found_part) - if part_with_numbers_in_the_end and part_with_numbers_in_the_end.group(0) == found_part: - part_to_replace = " %s" % found_part - else: - part_to_replace = "" - else: - part_to_replace = ".%s" % found_part - try: - line = re.sub(whole_found_part, part_to_replace, line) - except: # noqa - pass - - line = re.sub(r"\.+", ".", line) - all_lines.append(line) - return "\n".join(all_lines) + return replace_patterns(message, STACKTRACE_PATTERNS) def leave_only_unique_lines(message): @@ -525,39 +623,36 @@ def leave_only_unique_lines(message): return "\n".join(all_lines) -def leave_only_unique_logs(logs): +def leave_only_unique_logs(logs: list[Log]) -> list[Log]: unique_logs = set() all_logs = [] for log in logs: - if log.message.strip() not in unique_logs: + stripped_message = log.message.strip() + if stripped_message not in unique_logs: all_logs.append(log) - unique_logs.add(log.message.strip()) + unique_logs.add(stripped_message) return all_logs -def clean_colon_stacking(text): +def clean_colon_stacking(text: str) -> str: return text.replace(":", " : ") -def clean_from_params(text): - text = re.sub(r"(?<=[^\w])('.+?'|\".+?\")(?=[^\w]|$)|(?<=^)('.+?'|\".+?\")(?=[^\w]|$)", " ", text) - return re.sub(r" +", " ", text).strip() +def clean_from_params(text: str) -> str: + return clean_special_chars(text) -def clean_from_paths(text): - text = re.sub(r"(^|(?<=[^\w:\\\/]))(\w:)?([\w\d\.\-_]+)?([\\\/]+[\w\d\.\-_]+){2,}", " ", text) - return re.sub(r" +", " ", text).strip() +def clean_from_paths(text: str): + return re.sub(r"(^|(?<=[^\w:\\/]))(\w:)?([\w.\-_]+)?([\\/]+[\w.\-_]+){2,}", " ", text) -def clean_from_urls(text): - text = re.sub(r"(http|https|ftp):[^\s]+|\bwww\.[^\s]+", " ", text) - return re.sub(r" +", " ", text).strip() +URL_PATTERN = re.compile(r'[a-z]+:/+\S+', re.IGNORECASE) -def extract_urls(text): +def extract_urls(text: str) -> list[str]: all_unique = set() all_urls = [] - for param in re.findall(r"((http|https|ftp):[^\s]+|\bwww\.[^\s]+)", text): + for param in URL_PATTERN.findall(text): url = param[0].strip() if url not in all_unique: all_unique.add(url) @@ -568,7 +663,7 @@ def extract_urls(text): def extract_paths(text): all_unique = set() all_paths = [] - for param in re.findall(r"((^|(?<=[^\w:\\\/]))(\w:)?([\w\d\.\-_ ]+)?([\\\/]+[\w\d\.\-_ ]+){2,})", text): + for param in 
re.findall(r"((^|(?<=[^\w:\\/]))(\w:)?([\w.\-_ ]+)?([\\/]+[\w.\-_ ]+){2,})", text): path = param[0].strip() if path not in all_unique: all_unique.add(path) @@ -579,7 +674,7 @@ def extract_paths(text): def extract_message_params(text): all_unique = set() all_params = [] - for param in re.findall(r"(^|[^\w])('.+?'|\".+?\")([^\w]|$|\n)", text): + for param in re.findall(r"(^|\W)('.+?'|\".+?\")(\W|$|\n)", text): param = re.search(r"[^\'\"]+", param[1].strip()) if param is not None: param = param.group(0).strip() @@ -626,7 +721,7 @@ def enrich_found_exceptions(text): return " ".join(new_words) -def transform_string_feature_range_into_list(text): +def transform_string_feature_range_into_list(text: str) -> list[int]: """Converts features from string to list of ids""" values = [] for part in text.split(","): @@ -640,15 +735,93 @@ def transform_string_feature_range_into_list(text): return values -def unite_project_name(project_id: str, prefix: str) -> str: - return prefix + project_id +def unite_project_name(project_id: str | int, prefix: str) -> str: + return f'{prefix}{project_id}' -def replace_text_pieces(text, text_pieces): +def replace_text_pieces(text: str, text_pieces: Iterable[str]) -> str: + result = text for w in sorted(text_pieces, key=lambda x: len(x), reverse=True): - text = text.replace(w, " ") - return text + result = result.replace(w, " ") + return result -def prepare_es_min_should_match(min_should_match): +def prepare_es_min_should_match(min_should_match: float) -> str: return str(int(min_should_match * 100)) + "%" + + +ACCESS_OR_REFRESH_TOKEN_PATTERN = r'(?:access|refresh|biometric|jwt)_?token' +JSON_ACCESS_TOKEN = fr'("{ACCESS_OR_REFRESH_TOKEN_PATTERN}"\s*:\s*")[^"]+' +HTTP_ACCESS_TOKEN = (r'(Authorization\s*:\s*' + r'(?:Bearer|Basic|Digest|HOBA|Mutual|Negotiate|NTLM|VAPID|SCRAM|AWS4-HMAC-SHA256)) .*') +TOKEN_TAG = 'SPECIALTOKEN' +TOKEN_REPLACEMENT = fr'\1{TOKEN_TAG}' +ACCESS_TOKEN_PATTERNS: Iterable[tuple[re.Pattern, str]] = [ + (re.compile(JSON_ACCESS_TOKEN, re.RegexFlag.IGNORECASE), TOKEN_REPLACEMENT), + (re.compile(HTTP_ACCESS_TOKEN, re.RegexFlag.IGNORECASE), TOKEN_REPLACEMENT), +] + + +def remove_access_tokens(text: str) -> str: + return replace_patterns(text, ACCESS_TOKEN_PATTERNS) + + +MARKDOWN_MODE_PATTERN = re.compile(r'!!!MARKDOWN_MODE!!!\s*') +MARKDOWN_MODE_REPLACEMENT = '' +MARKDOWN_MODE_PATTERNS: Iterable[tuple[re.Pattern, str]] = [ + (MARKDOWN_MODE_PATTERN, MARKDOWN_MODE_REPLACEMENT) +] + + +def remove_markdown_mode(text: str) -> str: + return replace_patterns(text, MARKDOWN_MODE_PATTERNS) + + +MARKDOWN_CODE_SEPARATOR: str = r'`{3}' +FANCY_TEXT_SEPARATOR_START: str = r'-{3,}=+' +FANCY_TEXT_SEPARATOR_END: str = r'={3,}-+' +MARKDOWN_TEXT_SEPARATOR: str = r'-{3,}' +EQUALITY_TEXT_SEPARATOR: str = r'={3,}' +UNDERSCORE_TEXT_SEPARATOR: str = r'_{3,}' +TEXT_SEPARATORS_PATTERN: str = (fr'(?:{FANCY_TEXT_SEPARATOR_START}|{FANCY_TEXT_SEPARATOR_END}' + fr'|{MARKDOWN_CODE_SEPARATOR}|{MARKDOWN_TEXT_SEPARATOR}|{EQUALITY_TEXT_SEPARATOR}' + fr'|{UNDERSCORE_TEXT_SEPARATOR})') +CODE_SEPARATOR_REPLACEMENT: str = 'TEXTDELIMITER' +CODE_SEPARATOR_PATTERNS: Iterable[tuple[re.Pattern, str]] = [ + (re.compile(fr'\n{TEXT_SEPARATORS_PATTERN}\n'), fr' {CODE_SEPARATOR_REPLACEMENT}\n'), + (re.compile(fr'^{TEXT_SEPARATORS_PATTERN}\n'), fr' {CODE_SEPARATOR_REPLACEMENT}\n'), + (re.compile(fr'\s*{TEXT_SEPARATORS_PATTERN}\n'), fr' {CODE_SEPARATOR_REPLACEMENT}\n'), + (re.compile(fr'\n{TEXT_SEPARATORS_PATTERN}\s+'), fr'\n{CODE_SEPARATOR_REPLACEMENT} '), + (re.compile(fr'\s+{TEXT_SEPARATORS_PATTERN}\s+'), 
fr' {CODE_SEPARATOR_REPLACEMENT} '), + (re.compile(fr'^{TEXT_SEPARATORS_PATTERN}\s*'), fr'{CODE_SEPARATOR_REPLACEMENT} '), + (re.compile(fr'\s+{TEXT_SEPARATORS_PATTERN}$'), fr' {CODE_SEPARATOR_REPLACEMENT}'), + (re.compile(fr'{TEXT_SEPARATORS_PATTERN}$'), fr' {CODE_SEPARATOR_REPLACEMENT}'), +] + + +def replace_code_separators(text: str) -> str: + return replace_patterns(text, CODE_SEPARATOR_PATTERNS) + + +WEBDRIVER_SCREENSHOT_PATTERN = re.compile(r'(?:\s*-*>\s*)?Webdriver screenshot captured: [^/\0\n.]+\.\w+') +WEBDRIVER_SCREENSHOT_REFERENCE_PATTERN = re.compile(r'\s*Screenshot: file:/(?:[^/\0\n]+/)*[^/\0\n]+') +WEBDRIVER_PAGE_SOURCE_REFERENCE_PATTERN = re.compile(r'\s*Page source: file:/(?:[^/\0\n]+/)*[^/\0\n]+') +WEBDRIVER_BUILD_INFO_PATTERN = re.compile(r"\s*Build info: version: '[^']+', revision: '[^']+'") +WEBDRIVER_DRIVER_INFO_PATTERN = re.compile(r'\s*Driver info: [\w.]+') +WEBDRIVER_SYSTEM_INFO_PATTERN = re.compile(r"\s*System info: (?:[\w.]+: '[^']+', )+[\w.]+: '[^']+'") +WEBDRIVER_DRIVER_CAPABILITIES_PATTERN = re.compile(r'\s*Capabilities {\w+: [^\n]+') + +WEBDRIVER_AUXILIARY_INFO_REPLACEMENT = '' +WEBDRIVER_AUXILIARY_PATTERNS: Iterable[tuple[re.Pattern, str]] = [ + (WEBDRIVER_SCREENSHOT_PATTERN, WEBDRIVER_AUXILIARY_INFO_REPLACEMENT), + (WEBDRIVER_SCREENSHOT_REFERENCE_PATTERN, WEBDRIVER_AUXILIARY_INFO_REPLACEMENT), + (WEBDRIVER_PAGE_SOURCE_REFERENCE_PATTERN, WEBDRIVER_AUXILIARY_INFO_REPLACEMENT), + (WEBDRIVER_BUILD_INFO_PATTERN, WEBDRIVER_AUXILIARY_INFO_REPLACEMENT), + (WEBDRIVER_DRIVER_INFO_PATTERN, WEBDRIVER_AUXILIARY_INFO_REPLACEMENT), + (WEBDRIVER_SYSTEM_INFO_PATTERN, WEBDRIVER_AUXILIARY_INFO_REPLACEMENT), + (WEBDRIVER_DRIVER_CAPABILITIES_PATTERN, WEBDRIVER_AUXILIARY_INFO_REPLACEMENT), +] + + +def remove_webdriver_auxiliary_info(text: str) -> str: + return replace_patterns(text, WEBDRIVER_AUXILIARY_PATTERNS) diff --git a/app/utils/utils.py b/app/utils/utils.py index 160f0e6a..846118cd 100644 --- a/app/utils/utils.py +++ b/app/utils/utils.py @@ -12,21 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. 
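To show how the reworked text_processing helpers above and the new log_preparation module are meant to compose, a short sketch (the sample log line is invented; function names are taken from the hunks above, and the exact cleaned output is not asserted here):

from app.utils import log_preparation, text_processing

raw = ("2024-08-14 12:03:05,123 ERROR [main] - Authorization: Bearer abc.def\n"
       "java.lang.NullPointerException\n"
       "\tat com.example.Foo.bar(Foo.java:42)")

# basic_prepare strips the leading datetime, log level, thread name, tokens, etc.
clean = log_preparation.basic_prepare(raw)
message, stacktrace = text_processing.detect_log_description_and_stacktrace(clean)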
-from functools import wraps - -import logging -import warnings -import os import json -from typing import Union, Any - -import requests -from app.commons import launch_objects -from collections import Counter +import os import random -import numpy as np import traceback +import warnings +from collections import Counter +from functools import wraps +from typing import Any +import numpy as np +import requests + +from app.commons import logging +from app.commons.model import launch_objects from app.utils.text_processing import split_words, remove_credentials_from_url logger = logging.getLogger("analyzerApp.utils") @@ -42,21 +41,32 @@ def _inner(*args, **kwargs): warnings.simplefilter("ignore") result = func(*args, **kwargs) return result + return _inner def read_file(folder: str, filename: str) -> str: - """Read file content as string (UTF-8)""" + """Read file content as string (UTF-8).""" with open(os.path.join(folder, filename), "r") as file: return file.read() -def read_json_file(folder: str, filename: str, to_json=False) -> Union[str, Any]: - """Read fixture from file""" +def read_json_file(folder: str, filename: str, to_json: bool = False) -> Any: + """Read fixture from file.""" content = read_file(folder, filename) return content if not to_json else json.loads(content) +def validate_folder(folder_path: str) -> bool: + """Check that passed path points to a directory and it exists.""" + return folder_path and folder_path.strip() and os.path.exists(folder_path) and os.path.isdir(folder_path) + + +def validate_file(file_path: str) -> bool: + """Check that passed path points to a file and it exists.""" + return file_path and file_path.strip() and os.path.exists(file_path) and os.path.isfile(file_path) + + def extract_real_id(elastic_id): real_id = str(elastic_id) if real_id[-2:] == "_m": @@ -80,9 +90,9 @@ def choose_issue_type(predicted_labels, predicted_labels_probability, chosen_type = scores_by_issue_type[issue_type] start_time = chosen_type["mrHit"]["_source"]["start_time"] predicted_prob = round(predicted_labels_probability[i][1], 4) - if (predicted_prob > max_prob) or\ - ((predicted_prob == max_prob) and # noqa - (max_val_start_time is None or start_time > max_val_start_time)): + if (predicted_prob > max_prob) or \ + ((predicted_prob == max_prob) and # noqa + (max_val_start_time is None or start_time > max_val_start_time)): max_prob = predicted_prob predicted_issue_type = issue_type global_idx = i @@ -119,32 +129,51 @@ def extract_all_exceptions(bodies): return logs_with_exceptions -def calculate_proportions_for_labels(labels): +MINIMAL_VALUE_FOR_GOOD_PROPORTION = 2 + + +def calculate_proportions_for_labels(labels: list[int]) -> float: counted_labels = Counter(labels) if len(counted_labels.keys()) >= 2: min_val = min(counted_labels.values()) max_val = max(counted_labels.values()) - if max_val > 0: + if min_val > MINIMAL_VALUE_FOR_GOOD_PROPORTION: return np.round(min_val / max_val, 3) return 0.0 -def rebalance_data(train_data, train_labels, due_proportion): - one_data = [train_data[i] for i in range(len(train_data)) if train_labels[i] == 1] - zero_data = [train_data[i] for i in range(len(train_data)) if train_labels[i] == 0] +def balance_data(train_data_indexes: list[int], train_labels: list[int], + due_proportion: float) -> tuple[list[int], list[int], float]: + one_data = [train_data_indexes[i] for i in range(len(train_data_indexes)) if train_labels[i] == 1] + zero_data = [train_data_indexes[i] for i in range(len(train_data_indexes)) if train_labels[i] == 0] zero_count = 
len(zero_data) one_count = len(one_data) + min_count = min(zero_count, one_count) + max_count = max(zero_count, one_count) + if zero_count > one_count: + min_data = one_data + max_data = zero_data + min_label = 1 + max_label = 0 + else: + min_data = zero_data + max_data = one_data + min_label = 0 + max_label = 1 + all_data = [] all_data_labels = [] - real_proportion = 0.0 if zero_count < 0.001 else np.round(one_count / zero_count, 3) - if zero_count > 0 and real_proportion < due_proportion: - all_data.extend(one_data) - all_data_labels.extend([1] * len(one_data)) + real_proportion = 0.0 + if min_count > MINIMAL_VALUE_FOR_GOOD_PROPORTION: + real_proportion = np.round(min_count / max_count, 3) + if min_count > 0 and real_proportion < due_proportion: + all_data.extend(min_data) + all_data_labels.extend([min_label] * len(min_data)) random.seed(1763) - random.shuffle(zero_data) - zero_size = int(one_count * (1 / due_proportion) - 1) - all_data.extend(zero_data[:zero_size]) - all_data_labels.extend([0] * zero_size) + random.shuffle(max_data) + max_size = int(min_count * (1 / due_proportion) - 1) + all_data.extend(max_data[:max_size]) + all_data_labels.extend([max_label] * max_size) real_proportion = calculate_proportions_for_labels(all_data_labels) if real_proportion / due_proportion >= 0.9: real_proportion = due_proportion @@ -156,7 +185,7 @@ def rebalance_data(train_data, train_labels, due_proportion): return all_data, all_data_labels, real_proportion -def topological_sort(feature_graph): +def topological_sort(feature_graph: dict[int, list[int]]) -> list[int]: visited = {} for key_ in feature_graph: visited[key_] = 0 @@ -183,25 +212,26 @@ def topological_sort(feature_graph): return stack -def to_number_list(features_list): +def to_int_list(features_list: str) -> list[int]: feature_numbers_list = [] for feature_name in features_list.split(";"): feature_name = feature_name.split("_")[0] - try: - feature_numbers_list.append(int(feature_name)) - except: # noqa - try: - feature_numbers_list.append(float(feature_name)) - except: # noqa - pass + feature_numbers_list.append(int(feature_name)) return feature_numbers_list -def fill_prevously_gathered_features(feature_list, feature_ids): +def to_float_list(features_list: str) -> list[float]: + feature_numbers_list = [] + for feature_name in features_list.split(";"): + feature_name = feature_name.split("_")[0] + feature_numbers_list.append(float(feature_name)) + return feature_numbers_list + + +def fill_previously_gathered_features(feature_list: list[list[float]], + feature_ids: list[int]) -> dict[int, list[list[float]]]: previously_gathered_features = {} try: - if type(feature_ids) == str: - feature_ids = to_number_list(feature_ids) for i in range(len(feature_list)): for idx, feature in enumerate(feature_ids): if feature not in previously_gathered_features: @@ -214,8 +244,8 @@ def fill_prevously_gathered_features(feature_list, feature_ids): return previously_gathered_features -def gather_feature_list(gathered_data_dict, feature_ids, to_list=False): - features_array = None +def gather_feature_list(gathered_data_dict: dict[int, list[list[float]]], feature_ids: list[int]) -> list[list[float]]: + features_array: np.array = None axis_x_size = max(map(lambda x: len(x), gathered_data_dict.values())) if axis_x_size <= 0: return [] @@ -226,10 +256,10 @@ def gather_feature_list(gathered_data_dict, feature_ids, to_list=False): features_array = np.asarray(gathered_data_dict[feature]) else: features_array = np.concatenate([features_array, 
gathered_data_dict[feature]], axis=1) - return features_array.tolist() if to_list else features_array + return features_array.tolist() -def extract_exception(err): +def extract_exception(err: Exception) -> str: err_message = traceback.format_exception_only(type(err), err) if len(err_message): err_message = err_message[-1] @@ -263,16 +293,16 @@ def calculate_threshold( def calculate_threshold_for_text(text, cur_threshold, min_recalculated_threshold=0.8): - text_size = len(split_words(text, only_unique=True)) + text_size = len(split_words(text)) return calculate_threshold( text_size, cur_threshold, min_recalculated_threshold=min_recalculated_threshold) -def build_more_like_this_query(min_should_match, log_message, - field_name="message", boost=1.0, +def build_more_like_this_query(min_should_match: str, log_message, + field_name: str = "message", boost: float = 1.0, override_min_should_match=None, - max_query_terms=50): + max_query_terms: int = 50): return {"more_like_this": { "fields": [field_name], "like": log_message, @@ -283,7 +313,8 @@ def build_more_like_this_query(min_should_match, log_message, "boost": boost}} -def append_potential_status_codes(query, log, *, boost=8.0, max_query_terms=50): +def append_potential_status_codes( + query: dict[str, Any], log: dict[str, Any], *, boost: float = 8.0, max_query_terms: int = 50) -> None: potential_status_codes = log["_source"]["potential_status_codes"].strip() if potential_status_codes: number_of_status_codes = str(len(set(potential_status_codes.split()))) @@ -304,3 +335,18 @@ def extract_clustering_setting(cluster_id): return False last_bit = cluster_id % 10 return (last_bit % 2) == 1 + + +def create_path(query: dict, path: tuple[str, ...], value: Any) -> Any: + """Create path in a dictionary and assign passed value on the last element in path.""" + path_length = len(path) + last_element = path[path_length - 1] + current_node = query + for i in range(path_length - 1): + element = path[i] + if element not in current_node: + current_node[element] = {} + current_node = current_node[element] + if last_element not in current_node: + current_node[last_element] = value + return current_node[last_element] diff --git a/requirements-dev.txt b/requirements-dev.txt index c9d81183..e3bafc55 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,7 +1,8 @@ +pytest +pytest-cov +moto[server,s3] pika-stubs==0.1.3 httpretty==1.0.5 -pytest==7.4.0 -pytest-cov==4.1.0 flake8==5.0.4 sure==2.0.1 freezegun==1.2.2 diff --git a/requirements.txt b/requirements.txt index 6e0dcfd7..a6db77ec 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,21 +1,22 @@ flask==2.3.2 -flask-cors==3.0.10 +flask-cors==5.0.0 +Flask-WTF==1.2.1 waitress==2.1.2 uWSGI==2.0.22 pika==1.2.1 pydantic==1.10.13 elasticsearch==7.0.0 -requests==2.31.0 +requests==2.32.2 bump2version==1.0.1 nltk==3.8.1 -scikit-learn==1.2.1 +scikit-learn==1.5.0 numpy==1.23.5 scipy==1.10.1 xgboost==1.7.3 python-dateutil==2.8.2 minio==6.0.0 -gensim==4.3.0 +gensim==4.3.2 pandas==1.5.3 -imbalanced-learn==0.9.1 -urllib3==1.26.18 +imbalanced-learn==0.12.3 +urllib3==1.26.19 certifi>=2023.7.22 diff --git a/requirements_windows.txt b/requirements_windows.txt index acde0cbe..98a46ee0 100644 --- a/requirements_windows.txt +++ b/requirements_windows.txt @@ -1,15 +1,15 @@ flask==2.3.2 -flask-cors==3.0.10 +flask-cors==5.0.0 waitress==2.1.2 pika==1.2.1 pydantic==1.10.13 elasticsearch==7.0.0 -requests==2.31.0 +requests==2.32.0 assertpy==1.1 bump2version==1.0.1 flake8==5.0.4 nltk==3.8.1 -scikit-learn==1.2.1 
+scikit-learn==1.5.0 numpy==1.23.5 scipy==1.10.1 xgboost==1.7.3 @@ -17,5 +17,5 @@ python-dateutil==2.8.2 minio==6.0.0 gensim==4.3.0 pandas==1.5.3 -imbalanced-learn==0.9.1 -urllib3==1.26.18 +imbalanced-learn==0.12.3 +urllib3==1.26.19 diff --git a/res/logging.conf b/res/logging.conf index 4df21b9c..fe141446 100644 --- a/res/logging.conf +++ b/res/logging.conf @@ -35,4 +35,4 @@ formatter=myFormatter args=('%(logfilename)s',) [formatter_myFormatter] -format=%(asctime)s - %(levelname)s - %(name)s - %(message)s +format=%(asctime)s - [%(correlation_id)s] - %(levelname)s - %(name)s - %(message)s diff --git a/res/model/auto_2023-08-17/boost_model.pickle b/res/model/auto_2023-08-17/boost_model.pickle deleted file mode 100644 index c45e8c16..00000000 Binary files a/res/model/auto_2023-08-17/boost_model.pickle and /dev/null differ diff --git a/res/model/auto_2023-08-17/data_features_config.pickle b/res/model/auto_2023-08-17/data_features_config.pickle deleted file mode 100644 index 61838867..00000000 Binary files a/res/model/auto_2023-08-17/data_features_config.pickle and /dev/null differ diff --git a/res/model/auto_2023-08-17/features_dict_with_saved_objects.pickle b/res/model/auto_2023-08-17/features_dict_with_saved_objects.pickle deleted file mode 100644 index e2ecf720..00000000 --- a/res/model/auto_2023-08-17/features_dict_with_saved_objects.pickle +++ /dev/null @@ -1 +0,0 @@ -}. \ No newline at end of file diff --git a/res/model/auto_analysis_model_2024-08-14/boost_model.pickle b/res/model/auto_analysis_model_2024-08-14/boost_model.pickle new file mode 100644 index 00000000..11c2e54b Binary files /dev/null and b/res/model/auto_analysis_model_2024-08-14/boost_model.pickle differ diff --git a/res/model/auto_analysis_model_2024-08-14/data_features_config.pickle b/res/model/auto_analysis_model_2024-08-14/data_features_config.pickle new file mode 100644 index 00000000..54c7cac0 Binary files /dev/null and b/res/model/auto_analysis_model_2024-08-14/data_features_config.pickle differ diff --git a/res/model/auto_retrain_data_features_config.pickle b/res/model/auto_retrain_data_features_config.pickle deleted file mode 100644 index 2711d9f9..00000000 Binary files a/res/model/auto_retrain_data_features_config.pickle and /dev/null differ diff --git a/res/model/global_defect_type_model_2023-08-04/count_vectorizer_models.pickle b/res/model/global_defect_type_model_2023-08-04/count_vectorizer_models.pickle deleted file mode 100644 index 2a6f81fa..00000000 Binary files a/res/model/global_defect_type_model_2023-08-04/count_vectorizer_models.pickle and /dev/null differ diff --git a/res/model/global_defect_type_model_2023-08-04/models.pickle b/res/model/global_defect_type_model_2023-08-04/models.pickle deleted file mode 100644 index 965fb8b7..00000000 Binary files a/res/model/global_defect_type_model_2023-08-04/models.pickle and /dev/null differ diff --git a/res/model/global_defect_type_model_2024-07-11/count_vectorizer_models.pickle b/res/model/global_defect_type_model_2024-07-11/count_vectorizer_models.pickle new file mode 100644 index 00000000..e0df8827 Binary files /dev/null and b/res/model/global_defect_type_model_2024-07-11/count_vectorizer_models.pickle differ diff --git a/res/model/global_defect_type_model_2024-07-11/models.pickle b/res/model/global_defect_type_model_2024-07-11/models.pickle new file mode 100644 index 00000000..d31e9994 Binary files /dev/null and b/res/model/global_defect_type_model_2024-07-11/models.pickle differ diff --git a/res/model/suggest_2023-08-17/boost_model.pickle 
b/res/model/suggest_2023-08-17/boost_model.pickle deleted file mode 100644 index f0606dd3..00000000 Binary files a/res/model/suggest_2023-08-17/boost_model.pickle and /dev/null differ diff --git a/res/model/suggest_2023-08-17/data_features_config.pickle b/res/model/suggest_2023-08-17/data_features_config.pickle deleted file mode 100644 index 5309dc60..00000000 Binary files a/res/model/suggest_2023-08-17/data_features_config.pickle and /dev/null differ diff --git a/res/model/suggest_2023-08-17/features_dict_with_saved_objects.pickle b/res/model/suggest_2023-08-17/features_dict_with_saved_objects.pickle deleted file mode 100644 index e2ecf720..00000000 --- a/res/model/suggest_2023-08-17/features_dict_with_saved_objects.pickle +++ /dev/null @@ -1 +0,0 @@ -}. \ No newline at end of file diff --git a/res/model/suggest_retrain_data_features_config.pickle b/res/model/suggest_retrain_data_features_config.pickle deleted file mode 100644 index 7b591cf7..00000000 Binary files a/res/model/suggest_retrain_data_features_config.pickle and /dev/null differ diff --git a/res/model/suggestion_model_2024-08-14/boost_model.pickle b/res/model/suggestion_model_2024-08-14/boost_model.pickle new file mode 100644 index 00000000..7ab70351 Binary files /dev/null and b/res/model/suggestion_model_2024-08-14/boost_model.pickle differ diff --git a/res/model/suggestion_model_2024-08-14/data_features_config.pickle b/res/model/suggestion_model_2024-08-14/data_features_config.pickle new file mode 100644 index 00000000..54c7cac0 Binary files /dev/null and b/res/model/suggestion_model_2024-08-14/data_features_config.pickle differ diff --git a/res/model/weights_2024-06-12/weights.pickle b/res/model/weights_2024-06-12/weights.pickle new file mode 100644 index 00000000..204d6c07 Binary files /dev/null and b/res/model/weights_2024-06-12/weights.pickle differ diff --git a/res/model/weights_24.11.20/weights.pickle b/res/model/weights_24.11.20/weights.pickle deleted file mode 100644 index 77bd12d6..00000000 Binary files a/res/model/weights_24.11.20/weights.pickle and /dev/null differ diff --git a/res/model_settings.json b/res/model_settings.json index 41c94f40..27ca9293 100644 --- a/res/model_settings.json +++ b/res/model_settings.json @@ -1,8 +1,6 @@ { - "BOOST_MODEL_FOLDER": "res/model/auto_2023-08-17", - "SUGGEST_BOOST_MODEL_FOLDER": "res/model/suggest_2023-08-17", - "SIMILARITY_WEIGHTS_FOLDER": "res/model/weights_24.11.20", - "GLOBAL_DEFECT_TYPE_MODEL_FOLDER": "res/model/global_defect_type_model_2023-08-04", - "RETRAIN_SUGGEST_BOOST_MODEL_CONFIG": "res/model/suggest_retrain_data_features_config.pickle", - "RETRAIN_AUTO_BOOST_MODEL_CONFIG": "res/model/auto_retrain_data_features_config.pickle" + "BOOST_MODEL_FOLDER": "res/model/auto_analysis_model_2024-08-14", + "SUGGEST_BOOST_MODEL_FOLDER": "res/model/suggestion_model_2024-08-14", + "SIMILARITY_WEIGHTS_FOLDER": "res/model/weights_2024-06-12", + "GLOBAL_DEFECT_TYPE_MODEL_FOLDER": "res/model/global_defect_type_model_2024-07-11" } diff --git a/test/__init__.py b/test/__init__.py index edba71ce..d1f0b648 100644 --- a/test/__init__.py +++ b/test/__init__.py @@ -13,21 +13,67 @@ # limitations under the License. 
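The new create_path helper added to app/utils/utils.py above builds a nested dictionary path (presumably for composing Elasticsearch query bodies) and returns the value at the leaf. A quick sketch of the assumed behaviour, inferred from the implementation rather than from documentation:

from app.utils.utils import create_path

query = {}
# Creates the intermediate dicts and assigns [] at the leaf, returning that list.
filters = create_path(query, ("query", "bool", "filter"), [])
filters.append({"term": {"launch_id": 123}})
# query == {"query": {"bool": {"filter": [{"term": {"launch_id": 123}}]}}}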
import os -from typing import List +import random +import string +from app.commons.model.launch_objects import SearchConfig, ApplicationConfig from app.utils.utils import read_json_file -DEFAULT_ES_CONFIG = {'esHost': 'http://localhost:9200', 'esVerifyCerts': False, 'esUseSsl': False, - 'esSslShowWarn': False, 'esCAcert': None, 'esClientCert': None, 'esClientKey': None, - 'esUser': None, 'turnOffSslVerification': True, 'esProjectIndexPrefix': '', 'esChunkNumber': 1000} +DEFAULT_ES_CONFIG = ApplicationConfig( + esHost='http://localhost:9200', + esVerifyCerts=False, + esUseSsl=False, + esSslShowWarn=False, + esCAcert='', + esClientCert='', + esClientKey='', + esUser='', + turnOffSslVerification=True, + esProjectIndexPrefix='', + esChunkNumber=1000 +) + +APP_CONFIG = ApplicationConfig( + esHost='http://localhost:9200', + esUser='', + esPassword='', + esVerifyCerts=False, + esUseSsl=False, + esSslShowWarn=False, + turnOffSslVerification=True, + esCAcert='', + esClientCert='', + esClientKey='', + appVersion='', + minioRegion='', + minioBucketPrefix='', + filesystemDefaultPath='', + esChunkNumber=1000, + binaryStoreType='filesystem', + minioHost='', + minioAccessKey='', + minioSecretKey='', + esProjectIndexPrefix='rp_', + esChunkNumberUpdateClusters=500 +) + DEFAULT_BOOST_LAUNCH = 8.0 -DEFAULT_SEARCH_CONFIG = {'SimilarityWeightsFolder': '', 'BoostLaunch': DEFAULT_BOOST_LAUNCH} +DEFAULT_SEARCH_CONFIG = SearchConfig(BoostLaunch=DEFAULT_BOOST_LAUNCH) def get_fixture(fixture_name, to_json=False): return read_json_file("test_res/fixtures", fixture_name, to_json) -def read_file_lines(folder: str, filename: str) -> List[str]: +def read_file_lines(folder: str, filename: str) -> list[str]: with open(os.path.join(folder, filename), "r") as file: return file.readlines() + + +def read_file(folder: str, filename: str) -> str: + with open(os.path.join(folder, filename), "r") as file: + return file.read() + + +def random_alphanumeric(num: int): + return ''.join(random.choices(string.ascii_letters + string.digits, k=num)) diff --git a/test/commons/test_esclient.py b/test/commons/test_esclient.py index e9f08519..bd4f250e 100644 --- a/test/commons/test_esclient.py +++ b/test/commons/test_esclient.py @@ -18,12 +18,12 @@ from unittest.mock import MagicMock import httpretty -import sure +from elasticsearch import RequestError -import app.commons.launch_objects as launch_objects from app.commons import esclient +from app.commons.model import launch_objects from app.utils import utils -from test import get_fixture +from test import get_fixture, APP_CONFIG from test.mock_service import TestService @@ -61,8 +61,7 @@ def test_list_indices(self): try: self._start_server(test["test_calls"]) - es_client = esclient.EsClient(app_config=self.app_config, - search_cfg=self.get_default_search_config()) + es_client = esclient.EsClient(app_config=self.app_config) response = es_client.list_indices() assert test["expected_count"] == len(response) @@ -72,50 +71,56 @@ def test_list_indices(self): raise AssertionError(f'Error in the test case number: {idx}'). 
\ with_traceback(err.__traceback__) - @utils.ignore_warnings def test_create_index(self): """Test creating index""" - tests = [ - { - "test_calls": [{"method": httpretty.PUT, - "uri": "/idx0", - "status": HTTPStatus.OK, - "content_type": "application/json", - "rs": get_fixture(self.index_created_rs), - }, ], - "index": "idx0", - "acknowledged": True, - }, - { - "test_calls": [{"method": httpretty.PUT, - "uri": "/idx1", - "status": HTTPStatus.BAD_REQUEST, - "content_type": "application/json", - "rs": get_fixture( - self.index_already_exists_rs), - }, ], - "index": "idx1", - "acknowledged": False, - }, - ] - for idx, test in enumerate(tests): - try: - self._start_server(test["test_calls"]) - - es_client = esclient.EsClient(app_config=self.app_config, - search_cfg=self.get_default_search_config()) - - response = es_client.create_index(test["index"]) - assert test["acknowledged"] == response.acknowledged - - TestEsClient.shutdown_server(test["test_calls"]) - except AssertionError as err: - raise AssertionError(f'Error in the test case number: {idx}'). \ - with_traceback(err.__traceback__) + test = { + "test_calls": [{"method": httpretty.PUT, + "uri": "/idx0", + "status": HTTPStatus.OK, + "content_type": "application/json", + "rs": get_fixture(self.index_created_rs), + }, ], + "index": "idx0", + "acknowledged": True, + } + self._start_server(test["test_calls"]) + + es_client = esclient.EsClient(app_config=self.app_config) + + response = es_client.create_index(test["index"]) + assert test["acknowledged"] == response.acknowledged + + TestEsClient.shutdown_server(test["test_calls"]) + + def test_create_index_error(self): + """Test creating index""" + test = { + "test_calls": [{"method": httpretty.PUT, + "uri": "/idx1", + "status": HTTPStatus.BAD_REQUEST, + "content_type": "application/json", + "rs": get_fixture( + self.index_already_exists_rs), + }, ], + "index": "idx1", + "acknowledged": False, + } + try: + self._start_server(test["test_calls"]) + + es_client = esclient.EsClient(app_config=self.app_config) + + es_client.create_index(test["index"]) + raise AssertionError("Expected RequestError") + + except RequestError: + pass + finally: + TestEsClient.shutdown_server(test["test_calls"]) @utils.ignore_warnings def test_exists_index(self): - """Test existance of a index""" + """Test existence of an index""" tests = [ { "test_calls": [{"method": httpretty.GET, @@ -138,8 +143,7 @@ def test_exists_index(self): try: self._start_server(test["test_calls"]) - es_client = esclient.EsClient(app_config=self.app_config, - search_cfg=self.get_default_search_config()) + es_client = esclient.EsClient(app_config=self.app_config) response = es_client.index_exists(test["index"]) assert test["exists"] == response @@ -178,8 +182,7 @@ def test_delete_index(self): try: self._start_server(test["test_calls"]) - es_client = esclient.EsClient(app_config=self.app_config, - search_cfg=self.get_default_search_config()) + es_client = esclient.EsClient(app_config=self.app_config) response = es_client.delete_index(test["index"]) @@ -305,28 +308,7 @@ def test_clean_index(self): "rs": get_fixture(self.index_logs_rs), }], "rq": launch_objects.CleanIndex(ids=[1], project=1), - "app_config": { - "esHost": "http://localhost:9200", - "esUser": "", - "esPassword": "", - "esVerifyCerts": False, - "esUseSsl": False, - "esSslShowWarn": False, - "turnOffSslVerification": True, - "esCAcert": "", - "esClientCert": "", - "esClientKey": "", - "appVersion": "", - "minioRegion": "", - "minioBucketPrefix": "", - "filesystemDefaultPath": "", - 
"esChunkNumber": 1000, - "binaryStoreType": "minio", - "minioHost": "", - "minioAccessKey": "", - "minioSecretKey": "", - "esProjectIndexPrefix": "rp_" - }, + "app_config": APP_CONFIG, "expected_count": 1 }, { @@ -335,48 +317,25 @@ def test_clean_index(self): "status": HTTPStatus.NOT_FOUND, }], "rq": launch_objects.CleanIndex(ids=[1], project=2), - "app_config": { - "esHost": "http://localhost:9200", - "esUser": "", - "esPassword": "", - "esVerifyCerts": False, - "esUseSsl": False, - "esSslShowWarn": False, - "turnOffSslVerification": True, - "esCAcert": "", - "esClientCert": "", - "esClientKey": "", - "appVersion": "", - "minioRegion": "", - "minioBucketPrefix": "", - "filesystemDefaultPath": "", - "esChunkNumber": 1000, - "binaryStoreType": "minio", - "minioHost": "", - "minioAccessKey": "", - "minioSecretKey": "", - "esProjectIndexPrefix": "rp_" - }, + "app_config": APP_CONFIG, "expected_count": 0 } ] for idx, test in enumerate(tests): - with sure.ensure('Error in the test case index: {0}', idx): - self._start_server(test["test_calls"]) - app_config = self.app_config - if "app_config" in test: - app_config = test["app_config"] - es_client = esclient.EsClient(app_config=app_config, - search_cfg=self.get_default_search_config()) - es_client.es_client.scroll = MagicMock(return_value=json.loads( - get_fixture(self.no_hits_search_rs))) + print(f"Test index: {idx}") + self._start_server(test["test_calls"]) + app_config = self.app_config + if "app_config" in test: + app_config = test["app_config"] + es_client = esclient.EsClient(app_config=app_config) + es_client.es_client.scroll = MagicMock(return_value=json.loads(get_fixture(self.no_hits_search_rs))) - response = es_client.delete_logs(test["rq"]) + response = es_client.delete_logs(test["rq"]) - test["expected_count"].should.equal(response) + assert test["expected_count"] == response - TestEsClient.shutdown_server(test["test_calls"]) + TestEsClient.shutdown_server(test["test_calls"]) @utils.ignore_warnings def test_index_logs(self): @@ -423,6 +382,7 @@ def test_index_logs(self): {"method": httpretty.PUT, "uri": "/2", "status": HTTPStatus.OK, + "content_type": "application/json", "rs": get_fixture(self.index_created_rs), }, {"method": httpretty.POST, @@ -476,6 +436,7 @@ def test_index_logs(self): {"method": httpretty.PUT, "uri": "/2", "status": HTTPStatus.OK, + "content_type": "application/json", "rs": get_fixture(self.index_created_rs), }, {"method": httpretty.POST, @@ -525,6 +486,7 @@ def test_index_logs(self): {"method": httpretty.PUT, "uri": "/rp_2", "status": HTTPStatus.OK, + "content_type": "application/json", "rs": get_fixture(self.index_created_rs), }, {"method": httpretty.POST, @@ -564,28 +526,7 @@ def test_index_logs(self): "index_rq": get_fixture(self.launch_w_test_items_w_logs_different_log_level), "has_errors": False, "expected_count": 1, - "app_config": { - "esHost": "http://localhost:9200", - "esUser": "", - "esPassword": "", - "esVerifyCerts": False, - "esUseSsl": False, - "esSslShowWarn": False, - "turnOffSslVerification": True, - "esCAcert": "", - "esClientCert": "", - "esClientKey": "", - "appVersion": "", - "minioRegion": "", - "minioBucketPrefix": "", - "filesystemDefaultPath": "", - "esChunkNumber": 1000, - "binaryStoreType": "minio", - "minioHost": "", - "minioAccessKey": "", - "minioSecretKey": "", - "esProjectIndexPrefix": "rp_" - }, + "app_config": APP_CONFIG, "expected_log_exceptions": [launch_objects.LogExceptionResult(logId=1, foundExceptions=[])] }, { @@ -596,6 +537,7 @@ def test_index_logs(self): {"method": 
httpretty.PUT, "uri": "/2", "status": HTTPStatus.OK, + "content_type": "application/json", "rs": get_fixture(self.index_created_rs), }, {"method": httpretty.POST, @@ -644,21 +586,21 @@ def test_index_logs(self): ] for idx, test in enumerate(tests): - with sure.ensure('Error in the test case index: {0}', idx): - self._start_server(test["test_calls"]) - app_config = self.app_config - if "app_config" in test: - app_config = test["app_config"] - es_client = esclient.EsClient(app_config=app_config, search_cfg=self.get_default_search_config()) - es_client.es_client.scroll = MagicMock(return_value=json.loads(get_fixture(self.no_hits_search_rs))) - launches = [launch_objects.Launch(**launch) for launch in json.loads(test["index_rq"])] - response = es_client.index_logs(launches) - - test["has_errors"].should.equal(response.errors) - test["expected_count"].should.equal(response.took) - test["expected_log_exceptions"].should.equal(response.logResults) - - TestEsClient.shutdown_server(test["test_calls"]) + print(f"Test index: {idx}") + self._start_server(test["test_calls"]) + app_config = self.app_config + if "app_config" in test: + app_config = test["app_config"] + es_client = esclient.EsClient(app_config=app_config) + es_client.es_client.scroll = MagicMock(return_value=json.loads(get_fixture(self.no_hits_search_rs))) + launches = [launch_objects.Launch(**launch) for launch in json.loads(test["index_rq"])] + response = es_client.index_logs(launches) + + assert test["has_errors"] == response.errors + assert test["expected_count"] == response.took + assert test["expected_log_exceptions"] == response.logResults + + TestEsClient.shutdown_server(test["test_calls"]) def test_defect_update(self): tests = [ @@ -739,8 +681,7 @@ def test_defect_update(self): app_config = self.app_config if "app_config" in test: app_config = test["app_config"] - es_client = esclient.EsClient(app_config=app_config, - search_cfg=self.get_default_search_config()) + es_client = esclient.EsClient(app_config=app_config) es_client.es_client.scroll = MagicMock(return_value=json.loads( get_fixture(self.no_hits_search_rs))) response = es_client.defect_update(test["defect_update_info"]) @@ -797,28 +738,7 @@ def test_remove_test_items(self): self.delete_by_query_1), "rs": json.dumps({"deleted": 3}), }], - "app_config": { - "esHost": "http://localhost:9200", - "esUser": "", - "esPassword": "", - "esVerifyCerts": False, - "esUseSsl": False, - "esSslShowWarn": False, - "turnOffSslVerification": True, - "esCAcert": "", - "esClientCert": "", - "esClientKey": "", - "appVersion": "", - "minioRegion": "", - "minioBucketPrefix": "", - "filesystemDefaultPath": "", - "esChunkNumber": 1000, - "binaryStoreType": "minio", - "minioHost": "", - "minioAccessKey": "", - "minioSecretKey": "", - "esProjectIndexPrefix": "rp_" - }, + "app_config": APP_CONFIG, "item_remove_info": { "project": 1, "itemsToDelete": [1, 2]}, @@ -831,8 +751,7 @@ def test_remove_test_items(self): app_config = self.app_config if "app_config" in test: app_config = test["app_config"] - es_client = esclient.EsClient(app_config=app_config, - search_cfg=self.get_default_search_config()) + es_client = esclient.EsClient(app_config=app_config) es_client.es_client.scroll = MagicMock(return_value=json.loads( get_fixture(self.no_hits_search_rs))) response = es_client.remove_test_items(test["item_remove_info"]) @@ -888,28 +807,7 @@ def test_launches(self): "rq": get_fixture(self.delete_by_query_2), "rs": json.dumps({"deleted": 3}), }], - "app_config": { - "esHost": "http://localhost:9200", - 
"esUser": "", - "esPassword": "", - "esVerifyCerts": False, - "esUseSsl": False, - "esSslShowWarn": False, - "turnOffSslVerification": True, - "esCAcert": "", - "esClientCert": "", - "esClientKey": "", - "appVersion": "", - "minioRegion": "", - "minioBucketPrefix": "", - "filesystemDefaultPath": "", - "esChunkNumber": 1000, - "binaryStoreType": "minio", - "minioHost": "", - "minioAccessKey": "", - "minioSecretKey": "", - "esProjectIndexPrefix": "rp_" - }, + "app_config": APP_CONFIG, "launch_remove_info": { "project": 1, "launch_ids": [1, 2]}, @@ -922,8 +820,7 @@ def test_launches(self): app_config = self.app_config if "app_config" in test: app_config = test["app_config"] - es_client = esclient.EsClient(app_config=app_config, - search_cfg=self.get_default_search_config()) + es_client = esclient.EsClient(app_config=app_config) es_client.es_client.scroll = MagicMock(return_value=json.loads( get_fixture(self.no_hits_search_rs))) response = es_client.remove_launches(test["launch_remove_info"]) diff --git a/test/boosting_decision_making/__init__.py b/test/machine_learning/__init__.py similarity index 100% rename from test/boosting_decision_making/__init__.py rename to test/machine_learning/__init__.py diff --git a/test/boosting_decision_making/test_boosting_featurizer.py b/test/machine_learning/test_boosting_featurizer.py similarity index 75% rename from test/boosting_decision_making/test_boosting_featurizer.py rename to test/machine_learning/test_boosting_featurizer.py index aade7944..50af06e4 100644 --- a/test/boosting_decision_making/test_boosting_featurizer.py +++ b/test/machine_learning/test_boosting_featurizer.py @@ -12,14 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import logging import unittest -import sure - -from app.boosting_decision_making import weighted_similarity_calculator -from app.boosting_decision_making.boosting_featurizer import BoostingFeaturizer -from app.boosting_decision_making.suggest_boosting_featurizer import SuggestBoostingFeaturizer +from app.commons.object_saving import create_filesystem +from app.machine_learning.boosting_featurizer import BoostingFeaturizer +from app.machine_learning.models.weighted_similarity_calculator import WeightedSimilarityCalculator +from app.machine_learning.suggest_boosting_featurizer import SuggestBoostingFeaturizer from app.utils import utils from test import get_fixture @@ -42,12 +40,7 @@ def setUp(self): self.one_hit_search_rs_explained_wo_params = "one_hit_search_rs_explained_wo_params.json" self.epsilon = 0.0001 model_settings = utils.read_json_file("res", "model_settings.json", to_json=True) - self.weights_folder = model_settings["SIMILARITY_WEIGHTS_FOLDER"] - logging.disable(logging.CRITICAL) - - @utils.ignore_warnings - def tearDown(self): - logging.disable(logging.DEBUG) + self.weights_folder = model_settings['SIMILARITY_WEIGHTS_FOLDER'] @staticmethod @utils.ignore_warnings @@ -61,7 +54,7 @@ def get_default_config( filter_fields_any = [] return { "max_query_terms": 50, - "min_should_match": 0.47, + "min_should_match": 0.41, "min_word_length": 0, "filter_min_should_match": filter_fields, "filter_min_should_match_any": filter_fields_any, @@ -97,36 +90,31 @@ def test_normalize_results(self): }, ]], }, ] - weight_log_sim = weighted_similarity_calculator. 
\ - WeightedSimilarityCalculator(folder=self.weights_folder) + weight_log_sim = WeightedSimilarityCalculator(create_filesystem(self.weights_folder)) + weight_log_sim.load_model() for idx, test in enumerate(tests): - with sure.ensure('Error in the test case index: {0}', idx): - _boosting_featurizer = BoostingFeaturizer( - test["elastic_results"], - test["config"], - [], - weighted_log_similarity_calculator=weight_log_sim) - _boosting_featurizer.all_results.should.have.length_of(len(test["result"])) - for i in range(len(test["result"])): - for j in range(len(test["result"][i])): - for field in test["result"][i][j]: - elastic_res = _boosting_featurizer.all_results[i][1][j] - elastic_res[field].should.equal(test["result"][i][j][field], - epsilon=self.epsilon) + print(f"Test index: {idx}") + _boosting_featurizer = BoostingFeaturizer( + test["elastic_results"], test["config"], [], weighted_log_similarity_calculator=weight_log_sim) + assert len(_boosting_featurizer.all_results) == len(test["result"]) + for i in range(len(test["result"])): + for j in range(len(test["result"][i])): + for field in test["result"][i][j]: + elastic_res = _boosting_featurizer.all_results[i][1][j] + assert abs(elastic_res[field] - test["result"][i][j][field]) <= self.epsilon def assert_scores_by_issue_type(self, boosting_featurizer, test): scores_by_issue_type = boosting_featurizer.find_most_relevant_by_type() - scores_by_issue_type.should.have.length_of(len(test["result"])) + assert scores_by_issue_type.keys() == test["result"].keys() for issue_type in test["result"]: - scores_by_issue_type.keys().should.contain(issue_type) elastic_res = scores_by_issue_type[issue_type] for field in test["result"][issue_type]: if type(test["result"][issue_type][field]) != dict: - elastic_res[field].should.equal(test["result"][issue_type][field], epsilon=self.epsilon) + assert abs(elastic_res[field] - test["result"][issue_type][field]) <= self.epsilon else: for field_dict in test["result"][issue_type][field]: result_field_dict = test["result"][issue_type][field][field_dict] - elastic_res[field][field_dict].should.equal(result_field_dict, epsilon=self.epsilon) + assert elastic_res[field][field_dict] == result_field_dict @utils.ignore_warnings def test_find_most_relevant_by_type(self): @@ -137,9 +125,10 @@ def test_find_most_relevant_by_type(self): "result": {}, }, { - "elastic_results": [(get_fixture(self.log_message, to_json=True), - get_fixture( - self.one_hit_search_rs_explained, to_json=True))], + "elastic_results": [ + (get_fixture(self.log_message, to_json=True), + get_fixture(self.one_hit_search_rs_explained, to_json=True)) + ], "config": TestBoostingFeaturizer.get_default_config(), "result": {"AB001": {"mrHit": {"_score": 158.08437, "_id": "1"}, @@ -149,8 +138,10 @@ def test_find_most_relevant_by_type(self): } }, { - "elastic_results": [(get_fixture(self.log_message, to_json=True), - get_fixture(self.two_hits_search_rs_explained, to_json=True))], + "elastic_results": [ + (get_fixture(self.log_message, to_json=True), + get_fixture(self.two_hits_search_rs_explained, to_json=True)) + ], "config": TestBoostingFeaturizer.get_default_config(), "result": {"AB001": {"mrHit": {"_score": 158.08437, "_id": "1"}, @@ -165,10 +156,12 @@ def test_find_most_relevant_by_type(self): } }, { - "elastic_results": [(get_fixture(self.log_message, to_json=True), - get_fixture(self.two_hits_search_rs_explained, to_json=True)), - (get_fixture(self.log_message, to_json=True), - get_fixture(self.one_hit_search_rs_explained, to_json=True))], + 
"elastic_results": [ + (get_fixture(self.log_message, to_json=True), + get_fixture(self.two_hits_search_rs_explained, to_json=True)), + (get_fixture(self.log_message, to_json=True), + get_fixture(self.one_hit_search_rs_explained, to_json=True)) + ], "config": TestBoostingFeaturizer.get_default_config(), "result": {"AB001": {"mrHit": {"_score": 158.08437, "_id": "1"}, @@ -183,23 +176,20 @@ def test_find_most_relevant_by_type(self): } }, ] - weight_log_sim = weighted_similarity_calculator. \ - WeightedSimilarityCalculator(folder=self.weights_folder) + weight_log_sim = WeightedSimilarityCalculator(create_filesystem(self.weights_folder)) + weight_log_sim.load_model() for idx, test in enumerate(tests): - with sure.ensure('Error in the test case index: {0}', idx): - _boosting_featurizer = BoostingFeaturizer( - test["elastic_results"], - test["config"], - [], - weighted_log_similarity_calculator=weight_log_sim) - self.assert_scores_by_issue_type(_boosting_featurizer, test) + print(f"Test index: {idx}") + _boosting_featurizer = BoostingFeaturizer( + test["elastic_results"], test["config"], [], weighted_log_similarity_calculator=weight_log_sim) + self.assert_scores_by_issue_type(_boosting_featurizer, test) def assert_elastic_results(self, results, test): - results.should.have.length_of(len(test["result"])) + assert len(results) == len(test["result"]) for idx_res, (log, hits) in enumerate(results): - log["_id"].should.equal(test["result"][idx_res][0]["_id"]) + assert log["_id"] == test["result"][idx_res][0]["_id"] for i, hit in enumerate(hits["hits"]["hits"]): - hit["_id"].should.equal(test["result"][idx_res][1]["hits"]["hits"][i]["_id"]) + assert hit["_id"] == hits["hits"]["hits"][i]["_id"] @utils.ignore_warnings def test_filter_by_min_should_match(self): @@ -276,8 +266,8 @@ def test_filter_by_min_should_match(self): get_fixture(self.two_hits_search_rs_small_logs, to_json=True))] }, ] - weight_log_sim = weighted_similarity_calculator. 
\ - WeightedSimilarityCalculator(folder=self.weights_folder) + weight_log_sim = WeightedSimilarityCalculator(create_filesystem(self.weights_folder)) + weight_log_sim.load_model() for idx, test in enumerate(tests): try: _boosting_featurizer = BoostingFeaturizer( @@ -305,24 +295,24 @@ def test_find_most_relevant_by_type_for_suggests(self): "elastic_results": [(get_fixture(self.log_message, to_json=True), get_fixture(self.one_hit_search_rs_explained, to_json=True))], "config": TestBoostingFeaturizer.get_default_config(), - "result": {1: {"mrHit": {"_score": 158.08437, - "_id": "1"}, - "compared_log": get_fixture(self.log_message, to_json=True), - "score": 1.0, }, + "result": {'1': {"mrHit": {"_score": 158.08437, + "_id": "1"}, + "compared_log": get_fixture(self.log_message, to_json=True), + "score": 1.0, }, } }, { "elastic_results": [(get_fixture(self.log_message, to_json=True), get_fixture(self.two_hits_search_rs_explained, to_json=True))], "config": TestBoostingFeaturizer.get_default_config(), - "result": {1: {"mrHit": {"_score": 158.08437, - "_id": "1"}, - "compared_log": get_fixture(self.log_message, to_json=True), - "score": 1.0, }, - 2: {"mrHit": {"_score": 77.53298, - "_id": "2"}, - "compared_log": get_fixture(self.log_message, to_json=True), - "score": 0.4905, }, + "result": {'1': {"mrHit": {"_score": 158.08437, + "_id": "1"}, + "compared_log": get_fixture(self.log_message, to_json=True), + "score": 1.0, }, + '2': {"mrHit": {"_score": 77.53298, + "_id": "2"}, + "compared_log": get_fixture(self.log_message, to_json=True), + "score": 0.4905, }, } }, { @@ -331,27 +321,27 @@ def test_find_most_relevant_by_type_for_suggests(self): (get_fixture(self.log_message, to_json=True), get_fixture(self.three_hits_search_rs_explained, to_json=True))], "config": TestBoostingFeaturizer.get_default_config(), - "result": {1: {"mrHit": {"_score": 158.08437, - "_id": "1"}, - "compared_log": get_fixture(self.log_message, to_json=True), - "score": 0.9392, }, - 2: {"mrHit": {"_score": 168.31, - "_id": "2"}, - "compared_log": get_fixture(self.log_message, to_json=True), - "score": 1.0, } + "result": {'1': {"mrHit": {"_score": 158.08437, + "_id": "1"}, + "compared_log": get_fixture(self.log_message, to_json=True), + "score": 0.9392, }, + '2': {"mrHit": {"_score": 168.31, + "_id": "2"}, + "compared_log": get_fixture(self.log_message, to_json=True), + "score": 1.0, } } }, ] - weight_log_sim = weighted_similarity_calculator. \ - WeightedSimilarityCalculator(folder=self.weights_folder) + weight_log_sim = WeightedSimilarityCalculator(create_filesystem(self.weights_folder)) + weight_log_sim.load_model() for idx, test in enumerate(tests): - with sure.ensure('Error in the test case index: {0}', idx): - _boosting_featurizer = SuggestBoostingFeaturizer( - test["elastic_results"], - test["config"], - [], - weighted_log_similarity_calculator=weight_log_sim) - self.assert_scores_by_issue_type(_boosting_featurizer, test) + print(f"Test index: {idx}") + _boosting_featurizer = SuggestBoostingFeaturizer( + test["elastic_results"], + test["config"], + [], + weighted_log_similarity_calculator=weight_log_sim) + self.assert_scores_by_issue_type(_boosting_featurizer, test) @utils.ignore_warnings def test_filter_by_min_should_match_any(self): @@ -430,17 +420,13 @@ def test_filter_by_min_should_match_any(self): get_fixture(self.two_hits_search_rs_small_logs, to_json=True))] }, ] - weight_log_sim = weighted_similarity_calculator. 
\ - WeightedSimilarityCalculator(folder=self.weights_folder) + weight_log_sim = WeightedSimilarityCalculator(create_filesystem(self.weights_folder)) + weight_log_sim.load_model() for idx, test in enumerate(tests): - with sure.ensure('Error in the test case index: {0}', idx): - _boosting_featurizer = SuggestBoostingFeaturizer( - test["elastic_results"], - test["config"], - [], - weighted_log_similarity_calculator=weight_log_sim) - all_results = test["elastic_results"] - all_results = _boosting_featurizer.filter_by_min_should_match_any( - all_results, - fields=test["config"]["filter_min_should_match_any"]) - self.assert_elastic_results(all_results, test) + print(f"Test index: {idx}") + _boosting_featurizer = SuggestBoostingFeaturizer( + test["elastic_results"], test["config"], [], weighted_log_similarity_calculator=weight_log_sim) + all_results = test["elastic_results"] + all_results = _boosting_featurizer.filter_by_min_should_match_any( + all_results, fields=test["config"]["filter_min_should_match_any"]) + self.assert_elastic_results(all_results, test) diff --git a/test/boosting_decision_making/test_boosting_model.py b/test/machine_learning/test_boosting_model.py similarity index 52% rename from test/boosting_decision_making/test_boosting_model.py rename to test/machine_learning/test_boosting_model.py index 62153fd6..9a2e76c6 100644 --- a/test/boosting_decision_making/test_boosting_model.py +++ b/test/machine_learning/test_boosting_model.py @@ -16,13 +16,12 @@ import unittest import numpy as np -import sure -from app.boosting_decision_making import defect_type_model -from app.boosting_decision_making import weighted_similarity_calculator -from app.boosting_decision_making.boosting_decision_maker import BoostingDecisionMaker -from app.boosting_decision_making.boosting_featurizer import BoostingFeaturizer -from app.boosting_decision_making.suggest_boosting_featurizer import SuggestBoostingFeaturizer +from app.commons.object_saving import create_filesystem +from app.machine_learning.models import DefectTypeModel, WeightedSimilarityCalculator +from app.machine_learning.models.boosting_decision_maker import BoostingDecisionMaker +from app.machine_learning.boosting_featurizer import BoostingFeaturizer +from app.machine_learning.suggest_boosting_featurizer import SuggestBoostingFeaturizer from app.utils import utils from test import get_fixture @@ -43,8 +42,7 @@ def setUp(self): self.epsilon = 0.0001 model_settings = utils.read_json_file("res", "model_settings.json", to_json=True) self.boost_model_folder = model_settings["BOOST_MODEL_FOLDER"] - self.suggest_boost_model_folder = \ - model_settings["SUGGEST_BOOST_MODEL_FOLDER"] + self.suggest_boost_model_folder = model_settings["SUGGEST_BOOST_MODEL_FOLDER"] self.weights_folder = model_settings["SIMILARITY_WEIGHTS_FOLDER"] self.global_defect_type_model_folder = model_settings["GLOBAL_DEFECT_TYPE_MODEL_FOLDER"] logging.disable(logging.CRITICAL) @@ -76,12 +74,12 @@ def get_default_config( @utils.ignore_warnings def test_random_run(self): print("Weights model folder: ", self.weights_folder) - for folder in [self.boost_model_folder, - self.suggest_boost_model_folder]: + for folder in [self.boost_model_folder, self.suggest_boost_model_folder]: print("Boost model folder ", folder) - decision_maker = BoostingDecisionMaker(folder) + decision_maker = BoostingDecisionMaker(create_filesystem(folder)) + decision_maker.load_model() test_data_size = 5 - random_data = np.random.rand(test_data_size, len(decision_maker.get_feature_names())) + random_data = 
np.random.rand(test_data_size, len(decision_maker.feature_ids)).tolist() result, result_probability = decision_maker.predict(random_data) assert len(result) == test_data_size assert len(result_probability) == test_data_size @@ -91,7 +89,8 @@ def test_full_data_check(self): print("Boost model folder : ", self.boost_model_folder) print("Weights model folder : ", self.weights_folder) print("Global defect type model folder : ", self.global_defect_type_model_folder) - decision_maker = BoostingDecisionMaker(folder=self.boost_model_folder) + decision_maker = BoostingDecisionMaker(create_filesystem(self.boost_model_folder)) + decision_maker.load_model() boost_model_results = get_fixture(self.boost_model_results, to_json=True) tests = [] for log_lines, filter_fields, _decision_maker in [ @@ -101,75 +100,53 @@ def test_full_data_check(self): tests.extend([ { "elastic_results": [(get_fixture(self.log_message, to_json=True), - get_fixture( - self.one_hit_search_rs_explained, to_json=True))], - "config": self.get_default_config(number_of_log_lines=log_lines, - filter_fields=filter_fields), + get_fixture(self.one_hit_search_rs_explained, to_json=True))], + "config": self.get_default_config(number_of_log_lines=log_lines, filter_fields=filter_fields), "decision_maker": _decision_maker }, { "elastic_results": [(get_fixture(self.log_message, to_json=True), - get_fixture( - self.two_hits_search_rs_explained, to_json=True))], - "config": self.get_default_config(number_of_log_lines=log_lines, - filter_fields=filter_fields), + get_fixture(self.two_hits_search_rs_explained, to_json=True))], + "config": self.get_default_config(number_of_log_lines=log_lines, filter_fields=filter_fields), "decision_maker": _decision_maker }, { "elastic_results": [(get_fixture(self.log_message, to_json=True), - get_fixture( - self.two_hits_search_rs_explained, to_json=True)), + get_fixture(self.two_hits_search_rs_explained, to_json=True)), (get_fixture(self.log_message, to_json=True), - get_fixture( - self.one_hit_search_rs_explained, to_json=True))], - "config": self.get_default_config(number_of_log_lines=log_lines, - filter_fields=filter_fields), + get_fixture(self.one_hit_search_rs_explained, to_json=True))], + "config": self.get_default_config(number_of_log_lines=log_lines, filter_fields=filter_fields), "decision_maker": _decision_maker }, - # TODO: uncomment after similarity and data obfuscation fix - # { - # "elastic_results": [(self.get_fixture(self.log_message_only_small_logs), - # self.get_fixture(self.two_hits_search_rs_small_logs))], - # "config": TestBoostingModel.get_default_config( - # number_of_log_lines=log_lines, - # filter_fields=filter_fields, - # time_weight_decay=0.95, - # min_should_match=0.4 - # ), - # "decision_maker": _decision_maker - # }, ]) for idx, test in enumerate(tests): - feature_ids = test["decision_maker"].get_feature_ids() - feature_dict_objects = test["decision_maker"].features_dict_with_saved_objects + print(f'Running test {idx}') + feature_ids = test["decision_maker"].feature_ids weight_log_sim = None if self.weights_folder.strip(): - weight_log_sim = weighted_similarity_calculator. 
\ - WeightedSimilarityCalculator(folder=self.weights_folder) - _boosting_featurizer = BoostingFeaturizer(test["elastic_results"], - test["config"], - feature_ids, - weighted_log_similarity_calculator=weight_log_sim, - features_dict_with_saved_objects=feature_dict_objects) + weight_log_sim = WeightedSimilarityCalculator(create_filesystem(self.weights_folder)) + weight_log_sim.load_model() + _boosting_featurizer = BoostingFeaturizer( + test["elastic_results"], test["config"], feature_ids, + weighted_log_similarity_calculator=weight_log_sim) if self.global_defect_type_model_folder.strip(): - _boosting_featurizer.set_defect_type_model( - defect_type_model.DefectTypeModel(folder=self.global_defect_type_model_folder)) - with sure.ensure('Error in the test case index: {0}', idx): - gathered_data, issue_type_names = _boosting_featurizer.gather_features_info() - predict_label, predict_probability = test["decision_maker"].predict( - gathered_data) - gathered_data.should.equal(boost_model_results[str(idx)][0], epsilon=self.epsilon) - predict_label.tolist().should.equal(boost_model_results[str(idx)][1], epsilon=self.epsilon) - predict_probability.tolist().should.equal(boost_model_results[str(idx)][2], - epsilon=self.epsilon) + model = DefectTypeModel(create_filesystem(self.global_defect_type_model_folder)) + model.load_model() + _boosting_featurizer.set_defect_type_model(model) + gathered_data, issue_type_names = _boosting_featurizer.gather_features_info() + predict_label, predict_probability = test["decision_maker"].predict(gathered_data) + assert gathered_data == boost_model_results[str(idx)][0] + assert predict_label == boost_model_results[str(idx)][1] + assert predict_probability == boost_model_results[str(idx)][2] @utils.ignore_warnings def test_full_data_check_suggests(self): print("Boost model folder suggests: ", self.suggest_boost_model_folder) print("Weights model folder suggests: ", self.weights_folder) print("Global defect type model folder : ", self.global_defect_type_model_folder) - decision_maker = BoostingDecisionMaker(folder=self.suggest_boost_model_folder) + decision_maker = BoostingDecisionMaker(create_filesystem(self.suggest_boost_model_folder)) + decision_maker.load_model() boost_model_results = get_fixture(self.suggest_boost_model_results, to_json=True) tests = [] all_configs = [(-1, @@ -184,74 +161,58 @@ def test_full_data_check_suggests(self): tests.extend([ { "elastic_results": [(get_fixture(self.log_message_suggest, to_json=True), - get_fixture( - self.one_hit_search_rs_explained, to_json=True))], + get_fixture(self.one_hit_search_rs_explained, to_json=True))], "config": TestBoostingModel.get_default_config( - number_of_log_lines=log_lines, - filter_fields=[], - filter_fields_any=filter_fields_any, + number_of_log_lines=log_lines, filter_fields=[], filter_fields_any=filter_fields_any, min_should_match=0.4), "decision_maker": _decision_maker }, { "elastic_results": [(get_fixture(self.log_message_suggest, to_json=True), - get_fixture( - self.two_hits_search_rs_explained, to_json=True))], + get_fixture(self.two_hits_search_rs_explained, to_json=True))], "config": TestBoostingModel.get_default_config( - number_of_log_lines=log_lines, - filter_fields=[], - filter_fields_any=filter_fields_any, + number_of_log_lines=log_lines, filter_fields=[], filter_fields_any=filter_fields_any, min_should_match=0.4), "decision_maker": _decision_maker }, { "elastic_results": [(get_fixture(self.log_message_suggest, to_json=True), - get_fixture( - self.two_hits_search_rs_explained, to_json=True)), + 
get_fixture(self.two_hits_search_rs_explained, to_json=True)), (get_fixture(self.log_message_suggest, to_json=True), - get_fixture( - self.one_hit_search_rs_explained, to_json=True))], + get_fixture(self.one_hit_search_rs_explained, to_json=True))], "config": TestBoostingModel.get_default_config( - number_of_log_lines=log_lines, - filter_fields=[], - filter_fields_any=filter_fields_any, + number_of_log_lines=log_lines, filter_fields=[], filter_fields_any=filter_fields_any, min_should_match=0.4), "decision_maker": _decision_maker }, { "elastic_results": [(get_fixture(self.log_message_only_small_logs, to_json=True), - get_fixture( - self.two_hits_search_rs_small_logs, to_json=True))], + get_fixture(self.two_hits_search_rs_small_logs, to_json=True))], "config": TestBoostingModel.get_default_config( - number_of_log_lines=log_lines, - filter_fields=[], - filter_fields_any=filter_fields_any, + number_of_log_lines=log_lines, filter_fields=[], filter_fields_any=filter_fields_any, min_should_match=0.0), "decision_maker": _decision_maker }, ]) for idx, test in enumerate(tests): - feature_ids = test["decision_maker"].get_feature_ids() - feature_dict_objects = test["decision_maker"].features_dict_with_saved_objects + print(f'Running test {idx}') + feature_ids = test["decision_maker"].feature_ids weight_log_sim = None if self.weights_folder.strip(): - weight_log_sim = weighted_similarity_calculator. \ - WeightedSimilarityCalculator(folder=self.weights_folder) + weight_log_sim = WeightedSimilarityCalculator(create_filesystem(self.weights_folder)) + weight_log_sim.load_model() _boosting_featurizer = SuggestBoostingFeaturizer( test["elastic_results"], test["config"], feature_ids, - weighted_log_similarity_calculator=weight_log_sim, - features_dict_with_saved_objects=feature_dict_objects) + weighted_log_similarity_calculator=weight_log_sim) if self.global_defect_type_model_folder.strip(): - _boosting_featurizer.set_defect_type_model( - defect_type_model.DefectTypeModel(folder=self.global_defect_type_model_folder)) - with sure.ensure('Error in the test case index: {0}', idx): - gathered_data, test_item_ids = _boosting_featurizer.gather_features_info() - predict_label, predict_probability = test["decision_maker"].predict(gathered_data) - gathered_data.should.equal(boost_model_results[str(idx)][0], - epsilon=self.epsilon) - predict_label.tolist().should.equal(boost_model_results[str(idx)][1], - epsilon=self.epsilon) - predict_probability.tolist().should.equal(boost_model_results[str(idx)][2], - epsilon=self.epsilon) + model = DefectTypeModel(create_filesystem(self.global_defect_type_model_folder)) + model.load_model() + _boosting_featurizer.set_defect_type_model(model) + + gathered_data, test_item_ids = _boosting_featurizer.gather_features_info() + predict_label, predict_probability = test["decision_maker"].predict(gathered_data) + assert gathered_data == boost_model_results[str(idx)][0] + assert predict_label == boost_model_results[str(idx)][1] + assert predict_probability == boost_model_results[str(idx)][2] diff --git a/test/mock_service.py b/test/mock_service.py index d0e0d42f..53ce7b28 100644 --- a/test/mock_service.py +++ b/test/mock_service.py @@ -31,11 +31,14 @@ import httpretty from app.commons import model_chooser +from app.commons.model.launch_objects import SearchConfig, ApplicationConfig from app.utils import utils class TestService(unittest.TestCase): ERROR_LOGGING_LEVEL = 40000 + model_settings: dict + app_config: ApplicationConfig @utils.ignore_warnings def setUp(self): @@ -165,30 +168,34 @@ 
def setUp(self): self.launch_w_test_items_w_logs_with_clusters = "launch_w_test_items_w_logs_with_clusters.json" self.index_logs_rq_big_messages_with_clusters = \ "index_logs_rq_big_messages_with_clusters.json" - self.app_config = { - "esHost": "http://localhost:9200", - "esUser": "", - "esPassword": "", - "esVerifyCerts": False, - "esUseSsl": False, - "esSslShowWarn": False, - "turnOffSslVerification": True, - "esCAcert": "", - "esClientCert": "", - "esClientKey": "", - "appVersion": "", - "minioRegion": "", - "minioBucketPrefix": "", - "filesystemDefaultPath": "", - "esChunkNumber": 1000, - "binaryStoreType": "minio", - "minioHost": "", - "minioAccessKey": "", - "minioSecretKey": "", - "esProjectIndexPrefix": "", - "esChunkNumberUpdateClusters": 500 - } - self.model_settings = utils.read_json_file("res", "model_settings.json", to_json=True) + self.app_config = ApplicationConfig( + esHost='http://localhost:9200', + esUser="", + esPassword="", + esVerifyCerts=False, + esUseSsl=False, + esSslShowWarn=False, + turnOffSslVerification=True, + esCAcert="", + esClientCert="", + esClientKey="", + appVersion="", + minioRegion="", + minioBucketPrefix="", + filesystemDefaultPath="", + esChunkNumber=1000, + binaryStoreType="filesystem", + minioHost="", + minioAccessKey="", + minioSecretKey="", + esProjectIndexPrefix="", + esChunkNumberUpdateClusters=500 + ) + model_settings = utils.read_json_file('res', 'model_settings.json', to_json=True) + if model_settings and isinstance(model_settings, dict): + self.model_settings = model_settings + else: + raise RuntimeError('Failed to read model settings') self.model_chooser = model_chooser.ModelChooser(self.app_config, self.get_default_search_config()) logging.disable(logging.CRITICAL) @@ -197,62 +204,44 @@ def tearDown(self): logging.disable(logging.DEBUG) @utils.ignore_warnings - def get_default_search_config(self): + def get_default_search_config(self) -> SearchConfig: """Get default search config""" - return { - "MinShouldMatch": "80%", - "MinTermFreq": 1, - "MinDocFreq": 1, - "BoostAA": -2, - "BoostLaunch": 2, - "BoostTestCaseHash": 2, - "MaxQueryTerms": 50, - "SearchLogsMinShouldMatch": "95%", - "SearchLogsMinSimilarity": 0.95, - "MinWordLength": 0, - "TimeWeightDecay": 0.95, - "PatternLabelMinPercentToSuggest": 0.5, - "PatternLabelMinCountToSuggest": 5, - "PatternMinCountToSuggest": 10, - "BoostModelFolder": - self.model_settings["BOOST_MODEL_FOLDER"], - "SimilarityWeightsFolder": - self.model_settings["SIMILARITY_WEIGHTS_FOLDER"], - "SuggestBoostModelFolder": - self.model_settings["SUGGEST_BOOST_MODEL_FOLDER"], - "GlobalDefectTypeModelFolder": - self.model_settings["GLOBAL_DEFECT_TYPE_MODEL_FOLDER"], - "ProbabilityForCustomModelSuggestions": 0.9, - "ProbabilityForCustomModelAutoAnalysis": 0.1, - "RetrainSuggestBoostModelConfig": - self.model_settings["RETRAIN_SUGGEST_BOOST_MODEL_CONFIG"], - "RetrainAutoBoostModelConfig": - self.model_settings["RETRAIN_AUTO_BOOST_MODEL_CONFIG"], - "MaxSuggestionsNumber": 3, - "AutoAnalysisTimeout": 300, - "MaxAutoAnalysisItemsToProcess": 4000 - } + return SearchConfig( + MinShouldMatch='80%', + BoostAA=-2, + BoostLaunch=2, + BoostTestCaseHash=2, + MaxQueryTerms=50, + SearchLogsMinSimilarity=0.95, + MinWordLength=0, + TimeWeightDecay=0.95, + PatternLabelMinPercentToSuggest=0.5, + PatternLabelMinCountToSuggest=5, + PatternMinCountToSuggest=10, + BoostModelFolder=self.model_settings['BOOST_MODEL_FOLDER'], + SimilarityWeightsFolder=self.model_settings['SIMILARITY_WEIGHTS_FOLDER'], + 
SuggestBoostModelFolder=self.model_settings['SUGGEST_BOOST_MODEL_FOLDER'], + GlobalDefectTypeModelFolder=self.model_settings['GLOBAL_DEFECT_TYPE_MODEL_FOLDER'], + ProbabilityForCustomModelSuggestions=0.9, + ProbabilityForCustomModelAutoAnalysis=0.1, + MaxSuggestionsNumber=3, + AutoAnalysisTimeout=300, + MaxAutoAnalysisItemsToProcess=4000 + ) @utils.ignore_warnings def _start_server(self, test_calls): httpretty.reset() httpretty.enable(allow_net_connect=False) for test_info in test_calls: - if "content_type" in test_info: - httpretty.register_uri( - test_info["method"], - self.app_config["esHost"] + test_info["uri"], - body=test_info["rs"] if "rs" in test_info else "", - status=test_info["status"], - content_type=test_info["content_type"] - ) - else: - httpretty.register_uri( - test_info["method"], - self.app_config["esHost"] + test_info["uri"], - body=test_info["rs"] if "rs" in test_info else "", - status=test_info["status"] - ) + content_type = test_info.get('content_type', '') + httpretty.register_uri( + test_info["method"], + self.app_config.esHost + test_info["uri"], + body=test_info["rs"] if "rs" in test_info else "", + status=test_info["status"], + content_type=content_type + ) @staticmethod @utils.ignore_warnings @@ -260,14 +249,23 @@ def shutdown_server(test_calls): """Shutdown server and test request calls""" actual_calls = httpretty.latest_requests() assert len(actual_calls) == len(test_calls) - for expected_test_call, test_call in zip(test_calls, actual_calls): + for i, calls in enumerate(zip(test_calls, actual_calls)): + expected_test_call, test_call = calls assert expected_test_call["method"] == test_call.method assert expected_test_call["uri"] == test_call.path if "rq" in expected_test_call: expected_body = expected_test_call["rq"] real_body = test_call.parse_request_body(test_call.body) - if type(expected_body) == str and type(real_body) != str: + json_rq = False + if type(expected_body) is str and type(real_body) is not str: expected_body = json.loads(expected_body) - assert expected_body == real_body + json_rq = True + if expected_body != real_body: + print(f'Error in request {i}') + if json_rq: + expected_body = json.dumps(expected_body) + real_body = json.dumps(real_body) + print(f'Expected: {expected_body}') + print(f'Actual: {real_body}') + raise AssertionError(f'Error in request {i}') httpretty.disable() - httpretty.reset() diff --git a/test/service/test_analyzer_service.py b/test/service/test_analyzer_service.py index 82000048..30f5bcbe 100644 --- a/test/service/test_analyzer_service.py +++ b/test/service/test_analyzer_service.py @@ -20,12 +20,14 @@ import httpretty import sure -from app.boosting_decision_making.boosting_decision_maker import BoostingDecisionMaker -from app.commons import launch_objects +from app.commons import object_saving +from app.commons.model import launch_objects +from app.machine_learning.models.boosting_decision_maker import BoostingDecisionMaker from app.service import AutoAnalyzerService from app.utils import utils from test import get_fixture from test.mock_service import TestService +from test import APP_CONFIG class TestAutoAnalyzerService(TestService): @@ -203,28 +205,7 @@ def test_analyze_logs(self): self.launch_w_test_items_w_logs), "expected_count": 1, "expected_issue_type": "AB001", - "app_config": { - "esHost": "http://localhost:9200", - "esUser": "", - "esPassword": "", - "esVerifyCerts": False, - "esUseSsl": False, - "esSslShowWarn": False, - "turnOffSslVerification": True, - "esCAcert": "", - "esClientCert": "", - 
"esClientKey": "", - "appVersion": "", - "minioRegion": "", - "minioBucketPrefix": "", - "filesystemDefaultPath": "", - "esChunkNumber": 1000, - "binaryStoreType": "minio", - "minioHost": "", - "minioAccessKey": "", - "minioSecretKey": "", - "esProjectIndexPrefix": "rp_" - }, + "app_config": APP_CONFIG, "boost_predict": ([1, 0], [[0.2, 0.8], [0.7, 0.3]]) }, { @@ -340,9 +321,7 @@ def test_analyze_logs(self): analyzer_service = AutoAnalyzerService(self.model_chooser, app_config=app_config, search_cfg=config) - _boosting_decision_maker = BoostingDecisionMaker() - _boosting_decision_maker.get_feature_ids = MagicMock(return_value=[0]) - _boosting_decision_maker.get_feature_names = MagicMock(return_value=["0"]) + _boosting_decision_maker = BoostingDecisionMaker(object_saving.create_filesystem(""), '', features=[0]) _boosting_decision_maker.predict = MagicMock(return_value=test["boost_predict"]) if "msearch_results" in test: analyzer_service.es_client.es_client.msearch = MagicMock( diff --git a/test/service/test_clean_index_service.py b/test/service/test_clean_index_service.py index 24b7b09b..18ac70cd 100644 --- a/test/service/test_clean_index_service.py +++ b/test/service/test_clean_index_service.py @@ -19,10 +19,10 @@ import httpretty -from app.commons import launch_objects +from app.commons.model import launch_objects from app.service.clean_index_service import CleanIndexService from app.utils import utils -from test import get_fixture +from test import get_fixture, APP_CONFIG from test.mock_service import TestService @@ -54,28 +54,7 @@ def test_clean_index(self): "status": HTTPStatus.NOT_FOUND, }, ], "rq": launch_objects.CleanIndex(ids=[1], project=2), - "app_config": { - "esHost": "http://localhost:9200", - "esUser": "", - "esPassword": "", - "esVerifyCerts": False, - "esUseSsl": False, - "esSslShowWarn": False, - "turnOffSslVerification": True, - "esCAcert": "", - "esClientCert": "", - "esClientKey": "", - "appVersion": "", - "minioRegion": "", - "minioBucketPrefix": "", - "filesystemDefaultPath": "", - "esChunkNumber": 1000, - "binaryStoreType": "minio", - "minioHost": "", - "minioAccessKey": "", - "minioSecretKey": "", - "esProjectIndexPrefix": "rp_" - }, + "app_config": APP_CONFIG, "expected_count": 0 }, { @@ -278,54 +257,28 @@ def test_clean_index(self): "rs": get_fixture(self.delete_logs_rs), }], "rq": launch_objects.CleanIndex(ids=[1], project=1), - "app_config": { - "esHost": "http://localhost:9200", - "esUser": "", - "esPassword": "", - "esVerifyCerts": False, - "esUseSsl": False, - "esSslShowWarn": False, - "turnOffSslVerification": True, - "esCAcert": "", - "esClientCert": "", - "esClientKey": "", - "appVersion": "", - "minioRegion": "", - "minioBucketPrefix": "", - "filesystemDefaultPath": "", - "esChunkNumber": 1000, - "binaryStoreType": "minio", - "minioHost": "", - "minioAccessKey": "", - "minioSecretKey": "", - "esProjectIndexPrefix": "rp_" - }, + "app_config": APP_CONFIG, "expected_count": 1 } ] for idx, test in enumerate(tests): - try: - self._start_server(test["test_calls"]) - app_config = self.app_config - if "app_config" in test: - app_config = test["app_config"] - _clean_index_service = CleanIndexService( - app_config=app_config, - search_cfg=self.get_default_search_config()) - _clean_index_service.es_client.es_client.scroll = MagicMock( - return_value=json.loads(get_fixture(self.no_hits_search_rs))) - _clean_index_service.suggest_info_service.es_client.es_client.scroll = MagicMock( - return_value=json.loads(get_fixture(self.no_hits_search_rs))) + print(f'Test case 
number: {idx}') + self._start_server(test["test_calls"]) + app_config = self.app_config + if "app_config" in test: + app_config = test["app_config"] + _clean_index_service = CleanIndexService(app_config=app_config) + _clean_index_service.es_client.es_client.scroll = MagicMock( + return_value=json.loads(get_fixture(self.no_hits_search_rs))) + _clean_index_service.suggest_info_service.es_client.es_client.scroll = MagicMock( + return_value=json.loads(get_fixture(self.no_hits_search_rs))) - response = _clean_index_service.delete_logs(test["rq"]) + response = _clean_index_service.delete_logs(test["rq"]) - assert test["expected_count"] == response + assert test["expected_count"] == response - TestCleanIndexService.shutdown_server(test["test_calls"]) - except AssertionError as err: - raise AssertionError(f'Error in the test case number: {idx}'). \ - with_traceback(err.__traceback__) + TestCleanIndexService.shutdown_server(test["test_calls"]) if __name__ == '__main__': diff --git a/test/service/test_cluster_service.py b/test/service/test_cluster_service.py index 5c2e8931..0fb37d9f 100644 --- a/test/service/test_cluster_service.py +++ b/test/service/test_cluster_service.py @@ -18,10 +18,10 @@ import httpretty from freezegun import freeze_time -from app.commons import launch_objects +from app.commons.model import launch_objects from app.service import ClusterService from app.utils import utils -from test import get_fixture +from test import get_fixture, APP_CONFIG from test.mock_service import TestService @@ -72,29 +72,7 @@ def test_find_clusters(self): project=2, forUpdate=False, numberOfLogLines=-1), - "app_config": { - "esHost": "http://localhost:9200", - "esUser": "", - "esPassword": "", - "esVerifyCerts": False, - "esUseSsl": False, - "esSslShowWarn": False, - "turnOffSslVerification": True, - "esCAcert": "", - "esClientCert": "", - "esClientKey": "", - "appVersion": "", - "minioRegion": "", - "minioBucketPrefix": "", - "filesystemDefaultPath": "", - "esChunkNumber": 1000, - "binaryStoreType": "minio", - "minioHost": "", - "minioAccessKey": "", - "minioSecretKey": "", - "esProjectIndexPrefix": "rp_", - "esChunkNumberUpdateClusters": 500 - }, + "app_config": APP_CONFIG, "expected_result": launch_objects.ClusterResult( project=2, launchId=1, @@ -127,10 +105,8 @@ def test_find_clusters(self): "uri": "/_bulk?refresh=false", "status": HTTPStatus.OK, "content_type": "application/json", - "rq": get_fixture( - self.cluster_update), - "rs": get_fixture( - self.index_logs_rs), + "rq": get_fixture(self.cluster_update), + "rs": get_fixture(self.index_logs_rs), }], "launch_info": launch_objects.LaunchInfoForClustering( launch=launch_objects.Launch( @@ -144,13 +120,13 @@ def test_find_clusters(self): launchId=1, clusters=[ launch_objects.ClusterInfo( - clusterId=51305554424475301, - clusterMessage="error occured \r\n error found \r\n error mined", + clusterId=21874152824769751, + clusterMessage="error occurred \n error found \n error mined", logIds=[4, 5], itemIds=[2, 5]), launch_objects.ClusterInfo( - clusterId=2474938495021661, - clusterMessage="error occured \r\n error found \r\n assert query", + clusterId=44972330576749361, + clusterMessage="error occurred \n error found \n assert query", logIds=[9], itemIds=[6]) ]) @@ -164,19 +140,15 @@ def test_find_clusters(self): "uri": "/2/_search", "status": HTTPStatus.OK, "content_type": "application/json", - "rq": get_fixture( - self.search_logs_rq_first_group_2lines_not_for_update), - "rs": get_fixture( - self.no_hits_search_rs), + "rq": 
get_fixture(self.search_logs_rq_first_group_2lines_not_for_update), + "rs": get_fixture(self.no_hits_search_rs), }, {"method": httpretty.POST, "uri": "/_bulk?refresh=false", "status": HTTPStatus.OK, "content_type": "application/json", - "rq": get_fixture( - self.cluster_update_all_the_same), - "rs": get_fixture( - self.index_logs_rs), + "rq": get_fixture(self.cluster_update_all_the_same), + "rs": get_fixture(self.index_logs_rs), }], "launch_info": launch_objects.LaunchInfoForClustering( launch=launch_objects.Launch( @@ -190,8 +162,8 @@ def test_find_clusters(self): launchId=1, clusters=[ launch_objects.ClusterInfo( - clusterId="53490850438321651", - clusterMessage="error occured \r\n error found", + clusterId="48859729558090231", + clusterMessage="error occurred \n error found \n assert query", logIds=[4, 5, 9], itemIds=[2, 5, 6]) ]) @@ -205,33 +177,26 @@ def test_find_clusters(self): "uri": "/2/_search", "status": HTTPStatus.OK, "content_type": "application/json", - "rq": get_fixture( - self.search_logs_rq_first_group), - "rs": get_fixture( - self.no_hits_search_rs), + "rq": get_fixture(self.search_logs_rq_first_group), + "rs": get_fixture(self.no_hits_search_rs), }, {"method": httpretty.GET, "uri": "/2/_search", "status": HTTPStatus.OK, "content_type": "application/json", - "rq": get_fixture( - self.search_logs_rq_second_group), - "rs": get_fixture( - self.no_hits_search_rs), + "rq": get_fixture(self.search_logs_rq_second_group), + "rs": get_fixture(self.no_hits_search_rs), }, {"method": httpretty.POST, "uri": "/_bulk?refresh=false", "status": HTTPStatus.OK, "content_type": "application/json", - "rq": get_fixture( - self.cluster_update), - "rs": get_fixture( - self.index_logs_rs), + "rq": get_fixture(self.cluster_update), + "rs": get_fixture(self.index_logs_rs), }], "launch_info": launch_objects.LaunchInfoForClustering( launch=launch_objects.Launch( - **(get_fixture( - self.launch_w_items_clustering, to_json=True))), + **(get_fixture(self.launch_w_items_clustering, to_json=True))), project=2, forUpdate=True, numberOfLogLines=-1), @@ -240,13 +205,13 @@ def test_find_clusters(self): launchId=1, clusters=[ launch_objects.ClusterInfo( - clusterId="51305554424475301", - clusterMessage="error occured \r\n error found \r\n error mined", + clusterId="21874152824769751", + clusterMessage="error occurred \n error found \n error mined", logIds=[4, 5], itemIds=[2, 5]), launch_objects.ClusterInfo( - clusterId="2474938495021661", - clusterMessage="error occured \r\n error found \r\n assert query", + clusterId="44972330576749361", + clusterMessage="error occurred \n error found \n assert query", logIds=[9], itemIds=[6]), ]) @@ -260,28 +225,22 @@ def test_find_clusters(self): "uri": "/2/_search", "status": HTTPStatus.OK, "content_type": "application/json", - "rq": get_fixture( - self.search_logs_rq_first_group), - "rs": get_fixture( - self.one_hit_search_rs_clustering) + "rq": get_fixture(self.search_logs_rq_first_group), + "rs": get_fixture(self.one_hit_search_rs_clustering) }, {"method": httpretty.GET, "uri": "/2/_search", "status": HTTPStatus.OK, "content_type": "application/json", - "rq": get_fixture( - self.search_logs_rq_second_group), - "rs": get_fixture( - self.one_hit_search_rs_clustering) + "rq": get_fixture(self.search_logs_rq_second_group), + "rs": get_fixture(self.one_hit_search_rs_clustering) }, {"method": httpretty.POST, "uri": "/_bulk?refresh=false", "status": HTTPStatus.OK, "content_type": "application/json", - "rq": get_fixture( - self.cluster_update_es_update), - "rs": get_fixture( - 
self.index_logs_rs), + "rq": get_fixture(self.cluster_update_es_update), + "rs": get_fixture(self.index_logs_rs), }], "launch_info": launch_objects.LaunchInfoForClustering( launch=launch_objects.Launch( @@ -296,12 +255,12 @@ def test_find_clusters(self): clusters=[ launch_objects.ClusterInfo( clusterId="123", - clusterMessage="error occured \n error found \n error mined", + clusterMessage="error occurred \n error found \n error mined", logIds=[4, 5, 111], itemIds=[2, 5]), launch_objects.ClusterInfo( - clusterId="2474938495021661", - clusterMessage="error occured \r\n error found \r\n assert query", + clusterId="44972330576749361", + clusterMessage="error occurred \n error found \n assert query", logIds=[9], itemIds=[6]) ]) @@ -315,19 +274,15 @@ def test_find_clusters(self): "uri": "/2/_search", "status": HTTPStatus.OK, "content_type": "application/json", - "rq": get_fixture( - self.search_logs_rq_first_group_2lines), - "rs": get_fixture( - self.one_hit_search_rs_clustering), + "rq": get_fixture(self.search_logs_rq_first_group_2lines), + "rs": get_fixture(self.one_hit_search_rs_clustering), }, {"method": httpretty.POST, "uri": "/_bulk?refresh=false", "status": HTTPStatus.OK, "content_type": "application/json", - "rq": get_fixture( - self.cluster_update_all_the_same_es_update), - "rs": get_fixture( - self.index_logs_rs), + "rq": get_fixture(self.cluster_update_all_the_same_es_update), + "rs": get_fixture(self.index_logs_rs), }], "launch_info": launch_objects.LaunchInfoForClustering( launch=launch_objects.Launch( @@ -341,8 +296,8 @@ def test_find_clusters(self): launchId=1, clusters=[ launch_objects.ClusterInfo( - clusterId="53490850438321651", - clusterMessage="error occured \r\n error found", + clusterId="48859729558090231", + clusterMessage="error occurred \n error found \n assert query", logIds=[4, 5, 9], itemIds=[2, 5, 6]) ]) @@ -356,19 +311,15 @@ def test_find_clusters(self): "uri": "/rp_2/_search", "status": HTTPStatus.OK, "content_type": "application/json", - "rq": get_fixture( - self.search_logs_rq_first_group_2lines), - "rs": get_fixture( - self.one_hit_search_rs_clustering), + "rq": get_fixture(self.search_logs_rq_first_group_2lines), + "rs": get_fixture(self.one_hit_search_rs_clustering), }, {"method": httpretty.POST, "uri": "/_bulk?refresh=false", "status": HTTPStatus.OK, "content_type": "application/json", - "rq": get_fixture( - self.cluster_update_all_the_same_es_update_with_prefix), - "rs": get_fixture( - self.index_logs_rs), + "rq": get_fixture(self.cluster_update_all_the_same_es_update_with_prefix), + "rs": get_fixture(self.index_logs_rs), }], "launch_info": launch_objects.LaunchInfoForClustering( launch=launch_objects.Launch( @@ -377,36 +328,14 @@ def test_find_clusters(self): project=2, forUpdate=True, numberOfLogLines=2), - "app_config": { - "esHost": "http://localhost:9200", - "esUser": "", - "esPassword": "", - "esVerifyCerts": False, - "esUseSsl": False, - "esSslShowWarn": False, - "turnOffSslVerification": True, - "esCAcert": "", - "esClientCert": "", - "esClientKey": "", - "appVersion": "", - "minioRegion": "", - "minioBucketPrefix": "", - "filesystemDefaultPath": "", - "esChunkNumber": 1000, - "binaryStoreType": "minio", - "minioHost": "", - "minioAccessKey": "", - "minioSecretKey": "", - "esProjectIndexPrefix": "rp_", - "esChunkNumberUpdateClusters": 500 - }, + "app_config": APP_CONFIG, "expected_result": launch_objects.ClusterResult( project=2, launchId=1, clusters=[ launch_objects.ClusterInfo( - clusterId="53490850438321651", - clusterMessage="error occured \r\n 
error found", + clusterId="48859729558090231", + clusterMessage="error occurred \n error found \n assert query", logIds=[4, 5, 9], itemIds=[2, 5, 6]) ]) @@ -429,28 +358,22 @@ def test_find_clusters(self): "uri": "/2/_search", "status": HTTPStatus.OK, "content_type": "application/json", - "rq": get_fixture( - self.search_logs_rq_first_group_assertion_error_status_code), - "rs": get_fixture( - self.no_hits_search_rs), + "rq": get_fixture(self.search_logs_rq_first_group_assertion_error_status_code), + "rs": get_fixture(self.no_hits_search_rs), }, {"method": httpretty.GET, "uri": "/2/_search", "status": HTTPStatus.OK, "content_type": "application/json", - "rq": get_fixture( - self.search_logs_rq_first_group_no_such_element), - "rs": get_fixture( - self.no_hits_search_rs), + "rq": get_fixture(self.search_logs_rq_first_group_no_such_element), + "rs": get_fixture(self.no_hits_search_rs), }, {"method": httpretty.POST, "uri": "/_bulk?refresh=false", "status": HTTPStatus.OK, "content_type": "application/json", - "rq": get_fixture( - self.cluster_update_all_the_same_es_with_different_errors), - "rs": get_fixture( - self.index_logs_rs), + "rq": get_fixture(self.cluster_update_all_the_same_es_with_different_errors), + "rs": get_fixture(self.index_logs_rs), }], "launch_info": launch_objects.LaunchInfoForClustering( launch=launch_objects.Launch( @@ -464,18 +387,19 @@ def test_find_clusters(self): launchId=1, clusters=[ launch_objects.ClusterInfo( - clusterId="66538501077545981", - clusterMessage="AssertionError error occured \r\n error found", + clusterId="37711525315085941", + clusterMessage="AssertionError error occurred \n error found \n error mined", logIds=[4], itemIds=[2]), launch_objects.ClusterInfo( - clusterId="30071099716448071", - clusterMessage="AssertionError status code: 500 error occured \r\n error found", + clusterId="48851059259117511", + clusterMessage="AssertionError status code: 500 error occurred \n error found \n error " + "mined", logIds=[5], itemIds=[5]), launch_objects.ClusterInfo( - clusterId="59521687023339221", - clusterMessage="NoSuchElementException error occured \r\n error found", + clusterId="90988898127574211", + clusterMessage="NoSuchElementException error occurred \n error found \n assert query", logIds=[9], itemIds=[6]), ]) @@ -489,37 +413,29 @@ def test_find_clusters(self): "uri": "/2/_search", "status": HTTPStatus.OK, "content_type": "application/json", - "rq": get_fixture( - self.search_logs_rq_first_group_small_logs), - "rs": get_fixture( - self.no_hits_search_rs), + "rq": get_fixture(self.search_logs_rq_first_group_small_logs), + "rs": get_fixture(self.no_hits_search_rs), }, {"method": httpretty.GET, "uri": "/2/_search", "status": HTTPStatus.OK, "content_type": "application/json", - "rq": get_fixture( - self.search_logs_rq_second_group_small_logs), - "rs": get_fixture( - self.no_hits_search_rs), + "rq": get_fixture(self.search_logs_rq_second_group_small_logs), + "rs": get_fixture(self.no_hits_search_rs), }, {"method": httpretty.GET, "uri": "/2/_search", "status": HTTPStatus.OK, "content_type": "application/json", - "rq": get_fixture( - self.search_logs_rq_first_group_no_such_element_all_log_lines), - "rs": get_fixture( - self.no_hits_search_rs), + "rq": get_fixture(self.search_logs_rq_first_group_no_such_element_all_log_lines), + "rs": get_fixture(self.no_hits_search_rs), }, {"method": httpretty.POST, "uri": "/_bulk?refresh=false", "status": HTTPStatus.OK, "content_type": "application/json", - "rq": get_fixture( - self.cluster_update_small_logs), - "rs": 
get_fixture( - self.index_logs_rs), + "rq": get_fixture(self.cluster_update_small_logs), + "rs": get_fixture(self.index_logs_rs), }], "launch_info": launch_objects.LaunchInfoForClustering( launch=launch_objects.Launch( @@ -533,19 +449,19 @@ def test_find_clusters(self): launchId=1, clusters=[ launch_objects.ClusterInfo( - clusterId="78342974021039661", - clusterMessage="error occured twice \r\nAssertionError error occured \r\n error found", + clusterId="60604459849884091", + clusterMessage="error occurred twice", # noqa logIds=[3, 4], itemIds=[2]), launch_objects.ClusterInfo( - clusterId="37054331802624341", - clusterMessage="AssertionError status code: 500 error occured", + clusterId="9398573272102061", + clusterMessage="AssertionError status code: 500 error occurred", logIds=[5], itemIds=[5]), launch_objects.ClusterInfo( - clusterId="16492834929015971", - clusterMessage="NoSuchElementException error occured \r\n error found \r\n assert query", + clusterId="86465058569810291", + clusterMessage="NoSuchElementException error occurred \n error found \n assert query", # noqa logIds=[9], itemIds=[6]), @@ -554,24 +470,21 @@ def test_find_clusters(self): ] for idx, test in enumerate(tests): - try: - self._start_server(test["test_calls"]) - config = self.get_default_search_config() - app_config = self.app_config - if "app_config" in test: - app_config = test["app_config"] - _cluster_service = ClusterService(app_config=app_config, - search_cfg=config) + print(f'Test case number: {idx}') + self._start_server(test["test_calls"]) + config = self.get_default_search_config() + app_config = self.app_config + if "app_config" in test: + app_config = test["app_config"] + _cluster_service = ClusterService(app_config=app_config, + search_cfg=config) - response = _cluster_service.find_clusters(test["launch_info"]) + response = _cluster_service.find_clusters(test["launch_info"]) - assert len(response.clusters) == len(test["expected_result"].clusters) - assert test["expected_result"] == response + assert len(response.clusters) == len(test["expected_result"].clusters) + assert response == test["expected_result"] - TestClusterService.shutdown_server(test["test_calls"]) - except AssertionError as err: - raise AssertionError(f'Error in the test case number: {idx}'). 
\ - with_traceback(err.__traceback__) + TestClusterService.shutdown_server(test["test_calls"]) if __name__ == '__main__': diff --git a/test/service/test_delete_index_service.py b/test/service/test_delete_index_service.py index 8ad88c47..1e3ad8e7 100644 --- a/test/service/test_delete_index_service.py +++ b/test/service/test_delete_index_service.py @@ -19,7 +19,7 @@ from app.service import DeleteIndexService from app.utils import utils -from test import get_fixture +from test import get_fixture, APP_CONFIG from test.mock_service import TestService @@ -57,28 +57,7 @@ def test_delete_index(self): "rs": get_fixture(self.index_not_found_rs), }, ], "index": 2, - "app_config": { - "esHost": "http://localhost:9200", - "esUser": "", - "esPassword": "", - "esVerifyCerts": False, - "esUseSsl": False, - "esSslShowWarn": False, - "turnOffSslVerification": True, - "esCAcert": "", - "esClientCert": "", - "esClientKey": "", - "appVersion": "", - "minioRegion": "", - "minioBucketPrefix": "", - "filesystemDefaultPath": "", - "esChunkNumber": 1000, - "binaryStoreType": "minio", - "minioHost": "", - "minioAccessKey": "", - "minioSecretKey": "", - "esProjectIndexPrefix": "rp_" - }, + "app_config": APP_CONFIG, "result": False, }, ] diff --git a/test/service/test_esquery.py b/test/service/test_esquery.py index 5136c7b2..72289e08 100644 --- a/test/service/test_esquery.py +++ b/test/service/test_esquery.py @@ -15,8 +15,9 @@ import logging import unittest -from app.commons import launch_objects +from app.commons.model import launch_objects from app.commons import model_chooser +from app.commons.model.launch_objects import SearchConfig from app.service import AutoAnalyzerService from app.service import SearchService from app.service import SuggestService @@ -26,6 +27,8 @@ class TestEsQuery(unittest.TestCase): """Tests building analyze query""" + model_settings: dict + app_config: launch_objects.ApplicationConfig @utils.ignore_warnings def setUp(self): @@ -49,21 +52,27 @@ def setUp(self): self.query_analyze_items_including_no_defect = "query_analyze_items_including_no_defect.json" self.query_analyze_items_including_no_defect_small_logs = \ "query_analyze_items_including_no_defect_small_logs.json" - self.app_config = { - "esHost": "http://localhost:9200", - "esUser": "", - "esPassword": "", - "esVerifyCerts": False, - "esUseSsl": False, - "esSslShowWarn": False, - "turnOffSslVerification": True, - "esCAcert": "", - "esClientCert": "", - "esClientKey": "", - "appVersion": "", - "esChunkNumber": 1000 - } - self.model_settings = utils.read_json_file("res", "model_settings.json", to_json=True) + self.app_config = launch_objects.ApplicationConfig( + esHost="http://localhost:9200", + esUser="", + esPassword="", + esVerifyCerts=False, + esUseSsl=False, + esSslShowWarn=False, + turnOffSslVerification=True, + esCAcert="", + esClientCert="", + esClientKey="", + appVersion="", + esChunkNumber=1000, + binaryStoreType='filesystem', + filesystemDefaultPath='' + ) + model_settings = utils.read_json_file('res', 'model_settings.json', to_json=True) + if model_settings and isinstance(model_settings, dict): + self.model_settings = model_settings + else: + raise RuntimeError('Failed to read model settings') self.model_chooser = model_chooser.ModelChooser(self.app_config, self.get_default_search_config()) logging.disable(logging.CRITICAL) @@ -72,33 +81,22 @@ def tearDown(self): logging.disable(logging.DEBUG) @utils.ignore_warnings - def get_default_search_config(self): + def get_default_search_config(self) -> SearchConfig: """Get default 
search config""" - return { - "MinShouldMatch": "80%", - "MinTermFreq": 1, - "MinDocFreq": 1, - "BoostAA": -10, - "BoostLaunch": 5, - "BoostTestCaseHash": 3, - "MaxQueryTerms": 50, - "SearchLogsMinShouldMatch": "90%", - "SearchLogsMinSimilarity": 0.9, - "MinWordLength": 0, - "BoostModelFolder": - self.model_settings["BOOST_MODEL_FOLDER"], - "SimilarityWeightsFolder": - self.model_settings["SIMILARITY_WEIGHTS_FOLDER"], - "SuggestBoostModelFolder": - self.model_settings["SUGGEST_BOOST_MODEL_FOLDER"], - "GlobalDefectTypeModelFolder": - self.model_settings["GLOBAL_DEFECT_TYPE_MODEL_FOLDER"], - "TimeWeightDecay": 0.95, - "RetrainSuggestBoostModelConfig": - self.model_settings["RETRAIN_SUGGEST_BOOST_MODEL_CONFIG"], - "RetrainAutoBoostModelConfig": - self.model_settings["RETRAIN_AUTO_BOOST_MODEL_CONFIG"] - } + return SearchConfig( + MinShouldMatch='80%', + BoostAA=-10, + BoostLaunch=5, + BoostTestCaseHash=3, + MaxQueryTerms=50, + SearchLogsMinSimilarity=0.9, + MinWordLength=0, + BoostModelFolder=self.model_settings['BOOST_MODEL_FOLDER'], + SimilarityWeightsFolder=self.model_settings['SIMILARITY_WEIGHTS_FOLDER'], + SuggestBoostModelFolder=self.model_settings['SUGGEST_BOOST_MODEL_FOLDER'], + GlobalDefectTypeModelFolder=self.model_settings['GLOBAL_DEFECT_TYPE_MODEL_FOLDER'], + TimeWeightDecay=0.95, + ) @utils.ignore_warnings def test_build_analyze_query_all_logs_empty_stacktrace(self): diff --git a/test/service/test_retraining_service.py b/test/service/test_retraining_service.py index 965ba96d..beccf4ac 100644 --- a/test/service/test_retraining_service.py +++ b/test/service/test_retraining_service.py @@ -13,8 +13,9 @@ # limitations under the License. import unittest -from unittest.mock import MagicMock +from unittest import mock +from app.commons.model.ml import TrainInfo, ModelType from app.service import RetrainingService from app.utils import utils from test.mock_service import TestService @@ -27,166 +28,128 @@ def test_train_models_triggering(self): """Test train models triggering""" tests = [ { - "train_info": {"model_type": "defect_type", - "project_id": 1, - "gathered_metric_total": 5}, + "train_info": TrainInfo(model_type=ModelType.defect_type, project=1, gathered_metric_total=5), "trigger_info": {}, "train_result": (0, {}), - "is_model_trained": 0 + "is_model_trained": False }, { - "train_info": {"model_type": "defect_type", - "project_id": 1, - "gathered_metric_total": 5}, + "train_info": TrainInfo(model_type=ModelType.defect_type, project=1, gathered_metric_total=5), "trigger_info": {"gathered_metric_total": 120}, "train_result": (0, {}), - "is_model_trained": 0 + "is_model_trained": False }, { - "train_info": {"model_type": "defect_type", - "project_id": 1, - "gathered_metric_total": 5}, + "train_info": TrainInfo(model_type=ModelType.defect_type, project=1, gathered_metric_total=5), "trigger_info": {"gathered_metric_total": 94, "gathered_metric_since_training": 94}, "train_result": (0, {}), - "is_model_trained": 0 + "is_model_trained": False }, { - "train_info": {"model_type": "defect_type", - "project_id": 1, - "gathered_metric_total": 5}, + "train_info": TrainInfo(model_type=ModelType.defect_type, project=1, gathered_metric_total=5), "trigger_info": {"gathered_metric_total": 95, "gathered_metric_since_training": 95}, "train_result": (123, {}), - "is_model_trained": 1 + "is_model_trained": True }, { - "train_info": {"model_type": "defect_type", - "project_id": 1, - "gathered_metric_total": 5}, + "train_info": TrainInfo(model_type=ModelType.defect_type, project=1, 
gathered_metric_total=5), "trigger_info": {"gathered_metric_total": 120, "gathered_metric_since_training": 67}, "train_result": (0, {}), - "is_model_trained": 0 + "is_model_trained": False }, { - "train_info": {"model_type": "defect_type", - "project_id": 1, - "gathered_metric_total": 5}, + "train_info": TrainInfo(model_type=ModelType.defect_type, project=1, gathered_metric_total=5), "trigger_info": {"gathered_metric_total": 85, "gathered_metric_since_training": 95}, "train_result": (123, {}), - "is_model_trained": 0 + "is_model_trained": False }, { - "train_info": {"model_type": "suggestion", - "project_id": 1, - "gathered_metric_total": 3}, + "train_info": TrainInfo(model_type=ModelType.suggestion, project=1, gathered_metric_total=3), "trigger_info": {}, "train_result": (0, {}), - "is_model_trained": 0 + "is_model_trained": False }, { - "train_info": {"model_type": "suggestion", - "project_id": 1, - "gathered_metric_total": 3}, + "train_info": TrainInfo(model_type=ModelType.suggestion, project=1, gathered_metric_total=3), "trigger_info": {"gathered_metric_total": 14}, "train_result": (0, {}), - "is_model_trained": 0 + "is_model_trained": False }, { - "train_info": {"model_type": "suggestion", - "project_id": 1, - "gathered_metric_total": 5}, + "train_info": TrainInfo(model_type=ModelType.suggestion, project=1, gathered_metric_total=5), "trigger_info": {"gathered_metric_total": 90, "gathered_metric_since_training": 35}, "train_result": (0, {}), - "is_model_trained": 0 + "is_model_trained": False }, { - "train_info": {"model_type": "suggestion", - "project_id": 1, - "gathered_metric_total": 5}, + "train_info": TrainInfo(model_type=ModelType.suggestion, project=1, gathered_metric_total=5), "trigger_info": {"gathered_metric_total": 120, "gathered_metric_since_training": 30}, "train_result": (0, {}), - "is_model_trained": 0 + "is_model_trained": False }, { - "train_info": {"model_type": "suggestion", - "project_id": 1, - "gathered_metric_total": 5}, + "train_info": TrainInfo(model_type=ModelType.suggestion, project=1, gathered_metric_total=5), "trigger_info": {"gathered_metric_total": 95, "gathered_metric_since_training": 45}, "train_result": (100, {}), - "is_model_trained": 1 + "is_model_trained": True }, { - "train_info": {"model_type": "suggestion", - "project_id": 1, - "gathered_metric_total": 5}, + "train_info": TrainInfo(model_type=ModelType.suggestion, project=1, gathered_metric_total=5), "trigger_info": {"gathered_metric_total": 89, "gathered_metric_since_training": 55}, "train_result": (0, {}), - "is_model_trained": 0 + "is_model_trained": False }, { - "train_info": {"model_type": "auto_analysis", - "project_id": 1, - "gathered_metric_total": 3}, + "train_info": TrainInfo(model_type=ModelType.auto_analysis, project=1, gathered_metric_total=3), "trigger_info": {}, "train_result": (0, {}), - "is_model_trained": 0 + "is_model_trained": False }, { - "train_info": {"model_type": "auto_analysis", - "project_id": 1, - "gathered_metric_total": 3}, + "train_info": TrainInfo(model_type=ModelType.auto_analysis, project=1, gathered_metric_total=3), "trigger_info": {"gathered_metric_total": 14}, "train_result": (0, {}), - "is_model_trained": 0 + "is_model_trained": False }, { - "train_info": {"model_type": "auto_analysis", - "project_id": 1, - "gathered_metric_total": 5}, + "train_info": TrainInfo(model_type=ModelType.auto_analysis, project=1, gathered_metric_total=5), "trigger_info": {"gathered_metric_total": 290, "gathered_metric_since_training": 92}, "train_result": (0, {}), - 
"is_model_trained": 0 + "is_model_trained": False }, { - "train_info": {"model_type": "auto_analysis", - "project_id": 1, - "gathered_metric_total": 5}, + "train_info": TrainInfo(model_type=ModelType.auto_analysis, project=1, gathered_metric_total=5), "trigger_info": {"gathered_metric_total": 301, "gathered_metric_since_training": 93}, "train_result": (0, {}), - "is_model_trained": 0 + "is_model_trained": False }, { - "train_info": {"model_type": "auto_analysis", - "project_id": 1, - "gathered_metric_total": 5}, + "train_info": TrainInfo(model_type=ModelType.auto_analysis, project=1, gathered_metric_total=5), "trigger_info": {"gathered_metric_total": 295, "gathered_metric_since_training": 95}, "train_result": (100, {}), - "is_model_trained": 1 + "is_model_trained": True }, { - "train_info": {"model_type": "auto_analysis", - "project_id": 1, - "gathered_metric_total": 5}, + "train_info": TrainInfo(model_type=ModelType.auto_analysis, project=1, gathered_metric_total=5), "trigger_info": {"gathered_metric_total": 291, "gathered_metric_since_training": 95}, "train_result": (0, {}), - "is_model_trained": 0 + "is_model_trained": False } ] for idx, test in enumerate(tests): - try: - _retraining_service = RetrainingService(self.model_chooser, - app_config=self.app_config, - search_cfg=self.get_default_search_config()) - model_triggering = _retraining_service.trigger_manager.model_training_triggering - model_triggering = model_triggering[test["train_info"]["model_type"]] - model_triggering[0].object_saver.get_project_object = MagicMock( - return_value=test["trigger_info"]) - model_triggering[1].train = MagicMock( - return_value=test["train_result"]) - response = _retraining_service.train_models(test["train_info"]) - assert test["is_model_trained"] == response - except AssertionError as err: - raise AssertionError(f'Error in the test case number: {idx}'). 
\ - with_traceback(err.__traceback__) + print(f'Test case idx: {idx}') + _retraining_service = RetrainingService(self.model_chooser, app_config=self.app_config, + search_cfg=self.get_default_search_config()) + model_triggering = _retraining_service.trigger_manager.model_training_triggering + model_triggering = model_triggering[test["train_info"].model_type] + model_triggering[0].object_saver.get_project_object = mock.Mock(return_value=test["trigger_info"]) + train_mock = mock.Mock(return_value=test["train_result"]) + model_triggering[1].train = train_mock + _retraining_service.train_models(test["train_info"]) + if test["is_model_trained"]: + train_mock.assert_called_once() + else: + train_mock.assert_not_called() if __name__ == '__main__': diff --git a/test/service/test_search_service.py b/test/service/test_search_service.py index 411b7a46..3832eea2 100644 --- a/test/service/test_search_service.py +++ b/test/service/test_search_service.py @@ -21,10 +21,10 @@ import httpretty -from app.commons import launch_objects +from app.commons.model import launch_objects from app.service import SearchService from app.utils import utils -from test import get_fixture +from test import get_fixture, APP_CONFIG from test.mock_service import TestService @@ -76,28 +76,7 @@ def test_search_logs(self): filteredLaunchIds=[1], logMessages=["error"], logLines=-1), - "app_config": { - "esHost": "http://localhost:9200", - "esUser": "", - "esPassword": "", - "esVerifyCerts": False, - "esUseSsl": False, - "esSslShowWarn": False, - "turnOffSslVerification": True, - "esCAcert": "", - "esClientCert": "", - "esClientKey": "", - "appVersion": "", - "minioRegion": "", - "minioBucketPrefix": "", - "filesystemDefaultPath": "", - "esChunkNumber": 1000, - "binaryStoreType": "minio", - "minioHost": "", - "minioAccessKey": "", - "minioSecretKey": "", - "esProjectIndexPrefix": "rp_" - }, + "app_config": APP_CONFIG, "expected_count": 0 }, { @@ -164,7 +143,7 @@ def test_search_logs(self): itemId=3, projectId=1, filteredLaunchIds=[1], - logMessages=["error occured once"], + logMessages=["error occurred once"], logLines=-1), "expected_count": 1 }, @@ -177,19 +156,15 @@ def test_search_logs(self): "uri": "/1/_search?scroll=5m&size=1000", "status": HTTPStatus.OK, "content_type": "application/json", - "rq": get_fixture( - self.search_logs_rq_with_status_codes), - "rs": get_fixture( - self.two_hits_search_rs_search_logs_with_status_codes), + "rq": get_fixture(self.search_logs_rq_with_status_codes), + "rs": get_fixture(self.two_hits_search_rs_search_logs_with_status_codes), }, {"method": httpretty.GET, "uri": "/1/_search?scroll=5m&size=1000", "status": HTTPStatus.OK, "content_type": "application/json", - "rq": get_fixture( - self.search_not_merged_logs_by_test_item), - "rs": get_fixture( - self.two_hits_search_rs_search_logs_with_status_codes), + "rq": get_fixture(self.search_not_merged_logs_by_test_item), + "rs": get_fixture(self.two_hits_search_rs_search_logs_with_status_codes), }], "rq": launch_objects.SearchLogs( launchId=1, @@ -197,10 +172,10 @@ def test_search_logs(self): itemId=3, projectId=1, filteredLaunchIds=[1], - logMessages=["error occured once status code: 500 but got 200"], + logMessages=["error occurred once status code: 500 but got 200"], logLines=-1), "expected_count": 1, - "response": [launch_objects.SearchLogInfo(logId=2, testItemId=1, matchScore=100)] + "response": [launch_objects.SearchLogInfo(logId=2, testItemId=1, matchScore=95)] }, { "test_calls": [{"method": httpretty.GET, @@ -211,49 +186,24 @@ def 
test_search_logs(self): "uri": "/rp_1/_search?scroll=5m&size=1000", "status": HTTPStatus.OK, "content_type": "application/json", - "rq": get_fixture( - self.search_logs_rq_not_found), - "rs": get_fixture( - self.two_hits_search_rs_search_logs), + "rq": get_fixture(self.search_logs_rq_not_found), + "rs": get_fixture(self.two_hits_search_rs_search_logs), }, {"method": httpretty.GET, "uri": "/rp_1/_search?scroll=5m&size=1000", "status": HTTPStatus.OK, "content_type": "application/json", - "rq": get_fixture( - self.search_not_merged_logs_by_test_item), - "rs": get_fixture( - self.two_hits_search_rs_search_logs), + "rq": get_fixture(self.search_not_merged_logs_by_test_item), + "rs": get_fixture(self.two_hits_search_rs_search_logs), }], "rq": launch_objects.SearchLogs(launchId=1, launchName="Launch 1", itemId=3, projectId=1, filteredLaunchIds=[1], - logMessages=["error occured once"], + logMessages=["error occurred once"], logLines=-1), - "app_config": { - "esHost": "http://localhost:9200", - "esUser": "", - "esPassword": "", - "esVerifyCerts": False, - "esUseSsl": False, - "esSslShowWarn": False, - "turnOffSslVerification": True, - "esCAcert": "", - "esClientCert": "", - "esClientKey": "", - "appVersion": "", - "minioRegion": "", - "minioBucketPrefix": "", - "filesystemDefaultPath": "", - "esChunkNumber": 1000, - "binaryStoreType": "minio", - "minioHost": "", - "minioAccessKey": "", - "minioSecretKey": "", - "esProjectIndexPrefix": "rp_" - }, + "app_config": APP_CONFIG, "expected_count": 1, "response": [launch_objects.SearchLogInfo(logId=1, testItemId=1, matchScore=100)] }, @@ -266,26 +216,22 @@ def test_search_logs(self): "uri": "/1/_search?scroll=5m&size=1000", "status": HTTPStatus.OK, "content_type": "application/json", - "rq": get_fixture( - self.search_logs_rq_not_found), - "rs": get_fixture( - self.two_hits_search_rs_search_logs), + "rq": get_fixture(self.search_logs_rq_not_found), + "rs": get_fixture(self.two_hits_search_rs_search_logs), }, {"method": httpretty.GET, "uri": "/1/_search?scroll=5m&size=1000", "status": HTTPStatus.OK, "content_type": "application/json", - "rq": get_fixture( - self.search_not_merged_logs_by_test_item), - "rs": get_fixture( - self.two_hits_search_rs_search_logs), + "rq": get_fixture(self.search_not_merged_logs_by_test_item), + "rs": get_fixture(self.two_hits_search_rs_search_logs), }], "rq": launch_objects.SearchLogs(launchId=1, launchName="Launch 1", itemId=3, projectId=1, filteredLaunchIds=[1], - logMessages=["error occured once"], + logMessages=["error occurred once"], logLines=-1, analyzerConfig=launch_objects.AnalyzerConf( allMessagesShouldMatch=True)), @@ -295,26 +241,22 @@ def test_search_logs(self): ] for idx, test in enumerate(tests): - try: - self._start_server(test["test_calls"]) - app_config = self.app_config - if "app_config" in test: - app_config = test["app_config"] - search_service = SearchService(app_config=app_config, - search_cfg=self.get_default_search_config()) + print(f'Running test case idx: {idx}') + self._start_server(test["test_calls"]) + app_config = self.app_config + if "app_config" in test: + app_config = test["app_config"] + search_service = SearchService(app_config=app_config, search_cfg=self.get_default_search_config()) - search_service.es_client.es_client.scroll = MagicMock(return_value=json.loads( - get_fixture(self.no_hits_search_rs))) + search_service.es_client.es_client.scroll = MagicMock(return_value=json.loads( + get_fixture(self.no_hits_search_rs))) - response = search_service.search_logs(test["rq"]) - assert 
len(response) == test["expected_count"] - if "response" in test: - assert response == test["response"] + response = search_service.search_logs(test["rq"]) + assert len(response) == test["expected_count"] + if "response" in test: + assert response == test["response"] - TestSearchService.shutdown_server(test["test_calls"]) - except AssertionError as err: - raise AssertionError(f'Error in the test case number: {idx}'). \ - with_traceback(err.__traceback__) + TestSearchService.shutdown_server(test["test_calls"]) if __name__ == '__main__': diff --git a/test/service/test_suggest_info_service.py b/test/service/test_suggest_info_service.py index 316a9b7d..60d3f671 100644 --- a/test/service/test_suggest_info_service.py +++ b/test/service/test_suggest_info_service.py @@ -21,11 +21,12 @@ import httpretty -from app.commons import launch_objects +from app.commons.model import launch_objects from app.service import SuggestInfoService from app.utils import utils from test import get_fixture from test.mock_service import TestService +from test import APP_CONFIG class TestSuggestInfoService(TestService): @@ -86,28 +87,7 @@ def test_clean_suggest_info_logs(self): self.delete_suggest_logs_rq_with_prefix), "rs": get_fixture(self.delete_logs_rs), }], - "app_config": { - "esHost": "http://localhost:9200", - "esUser": "", - "esPassword": "", - "esVerifyCerts": False, - "esUseSsl": False, - "esSslShowWarn": False, - "turnOffSslVerification": True, - "esCAcert": "", - "esClientCert": "", - "esClientKey": "", - "appVersion": "", - "minioRegion": "", - "minioBucketPrefix": "", - "filesystemDefaultPath": "", - "esChunkNumber": 1000, - "binaryStoreType": "minio", - "minioHost": "", - "minioAccessKey": "", - "minioSecretKey": "", - "esProjectIndexPrefix": "rp_" - }, + "app_config": APP_CONFIG, "rq": launch_objects.CleanIndex(ids=[1], project=1), "expected_count": 1 } @@ -119,8 +99,7 @@ def test_clean_suggest_info_logs(self): app_config = self.app_config if "app_config" in test: app_config = test["app_config"] - suggest_info_service = SuggestInfoService(app_config=app_config, - search_cfg=self.get_default_search_config()) + suggest_info_service = SuggestInfoService(app_config=app_config) suggest_info_service.es_client.es_client.scroll = MagicMock( return_value=json.loads(get_fixture(self.no_hits_search_rs))) @@ -162,28 +141,7 @@ def test_delete_suggest_info_index(self): "content_type": "application/json", "rs": get_fixture(self.index_not_found_rs), }, ], - "app_config": { - "esHost": "http://localhost:9200", - "esUser": "", - "esPassword": "", - "esVerifyCerts": False, - "esUseSsl": False, - "esSslShowWarn": False, - "turnOffSslVerification": True, - "esCAcert": "", - "esClientCert": "", - "esClientKey": "", - "appVersion": "", - "minioRegion": "", - "minioBucketPrefix": "", - "filesystemDefaultPath": "", - "esChunkNumber": 1000, - "binaryStoreType": "minio", - "minioHost": "", - "minioAccessKey": "", - "minioSecretKey": "", - "esProjectIndexPrefix": "rp_" - }, + "app_config": APP_CONFIG, "index": 2, "result": False, } @@ -194,8 +152,7 @@ def test_delete_suggest_info_index(self): app_config = self.app_config if "app_config" in test: app_config = test["app_config"] - suggest_info_service = SuggestInfoService(app_config=app_config, - search_cfg=self.get_default_search_config()) + suggest_info_service = SuggestInfoService(app_config=app_config) response = suggest_info_service.remove_suggest_info(test["index"]) @@ -317,28 +274,7 @@ def test_index_suggest_info_logs(self): "content_type": "application/json", "rs": 
get_fixture(self.index_logs_rs), }], - "app_config": { - "esHost": "http://localhost:9200", - "esUser": "", - "esPassword": "", - "esVerifyCerts": False, - "esUseSsl": False, - "esSslShowWarn": False, - "turnOffSslVerification": True, - "esCAcert": "", - "esClientCert": "", - "esClientKey": "", - "appVersion": "", - "minioRegion": "", - "minioBucketPrefix": "", - "filesystemDefaultPath": "", - "esChunkNumber": 1000, - "binaryStoreType": "minio", - "minioHost": "", - "minioAccessKey": "", - "minioSecretKey": "", - "esProjectIndexPrefix": "rp_" - }, + "app_config": APP_CONFIG, "index_rq": get_fixture(self.suggest_info_list), "has_errors": False, "expected_count": 2 @@ -351,8 +287,7 @@ def test_index_suggest_info_logs(self): app_config = self.app_config if "app_config" in test: app_config = test["app_config"] - suggest_info_service = SuggestInfoService(app_config=app_config, - search_cfg=self.get_default_search_config()) + suggest_info_service = SuggestInfoService(app_config=app_config) response = suggest_info_service.index_suggest_info( [launch_objects.SuggestAnalysisResult(**res) for res in json.loads(test["index_rq"])]) @@ -409,28 +344,7 @@ def test_remove_test_items_suggests(self): self.delete_by_query_suggest_1), "rs": json.dumps({"deleted": 3}), }], - "app_config": { - "esHost": "http://localhost:9200", - "esUser": "", - "esPassword": "", - "esVerifyCerts": False, - "esUseSsl": False, - "esSslShowWarn": False, - "turnOffSslVerification": True, - "esCAcert": "", - "esClientCert": "", - "esClientKey": "", - "appVersion": "", - "minioRegion": "", - "minioBucketPrefix": "", - "filesystemDefaultPath": "", - "esChunkNumber": 1000, - "binaryStoreType": "minio", - "minioHost": "", - "minioAccessKey": "", - "minioSecretKey": "", - "esProjectIndexPrefix": "rp_" - }, + "app_config": APP_CONFIG, "item_remove_info": { "project": 1, "itemsToDelete": [1, 2]}, @@ -444,8 +358,7 @@ def test_remove_test_items_suggests(self): app_config = self.app_config if "app_config" in test: app_config = test["app_config"] - suggest_info_service = SuggestInfoService(app_config=app_config, - search_cfg=self.get_default_search_config()) + suggest_info_service = SuggestInfoService(app_config=app_config) response = suggest_info_service.clean_suggest_info_logs_by_test_item( test["item_remove_info"]) @@ -501,28 +414,7 @@ def test_remove_launches_suggests(self): self.delete_by_query_suggest_2), "rs": json.dumps({"deleted": 3}), }], - "app_config": { - "esHost": "http://localhost:9200", - "esUser": "", - "esPassword": "", - "esVerifyCerts": False, - "esUseSsl": False, - "esSslShowWarn": False, - "turnOffSslVerification": True, - "esCAcert": "", - "esClientCert": "", - "esClientKey": "", - "appVersion": "", - "minioRegion": "", - "minioBucketPrefix": "", - "filesystemDefaultPath": "", - "esChunkNumber": 1000, - "binaryStoreType": "minio", - "minioHost": "", - "minioAccessKey": "", - "minioSecretKey": "", - "esProjectIndexPrefix": "rp_" - }, + "app_config": APP_CONFIG, "launch_remove_info": { "project": 1, "launch_ids": [1, 2]}, @@ -536,8 +428,7 @@ def test_remove_launches_suggests(self): app_config = self.app_config if "app_config" in test: app_config = test["app_config"] - suggest_info_service = SuggestInfoService(app_config=app_config, - search_cfg=self.get_default_search_config()) + suggest_info_service = SuggestInfoService(app_config=app_config) response = suggest_info_service.clean_suggest_info_logs_by_launch_id( test["launch_remove_info"]) @@ -627,8 +518,7 @@ def test_suggest_info_update(self): app_config = 
self.app_config if "app_config" in test: app_config = test["app_config"] - suggest_info_service = SuggestInfoService(app_config=app_config, - search_cfg=self.get_default_search_config()) + suggest_info_service = SuggestInfoService(app_config=app_config) suggest_info_service.es_client.es_client.scroll = MagicMock(return_value=json.loads( get_fixture(self.no_hits_search_rs))) response = suggest_info_service.update_suggest_info(test["defect_update_info"]) diff --git a/test/service/test_suggest_patterns_service.py b/test/service/test_suggest_patterns_service.py index 43410ecf..ebc19ae0 100644 --- a/test/service/test_suggest_patterns_service.py +++ b/test/service/test_suggest_patterns_service.py @@ -20,9 +20,10 @@ import httpretty -from app.commons import launch_objects +from app.commons.model import launch_objects from app.service import SuggestPatternsService from app.utils import utils +from test import APP_CONFIG from test.mock_service import TestService @@ -60,28 +61,7 @@ def test_suggest_patterns(self): "status": HTTPStatus.OK, }, ], - "app_config": { - "esHost": "http://localhost:9200", - "esUser": "", - "esPassword": "", - "esVerifyCerts": False, - "esUseSsl": False, - "esSslShowWarn": False, - "turnOffSslVerification": True, - "esCAcert": "", - "esClientCert": "", - "esClientKey": "", - "appVersion": "", - "minioRegion": "", - "minioBucketPrefix": "", - "filesystemDefaultPath": "", - "esChunkNumber": 1000, - "binaryStoreType": "minio", - "minioHost": "", - "minioAccessKey": "", - "minioSecretKey": "", - "esProjectIndexPrefix": "rp_" - }, + "app_config": APP_CONFIG, "rq": 1, "query_data": [], "expected_count_with_labels": [], @@ -120,28 +100,7 @@ def test_suggest_patterns(self): "status": HTTPStatus.OK, }, ], - "app_config": { - "esHost": "http://localhost:9200", - "esUser": "", - "esPassword": "", - "esVerifyCerts": False, - "esUseSsl": False, - "esSslShowWarn": False, - "turnOffSslVerification": True, - "esCAcert": "", - "esClientCert": "", - "esClientKey": "", - "appVersion": "", - "minioRegion": "", - "minioBucketPrefix": "", - "filesystemDefaultPath": "", - "esChunkNumber": 1000, - "binaryStoreType": "minio", - "minioHost": "", - "minioAccessKey": "", - "minioSecretKey": "", - "esProjectIndexPrefix": "rp_" - }, + "app_config": APP_CONFIG, "rq": 1, "query_data": [("assertionError notFoundError", "ab001"), ("assertionError ifElseError", "pb001"), diff --git a/test/service/test_suggest_service.py b/test/service/test_suggest_service.py index cffe4c0b..a38dc843 100644 --- a/test/service/test_suggest_service.py +++ b/test/service/test_suggest_service.py @@ -5,7 +5,7 @@ * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * -* http://www.apache.org/licenses/LICENSE-2.0 +* https://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -21,11 +21,12 @@ import httpretty -from app.boosting_decision_making.boosting_decision_maker import BoostingDecisionMaker -from app.commons import launch_objects +from app.commons import object_saving +from app.commons.model import launch_objects +from app.machine_learning.models.boosting_decision_maker import BoostingDecisionMaker from app.service import SuggestService from app.utils import utils -from test import get_fixture +from test import get_fixture, APP_CONFIG from test.mock_service import TestService @@ -527,28 +528,7 @@ def test_suggest_items(self): "uri": "/rp_1", "status": HTTPStatus.OK, }], - "app_config": { - "esHost": "http://localhost:9200", - "esUser": "", - "esPassword": "", - "esVerifyCerts": False, - "esUseSsl": False, - "esSslShowWarn": False, - "turnOffSslVerification": True, - "esCAcert": "", - "esClientCert": "", - "esClientKey": "", - "appVersion": "", - "minioRegion": "", - "minioBucketPrefix": "", - "filesystemDefaultPath": "", - "esChunkNumber": 1000, - "binaryStoreType": "minio", - "minioHost": "", - "minioAccessKey": "", - "minioSecretKey": "", - "esProjectIndexPrefix": "rp_" - }, + "app_config": APP_CONFIG, "msearch_results": [ get_fixture(self.one_hit_search_rs_merged, to_json=True), get_fixture(self.one_hit_search_rs_merged, to_json=True), @@ -630,28 +610,7 @@ def test_suggest_items(self): "content_type": "application/json", "rs": get_fixture(self.index_logs_rs), }], - "app_config": { - "esHost": "http://localhost:9200", - "esUser": "", - "esPassword": "", - "esVerifyCerts": False, - "esUseSsl": False, - "esSslShowWarn": False, - "turnOffSslVerification": True, - "esCAcert": "", - "esClientCert": "", - "esClientKey": "", - "appVersion": "", - "minioRegion": "", - "minioBucketPrefix": "", - "filesystemDefaultPath": "", - "esChunkNumber": 1000, - "binaryStoreType": "minio", - "minioHost": "", - "minioAccessKey": "", - "minioSecretKey": "", - "esProjectIndexPrefix": "rp_" - }, + "app_config": APP_CONFIG, "msearch_results": [ get_fixture(self.one_hit_search_rs_merged_wrong, to_json=True), get_fixture(self.one_hit_search_rs_merged_wrong, to_json=True), @@ -793,37 +752,29 @@ def test_suggest_items(self): ] for idx, test in enumerate(tests): - try: - self._start_server(test["test_calls"]) - config = self.get_default_search_config() - app_config = self.app_config - if "app_config" in test: - app_config = test["app_config"] - suggest_service = SuggestService(self.model_chooser, - app_config=app_config, - search_cfg=config) - suggest_service.es_client.es_client.scroll = MagicMock(return_value=json.loads( - get_fixture(self.no_hits_search_rs))) - if "msearch_results" in test: - suggest_service.es_client.es_client.msearch = MagicMock( - return_value={"responses": test["msearch_results"]}) - _boosting_decision_maker = BoostingDecisionMaker() - _boosting_decision_maker.get_feature_ids = MagicMock(return_value=[0]) - _boosting_decision_maker.get_feature_names = MagicMock(return_value=["0"]) - _boosting_decision_maker.predict = MagicMock(return_value=test["boost_predict"]) - suggest_service.model_chooser.choose_model = MagicMock( - return_value=_boosting_decision_maker) - response = suggest_service.suggest_items(test["test_item_info"]) + print(f'Running test case idx: {idx}') + 
self._start_server(test["test_calls"]) + config = self.get_default_search_config() + app_config = self.app_config + if "app_config" in test: + app_config = test["app_config"] + suggest_service = SuggestService(self.model_chooser, app_config=app_config, search_cfg=config) + suggest_service.es_client.es_client.scroll = MagicMock(return_value=json.loads( + get_fixture(self.no_hits_search_rs))) + if "msearch_results" in test: + suggest_service.es_client.es_client.msearch = MagicMock( + return_value={"responses": test["msearch_results"]}) + _boosting_decision_maker = BoostingDecisionMaker(object_saving.create_filesystem(""), '', features=[0]) + _boosting_decision_maker.predict = MagicMock(return_value=test["boost_predict"]) + suggest_service.model_chooser.choose_model = MagicMock(return_value=_boosting_decision_maker) + response = suggest_service.suggest_items(test["test_item_info"]) - assert len(response) == len(test["expected_result"]) - for real_resp, expected_resp in zip(response, test["expected_result"]): - real_resp.processedTime = 10.0 - assert real_resp == expected_resp + assert len(response) == len(test["expected_result"]) + for real_resp, expected_resp in zip(response, test["expected_result"]): + real_resp.processedTime = 10.0 + assert real_resp == expected_resp - TestSuggestService.shutdown_server(test["test_calls"]) - except AssertionError as err: - raise AssertionError(f'Error in the test case number: {idx}'). \ - with_traceback(err.__traceback__) + TestSuggestService.shutdown_server(test["test_calls"]) if __name__ == '__main__': diff --git a/test/unit/__init__.py b/test/unit/__init__.py new file mode 100644 index 00000000..c85b2e50 --- /dev/null +++ b/test/unit/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2023 EPAM Systems +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/test/unit/commons/__init__.py b/test/unit/commons/__init__.py new file mode 100644 index 00000000..c85b2e50 --- /dev/null +++ b/test/unit/commons/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2023 EPAM Systems +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/test/unit/commons/object_saving/__init__.py b/test/unit/commons/object_saving/__init__.py new file mode 100644 index 00000000..c85b2e50 --- /dev/null +++ b/test/unit/commons/object_saving/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2023 EPAM Systems +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/test/unit/commons/object_saving/test_filesystem_saver.py b/test/unit/commons/object_saving/test_filesystem_saver.py new file mode 100644 index 00000000..46b1cc89 --- /dev/null +++ b/test/unit/commons/object_saving/test_filesystem_saver.py @@ -0,0 +1,207 @@ +# Copyright 2023 EPAM Systems +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +# noinspection PyPackageRequirements +import pytest + +from app.commons.object_saving.filesystem_saver import FilesystemSaver +from app.commons.model.launch_objects import ApplicationConfig +from test import random_alphanumeric + + +CREATED_FILES_AND_FOLDERS = [] + + +def create_storage_client(path): + return FilesystemSaver(ApplicationConfig(filesystemDefaultPath=path)) + + +def test_object_not_exists(): + base_path = f'test_{random_alphanumeric(16)}' + object_name = f'{random_alphanumeric(16)}.pickle' + file_system = create_storage_client(base_path) + + assert not file_system.does_object_exists('', object_name) + + +def test_object_exists(): + base_path = f'test_{random_alphanumeric(16)}' + object_name = f'{random_alphanumeric(16)}.pickle' + file_system = create_storage_client(base_path) + expected_path = os.path.join(base_path, object_name) + CREATED_FILES_AND_FOLDERS.append(expected_path) + CREATED_FILES_AND_FOLDERS.append(base_path) + + file_system.put_project_object({'test': True}, '', object_name) + + assert file_system.does_object_exists('', object_name) + + +def test_json_write(): + base_path = 'test' + object_name = f'{random_alphanumeric(16)}.json' + file_system = create_storage_client(base_path) + + expected_path = os.path.join(base_path, object_name) + + CREATED_FILES_AND_FOLDERS.append(expected_path) + CREATED_FILES_AND_FOLDERS.append(base_path) + + file_system.put_project_object({'test': True}, '', object_name, using_json=True) + + with open(expected_path, 'r') as f: + assert f.readline() == '{"test": true}' + + +def test_json_read(): + base_path = 'test' + object_name = f'{random_alphanumeric(16)}.json' + file_system = create_storage_client(base_path) + expected_path = os.path.join(base_path, object_name) + + CREATED_FILES_AND_FOLDERS.append(expected_path) + CREATED_FILES_AND_FOLDERS.append(base_path) + + with open(expected_path, 'w') as f: + f.writelines(['{"test": true}']) + + result = file_system.get_project_object('', object_name, using_json=True) + assert isinstance(result, dict) + assert result['test'] is True + + +def test_not_existing_file_get(): + base_path = 'test' + object_name = f'{random_alphanumeric(16)}.json' + file_system = create_storage_client(base_path) + expected_path = 
os.path.join(base_path, object_name) + + with pytest.raises(ValueError) as exc: + file_system.get_project_object('', object_name) + assert exc.value.args[0] == f'Unable to get file: {expected_path}' + + +def test_remove_not_existing_folder(): + base_path = f'test_{random_alphanumeric(16)}' + path = 'test' + file_system = create_storage_client(base_path) + + assert not file_system.remove_folder_objects('', path) + + +def test_remove_existing_folder(): + base_path = f'test_{random_alphanumeric(16)}' + path = 'test' + expected_path = os.path.join(base_path, path) + os.makedirs(expected_path) + CREATED_FILES_AND_FOLDERS.append(base_path) + + file_system = create_storage_client(base_path) + + assert file_system.remove_folder_objects('', path) + assert not os.path.exists(expected_path) + + +def test_list_not_existing_folder(): + base_path = f'test_{random_alphanumeric(16)}' + path = 'test' + file_system = create_storage_client(base_path) + + assert file_system.get_folder_objects('', path) == [] + + +def test_list_existing_folder(): + bucket = '6' + base_path = f'test_{random_alphanumeric(16)}' + object_name = f'{random_alphanumeric(16)}.json' + path = 'test' + resource = '/'.join([path, object_name]) + CREATED_FILES_AND_FOLDERS.append('/'.join([base_path, bucket, path, object_name])) + CREATED_FILES_AND_FOLDERS.append('/'.join([base_path, bucket, path])) + CREATED_FILES_AND_FOLDERS.append('/'.join([base_path, bucket])) + CREATED_FILES_AND_FOLDERS.append(base_path) + + file_system = create_storage_client(base_path) + file_system.put_project_object({'test': True}, bucket, resource, using_json=True) + + assert file_system.get_folder_objects(bucket, path) == [path] + + +def test_list_dir_separators(): + bucket = '7' + object_name = f'{random_alphanumeric(16)}.json' + path = 'test/' + resource = path + object_name + CREATED_FILES_AND_FOLDERS.append('/'.join([bucket, path, object_name])) + CREATED_FILES_AND_FOLDERS.append('/'.join([bucket, path])) + CREATED_FILES_AND_FOLDERS.append('/'.join([bucket])) + + file_system = create_storage_client('') + file_system.put_project_object({'test': True}, bucket, resource, using_json=True) + + assert file_system.get_folder_objects(bucket, path) == [resource] + + +def test_remove_project_objects(): + bucket = '8' + object_name = f'{random_alphanumeric(16)}.json' + path = 'test/' + resource = path + object_name + + file_system = create_storage_client('') + file_system.put_project_object({'test': True}, bucket, resource, using_json=True) + + file_system.remove_project_objects(bucket, [resource]) + with pytest.raises(ValueError): + file_system.get_project_object(bucket, resource) + + +@pytest.mark.parametrize('base_path', ['test_base_path', '']) +def test_base_path(base_path): + object_name = f'{random_alphanumeric(16)}.pickle' + file_system = create_storage_client(base_path) + + file_system.put_project_object({'test': True}, '', object_name) + + if base_path: + expected_path = os.path.join(base_path, object_name) + expected_directory = os.path.join(os.getcwd(), base_path) + CREATED_FILES_AND_FOLDERS.append(base_path) + else: + expected_path = object_name + expected_directory = os.getcwd() + CREATED_FILES_AND_FOLDERS.append(expected_path) + assert os.path.exists(expected_path) + assert os.path.isfile(expected_path) + + result = file_system.get_project_object('', object_name) + assert isinstance(result, dict) + assert result['test'] + + assert file_system.get_folder_objects('', '') == os.listdir(expected_directory) + + +@pytest.fixture(autouse=True, scope='session') 
+def clean_up(): + yield + for file in CREATED_FILES_AND_FOLDERS: + if os.path.exists(file): + if os.path.isdir(file): + try: + os.removedirs(file) + except OSError: + pass + else: + os.remove(file) diff --git a/test/unit/commons/object_saving/test_minio_client.py b/test/unit/commons/object_saving/test_minio_client.py new file mode 100644 index 00000000..3fa957b1 --- /dev/null +++ b/test/unit/commons/object_saving/test_minio_client.py @@ -0,0 +1,181 @@ +# Copyright 2023 EPAM Systems +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# noinspection PyPackageRequirements +import pytest +import requests +# noinspection PyPackageRequirements +from moto.server import ThreadedMotoServer + +from app.commons.model.launch_objects import ApplicationConfig +from app.commons.object_saving.minio_client import MinioClient +from test import random_alphanumeric + +SERVER_PORT = 5123 +REGION = 'us-west-1' +BUCKET_PREFIX = 'prj-' +SERVER_HOST = f'localhost:{SERVER_PORT}' + + +@pytest.fixture(autouse=True, scope='session') +def run_s3(): + server = ThreadedMotoServer(port=SERVER_PORT, verbose=True) + server.start() + yield + server.stop() + + +def create_storage_client(): + return MinioClient(ApplicationConfig(minioHost=SERVER_HOST, minioRegion=REGION, minioBucketPrefix=BUCKET_PREFIX, + minioAccessKey='minio', minioSecretKey='minio', minioUseTls=False)) + + +def test_object_not_exists(): + object_name = f'{random_alphanumeric(16)}.pickle' + minio_client = create_storage_client() + + assert not minio_client.does_object_exists('2', object_name) + + +def test_object_exists(): + object_name = f'{random_alphanumeric(16)}.pickle' + minio_client = create_storage_client() + + minio_client.put_project_object({'test': True}, '2', object_name) + + assert minio_client.does_object_exists('2', object_name) + + +def get_url(bucket, object_name): + # noinspection HttpUrlsUsage + return f'http://{SERVER_HOST}/{BUCKET_PREFIX}{bucket}/{object_name}' + + +def test_json_write(): + bucket = '2' + object_name = 'SIED2wqgAppe4XPl.json' + minio_client = create_storage_client() + + minio_client.put_project_object({'test': True}, bucket, object_name, using_json=True) + + headers = { + 'x-amz-date': '20231124T123217Z', + 'x-amz-content-sha256': 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855', + 'authorization': 'AWS4-HMAC-SHA256 Credential=minio/20231124/us-west-1/s3/aws4_request, ' + 'SignedHeaders=host;user-agent;x-amz-content-sha256;x-amz-date, ' + 'Signature=dc971726ff2b266f208b250089b2ba0be86352efad2858145b33c2ae085e7d71' + } + response = requests.get(get_url(bucket, object_name), headers=headers) + assert response.text == '{"test": true}' + + +def test_json_read(): + bucket = '2' + object_name = '5ymFfxpAOK2eKYxx.json' + minio_client = create_storage_client() + + headers = { + 'x-amz-date': '20231124T124147Z', + 'x-amz-content-sha256': '80f65706d935d3b928d95207937dd81bad43ab56cd4d3b7ed41772318e734168', + 'authorization': 'AWS4-HMAC-SHA256 Credential=minio/20231124/us-west-1/s3/aws4_request, ' + 
'SignedHeaders=content-length;content-type;host;user-agent;x-amz-content-sha256;x-amz-date, ' + 'Signature=d592f084a4f9fd46a8624a37323b5be843120bd9e7c075c925faea573f00511e' + } + requests.put(get_url(bucket, object_name), headers=headers, data='{"test": true}'.encode('utf-8')) + + result = minio_client.get_project_object(bucket, object_name, using_json=True) + assert isinstance(result, dict) + assert result['test'] is True + + +def test_not_existing_file_get(): + object_name = f'{random_alphanumeric(16)}.json' + minio_client = create_storage_client() + + with pytest.raises(ValueError) as exc: + minio_client.get_project_object('2', object_name) + assert exc.value.args[0] == f'Unable to get file: {object_name}' + + +def test_remove_not_existing_folder(): + path = 'test' + minio_client = create_storage_client() + + assert not minio_client.remove_folder_objects('3', path) + + +def test_remove_existing_folder(): + bucket = '5' + object_name = f'{random_alphanumeric(16)}.json' + path = 'test' + resource = '/'.join([path, object_name]) + + minio_client = create_storage_client() + minio_client.put_project_object({'test': True}, bucket, resource) + + assert minio_client.remove_folder_objects(bucket, path) + headers = { + 'x-amz-date': '20231124T123217Z', + 'x-amz-content-sha256': 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855', + 'authorization': 'AWS4-HMAC-SHA256 Credential=minio/20231124/us-west-1/s3/aws4_request, ' + 'SignedHeaders=host;user-agent;x-amz-content-sha256;x-amz-date, ' + 'Signature=dc971726ff2b266f208b250089b2ba0be86352efad2858145b33c2ae085e7d71' + } + response = requests.get(get_url(bucket, resource), headers=headers) + assert response.status_code == 404 + + +def test_list_not_existing_folder(): + path = 'test' + minio_client = create_storage_client() + + assert minio_client.get_folder_objects('4', path) == [] + + +def test_list_existing_folder(): + bucket = '6' + object_name = f'{random_alphanumeric(16)}.json' + path = 'test' + resource = '/'.join([path, object_name]) + + minio_client = create_storage_client() + minio_client.put_project_object({'test': True}, bucket, resource, using_json=True) + + assert minio_client.get_folder_objects(bucket, path) == [resource] + + +def test_list_dir_separators(): + bucket = '7' + object_name = f'{random_alphanumeric(16)}.json' + path = 'test/' + resource = path + object_name + + minio_client = create_storage_client() + minio_client.put_project_object({'test': True}, bucket, resource, using_json=True) + + assert minio_client.get_folder_objects(bucket, path) == [resource] + + +def test_remove_project_objects(): + bucket = '8' + object_name = f'{random_alphanumeric(16)}.json' + path = 'test/' + resource = path + object_name + + minio_client = create_storage_client() + minio_client.put_project_object({'test': True}, bucket, resource, using_json=True) + + minio_client.remove_project_objects(bucket, [resource]) + with pytest.raises(ValueError): + minio_client.get_project_object(bucket, resource) diff --git a/test/unit/commons/test_prepared_log.py b/test/unit/commons/test_prepared_log.py new file mode 100644 index 00000000..93c74941 --- /dev/null +++ b/test/unit/commons/test_prepared_log.py @@ -0,0 +1,30 @@ +# Copyright 2024 EPAM Systems +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest +from test import read_file +from app.commons.prepared_log import PreparedLogMessage + + +@pytest.mark.parametrize( + 'test_file, expected_file', + [ + ('stacktraces/log_stacktrace_js.txt', + 'stacktraces/log_stacktrace_js_exception_message_no_params_and_brackets.txt'), + ] +) +def test_exception_message_no_params_and_brackets(test_file, expected_file): + log = read_file('test_res/test_logs', test_file) + expected_log = read_file('test_res/test_logs', expected_file) + assert PreparedLogMessage(log, -1).exception_message_no_params == expected_log.strip() diff --git a/test/unit/commons/test_esclient.py b/test/unit/commons/test_unit_esclient.py similarity index 76% rename from test/unit/commons/test_esclient.py rename to test/unit/commons/test_unit_esclient.py index 291ecf75..966c1d5e 100644 --- a/test/unit/commons/test_esclient.py +++ b/test/unit/commons/test_unit_esclient.py @@ -16,8 +16,8 @@ import pytest from app.commons import esclient -from app.commons.launch_objects import Launch, TestItem, Log -from test import DEFAULT_ES_CONFIG, DEFAULT_SEARCH_CONFIG +from app.commons.model.launch_objects import Launch, TestItem, Log +from test import DEFAULT_ES_CONFIG TEST_PROJECT_ID = 2 @@ -25,13 +25,13 @@ def create_test_es_client(): es_mock = mock.Mock() es_mock.search.return_value = {'hits': {'hits': []}} - return esclient.EsClient(DEFAULT_ES_CONFIG, DEFAULT_SEARCH_CONFIG, es_mock) + return esclient.EsClient(DEFAULT_ES_CONFIG, es_mock) def create_test_launch_one_item(): logs = [Log(logId=37135, logLevel=40000, message="Environment variable 'SAUCELABS_USER' does not exist.")] - test_items = [TestItem(testItemId=2190, uniqueId='auto:4cf9a1d86e663dd6e8a4a99dbd78e7ce', isAutoAnalyzed=False, - testCaseHash=-2120975783, testItemName='Example page test', logs=logs)] + test_items = [TestItem(testItemId=2190, isAutoAnalyzed=False, testCaseHash=-2120975783, + testItemName='Example page test', logs=logs)] return Launch(launchId=10, project=TEST_PROJECT_ID, launchName='Test Launch', launchNumber=7, testItems=test_items) @@ -39,8 +39,8 @@ def create_test_launch_two_items(): launch = create_test_launch_one_item() test_items = launch.testItems logs = [Log(logId=37136, logLevel=40000, message="Environment variable 'SAUCELABS_USER' does not exist.")] - test_items.append(TestItem(testItemId=2191, uniqueId='auto:4cf9a1d86e663dd6e8a4a99dbd78e7cf', isAutoAnalyzed=False, - testCaseHash=-2120975784, testItemName='Example page test', logs=logs)) + test_items.append(TestItem(testItemId=2191, isAutoAnalyzed=False, testCaseHash=-2120975784, + testItemName='Example page test', logs=logs)) return launch @@ -48,8 +48,8 @@ def create_test_launch_two_items_one_indexed_log(): launch = create_test_launch_one_item() test_items = launch.testItems logs = [Log(logId=37136, logLevel=30000, message="Environment variable 'SAUCELABS_USER' does not exist.")] - test_items.append(TestItem(testItemId=2191, uniqueId='auto:4cf9a1d86e663dd6e8a4a99dbd78e7cf', isAutoAnalyzed=False, - testCaseHash=-2120975784, testItemName='Example page test', logs=logs)) + test_items.append(TestItem(testItemId=2191, 
isAutoAnalyzed=False, testCaseHash=-2120975784, + testItemName='Example page test', logs=logs)) return launch diff --git a/test/unit/service/__init__.py b/test/unit/service/__init__.py new file mode 100644 index 00000000..c85b2e50 --- /dev/null +++ b/test/unit/service/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2023 EPAM Systems +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/test/unit/utils/__init__.py b/test/unit/utils/__init__.py new file mode 100644 index 00000000..c85b2e50 --- /dev/null +++ b/test/unit/utils/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2023 EPAM Systems +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/test/unit/utils/test_log_preparation.py b/test/unit/utils/test_log_preparation.py new file mode 100644 index 00000000..5d8f4c06 --- /dev/null +++ b/test/unit/utils/test_log_preparation.py @@ -0,0 +1,40 @@ +# Copyright 2024 EPAM Systems +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pytest + +from test import read_file_lines, read_file +from app.utils import log_preparation + + +def test_remove_starting_thread_name(): + log = read_file_lines('test_res/test_logs', 'log_line_timestamps.txt') + expected_log = read_file_lines('test_res/test_logs', 'log_line_prepared.txt') + for i, line in enumerate(log): + assert log_preparation.basic_prepare(line) == expected_log[i].strip() + + +@pytest.mark.parametrize( + 'test_file, expected_file', + [ + ('separators/mixed_markdown_separators.txt', 'separators/mixed_markdown_separators_prepared.txt'), + ('stacktraces/webdriver_selenide_stacktrace.txt', 'stacktraces/webdriver_selenide_stacktrace_prepared.txt'), + ('stacktraces/log_stacktrace_js.txt', 'stacktraces/log_stacktrace_js_prepared.txt'), + ('webdriver/webdriver_exception_info.txt', 'webdriver/webdriver_exception_info_prepared.txt'), + ] +) +def test_separators_log_prepare(test_file, expected_file): + log = read_file('test_res/test_logs', test_file) + expected_log = read_file('test_res/test_logs', expected_file) + assert log_preparation.basic_prepare(log) == expected_log.strip() diff --git a/test/unit/utils/test_text_processing.py b/test/unit/utils/test_text_processing.py index 43e79995..75b35d90 100644 --- a/test/unit/utils/test_text_processing.py +++ b/test/unit/utils/test_text_processing.py @@ -24,8 +24,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import pytest + from app.utils import utils, text_processing -from test import read_file_lines +from test import read_file_lines, read_file def test_delete_empty_lines(): @@ -43,7 +45,124 @@ def test_filter_empty_lines(): def test_remove_starting_datetime(): - log = read_file_lines('test_res/test_logs', 'log_line.txt') - expected_log = read_file_lines('test_res/test_logs', 'log_line_no_timestamp_current.txt') + log = read_file_lines('test_res/test_logs', 'log_line_timestamps.txt') + expected_log = read_file_lines('test_res/test_logs', 'log_line_no_timestamp.txt') for i, line in enumerate(log): assert text_processing.remove_starting_datetime(line) == expected_log[i] + + +def test_remove_starting_log_level(): + log = read_file_lines('test_res/test_logs', 'log_line_no_timestamp.txt') + expected_log = read_file_lines('test_res/test_logs', 'log_line_no_log_level.txt') + for i, line in enumerate(log): + assert text_processing.remove_starting_log_level(line) == expected_log[i] + + +def test_remove_starting_thread_id(): + log = read_file_lines('test_res/test_logs', 'log_line_no_log_level.txt') + expected_log = read_file_lines('test_res/test_logs', 'log_line_no_thread_id.txt') + for i, line in enumerate(log): + assert text_processing.remove_starting_thread_id(line) == expected_log[i] + + +def test_remove_starting_thread_name(): + log = read_file_lines('test_res/test_logs', 'log_line_no_thread_id.txt') + expected_log = read_file_lines('test_res/test_logs', 'log_line_no_thread_name.txt') + for i, line in enumerate(log): + assert text_processing.remove_starting_thread_name(line) == expected_log[i] + + +@pytest.mark.parametrize( + 'test_file, expected_file', + [ + ('stacktraces/log_stacktrace_generated.txt', 'stacktraces/log_stacktrace_prepared.txt'), + ('stacktraces/log_stacktrace_generated_2.txt', 'stacktraces/log_stacktrace_prepared_2.txt'), + ('stacktraces/log_stacktrace_generated_3.txt', 'stacktraces/log_stacktrace_prepared_3.txt'), + ('log_locator_with_attribute.txt', 'log_locator_with_attribute_prepared.txt') + ] +) +def test_remove_generated_parts(test_file, 
expected_file): + log = read_file('test_res/test_logs', test_file) + expected_log = read_file('test_res/test_logs', expected_file) + assert text_processing.remove_generated_parts(log) == expected_log + + +def test_clean_from_brackets(): + log = read_file_lines('test_res/test_logs', 'brackets_test.txt') + expected_log = read_file_lines('test_res/test_logs', 'brackets_test_results.txt') + for i, line in enumerate(log): + assert text_processing.clean_from_brackets(line) == expected_log[i] + + +@pytest.mark.parametrize( + 'message, expected_message', + [ + ('\t \r\n ', '\n '), + ('\r\n', '\n'), + ('\n', '\n'), + ('\u00A0\u00A0\u00A0\n', '\n'), + ('\u00A0\r\n', '\n'), + ] +) +def test_unify_line_endings(message, expected_message): + assert text_processing.unify_line_endings(message) == expected_message + + +@pytest.mark.parametrize( + 'message, expected_message', + [ + ('\t \r\n ', ' \r\n'), + ('\r\n', '\r\n'), + ('\n', '\n'), + ('\u00A0\u00A0\u00A0\n', '\n'), + ('\u00A0\r\n', ' \r\n'), + ('\u00A0\u2000\u2001', ' '), + ('\u202F\u205F\u3000', ' '), + ('a\u202F\u205F\u3000b', 'a b'), + ('\u00A0\u00A0\u00A0\n\u00A0\u00A0\u00A0', '\n'), + ] +) +def test_unify_spaces(message, expected_message): + assert text_processing.unify_spaces(message) == expected_message + + +def test_remove_markdown_mode(): + log = read_file('test_res/test_logs/markdown', 'markdown_at_log.txt') + expected_log = read_file('test_res/test_logs/markdown', 'markdown_at_log_prepared.txt') + assert text_processing.remove_markdown_mode(log) == expected_log + + +@pytest.mark.parametrize( + 'test_file, expected_file', + [ + ('separators/markdown_separator_log.txt', 'separators/markdown_separator_log_prepared.txt'), + ('separators/step_separator_log.txt', 'separators/step_separator_log_prepared.txt'), + ('separators/step_separator_equality_log.txt', 'separators/step_separator_log_prepared.txt'), + ('separators/step_separator_underscore_log.txt', 'separators/step_separator_log_prepared.txt'), + ('separators/fancy_separator_log.txt', 'separators/fancy_separator_log_prepared.txt'), + ] +) +def test_replace_code_separators(test_file, expected_file): + log = read_file('test_res/test_logs', test_file) + expected_log = read_file('test_res/test_logs', expected_file) + assert text_processing.replace_code_separators(log) == expected_log + + +def test_remove_webdriver_auxiliary_info(): + log = read_file_lines('test_res/test_logs/webdriver', 'webdriver_oneliners.txt') + expected_log = read_file_lines('test_res/test_logs/webdriver', 'webdriver_oneliners_prepared.txt') + for i, line in enumerate(log): + assert text_processing.remove_webdriver_auxiliary_info(line) == expected_log[i] + + +@pytest.mark.parametrize( + 'test_file, expected_file', + [ + ('stacktraces/webdriver_selenide_stacktrace.txt', + 'stacktraces/webdriver_selenide_stacktrace_no_webdriver.txt'), + ] +) +def test_remove_webdriver_auxiliary_info_big(test_file, expected_file): + log = read_file('test_res/test_logs', test_file) + expected_log = read_file('test_res/test_logs', expected_file) + assert text_processing.remove_webdriver_auxiliary_info(log) == expected_log diff --git a/test_res/fixtures/boost_model_results.json b/test_res/fixtures/boost_model_results.json index 4813e07d..4e476be1 100644 --- a/test_res/fixtures/boost_model_results.json +++ b/test_res/fixtures/boost_model_results.json @@ -25,8 +25,16 @@ 1.0, 1.0, 1.0, - 0.88, 1.0, + 1.0, + 1.0, + 1.0, + 0.0, + 0.0, + 0.75, + 1.0, + 0.67, + 0.0, 0.0, 0.0, 0.0, @@ -40,10 +48,7 @@ 1 ], [ - [ - 0.0001252889633178711, - 
0.9998747110366821 - ] + [0.0013791322708129883, 0.998620867729187] ] ], "1": [ @@ -72,7 +77,15 @@ 1.0, 1.0, 1.0, - 0.88, + 1.0, + 1.0, + 1.0, + 1.0, + 0.0, + 0.0, + 0.75, + 1.0, + 1.0, 1.0, 1.0, 1.0, @@ -96,18 +109,26 @@ 0.0, 1.0, 0.67, - 0.47, + 0.43, 0.0, 0.49, 0.49, 0.49, 1.0, 1.0, + 1.0, + 1.0, 0.89, + 0.88, + 0.0, + 1.0, 0.0, 1.0, - 0.03, + 0.0, + 0.37, 0.68, + 0.67, + 0.0, 0.0, 0.0, 0.0, @@ -117,19 +138,10 @@ 1.0 ] ], + [1, 0], [ - 1, - 0 - ], - [ - [ - 0.0010358095169067383, - 0.9989641904830933 - ], - [ - 0.5298326015472412, - 0.4701673984527588 - ] + [0.003977537155151367, 0.9960224628448486], + [0.9831222295761108, 0.01687774993479252] ] ], "2": [ @@ -158,7 +170,15 @@ 1.0, 1.0, 1.0, - 0.88, + 1.0, + 1.0, + 1.0, + 1.0, + 0.0, + 0.0, + 0.75, + 1.0, + 1.0, 1.0, 1.0, 1.0, @@ -182,18 +202,26 @@ 0.0, 1.0, 0.67, - 0.47, + 0.43, 0.0, 0.49, 0.49, 0.49, 1.0, 1.0, + 1.0, + 1.0, 0.89, + 0.88, 0.0, 1.0, - 0.03, + 0.0, + 1.0, + 0.0, + 0.37, 0.68, + 0.67, + 0.0, 0.0, 0.0, 0.0, @@ -203,19 +231,10 @@ 2.0 ] ], + [1, 0], [ - 1, - 0 - ], - [ - [ - 0.0009244680404663086, - 0.9990755319595337 - ], - [ - 0.5283575654029846, - 0.4716424345970154 - ] + [0.002452075481414795, 0.9975479245185852], + [0.9957249164581299, 0.004275093786418438] ] ], "3": [ @@ -240,12 +259,20 @@ 1.0, 1.0, 1.0, + 1.0, 0.0, 1.0, 1.0, 1.0, - 0.88, 1.0, + 1.0, + 1.0, + 0.0, + 0.0, + 0.75, + 1.0, + 0.67, + 0.0, 0.0, 0.0, 0.0, @@ -255,14 +282,9 @@ 1.0 ] ], + [1], [ - 1 - ], - [ - [ - 0.0001405477523803711, - 0.9998594522476196 - ] + [0.0013791322708129883, 0.998620867729187] ] ], "4": [ @@ -287,11 +309,19 @@ 1.0, 1.0, 1.0, + 1.0, 0.0, 1.0, 1.0, 1.0, - 0.88, + 1.0, + 1.0, + 1.0, + 0.0, + 0.0, + 0.75, + 1.0, + 1.0, 1.0, 1.0, 1.0, @@ -315,18 +345,26 @@ 0.0, 1.0, 0.67, - 0.47, + 0.43, 0.0, 0.49, 0.49, 0.49, 1.0, + 1.0, 0.0, + 1.0, 0.89, + 0.88, 0.0, 1.0, - 0.03, + 0.0, + 1.0, + 0.0, + 0.37, 0.68, + 0.67, + 0.0, 0.0, 0.0, 0.0, @@ -336,19 +374,10 @@ 1.0 ] ], + [1, 0], [ - 1, - 0 - ], - [ - [ - 0.0011121630668640137, - 0.998887836933136 - ], - [ - 0.534410834312439, - 0.46558913588523865 - ] + [0.003977537155151367, 0.9960224628448486], + [0.9831222295761108, 0.01687774993479252] ] ], "5": [ @@ -373,11 +402,19 @@ 1.0, 1.0, 1.0, + 1.0, 0.0, 1.0, 1.0, 1.0, - 0.88, + 1.0, + 1.0, + 1.0, + 0.0, + 0.0, + 0.75, + 1.0, + 1.0, 1.0, 1.0, 1.0, @@ -401,18 +438,26 @@ 0.0, 1.0, 0.67, - 0.47, + 0.43, 0.0, 0.49, 0.49, 0.49, 1.0, + 1.0, 0.0, + 1.0, 0.89, + 0.88, + 0.0, + 1.0, 0.0, 1.0, - 0.03, + 0.0, + 0.37, 0.68, + 0.67, + 0.0, 0.0, 0.0, 0.0, @@ -422,19 +467,10 @@ 2.0 ] ], + [1, 0], [ - 1, - 0 - ], - [ - [ - 0.00099259614944458, - 0.9990074038505554 - ], - [ - 0.5329375267028809, - 0.46706244349479675 - ] + [0.002452075481414795, 0.9975479245185852], + [0.9957249164581299, 0.004275093786418438] ] ], "6": [ diff --git a/test_res/fixtures/cluster_update.json b/test_res/fixtures/cluster_update.json index 8a288f47..4e744398 100644 --- a/test_res/fixtures/cluster_update.json +++ b/test_res/fixtures/cluster_update.json @@ -1,6 +1,6 @@ {"update":{"_index":"2","_id":4}} -{"doc":{"cluster_id":"51305554424475301","cluster_message":"error occured \r\n error found \r\n error mined","cluster_with_numbers":true}} +{"doc":{"cluster_id":"21874152824769751","cluster_message":"error occurred\nerror found\nerror mined","cluster_with_numbers":true}} {"update":{"_index":"2","_id":5}} -{"doc":{"cluster_id":"51305554424475301","cluster_message":"error occured \r\n error found \r\n error mined","cluster_with_numbers":true}} 
+{"doc":{"cluster_id":"21874152824769751","cluster_message":"error occurred\nerror found\nerror mined","cluster_with_numbers":true}} {"update":{"_index":"2","_id":9}} -{"doc":{"cluster_id":"2474938495021661","cluster_message":"error occured \r\n error found \r\n assert query","cluster_with_numbers":true}} +{"doc":{"cluster_id":"44972330576749361","cluster_message":"error occurred\nerror found\nassert query","cluster_with_numbers":true}} diff --git a/test_res/fixtures/cluster_update_all_the_same.json b/test_res/fixtures/cluster_update_all_the_same.json index c8f20ea4..ae9f8248 100644 --- a/test_res/fixtures/cluster_update_all_the_same.json +++ b/test_res/fixtures/cluster_update_all_the_same.json @@ -1,6 +1,6 @@ {"update":{"_index":"2","_id":4}} -{"doc":{"cluster_id":"53490850438321651","cluster_message":"error occured \r\n error found","cluster_with_numbers":true}} +{"doc":{"cluster_id":"48859729558090231","cluster_message":"error occurred\nerror found","cluster_with_numbers":true}} {"update":{"_index":"2","_id":5}} -{"doc":{"cluster_id":"53490850438321651","cluster_message":"error occured \r\n error found","cluster_with_numbers":true}} +{"doc":{"cluster_id":"48859729558090231","cluster_message":"error occurred\nerror found","cluster_with_numbers":true}} {"update":{"_index":"2","_id":9}} -{"doc":{"cluster_id":"53490850438321651","cluster_message":"error occured \r\n error found","cluster_with_numbers":true}} +{"doc":{"cluster_id":"48859729558090231","cluster_message":"error occurred\nerror found","cluster_with_numbers":true}} diff --git a/test_res/fixtures/cluster_update_all_the_same_es_update.json b/test_res/fixtures/cluster_update_all_the_same_es_update.json index c8f20ea4..ae9f8248 100644 --- a/test_res/fixtures/cluster_update_all_the_same_es_update.json +++ b/test_res/fixtures/cluster_update_all_the_same_es_update.json @@ -1,6 +1,6 @@ {"update":{"_index":"2","_id":4}} -{"doc":{"cluster_id":"53490850438321651","cluster_message":"error occured \r\n error found","cluster_with_numbers":true}} +{"doc":{"cluster_id":"48859729558090231","cluster_message":"error occurred\nerror found","cluster_with_numbers":true}} {"update":{"_index":"2","_id":5}} -{"doc":{"cluster_id":"53490850438321651","cluster_message":"error occured \r\n error found","cluster_with_numbers":true}} +{"doc":{"cluster_id":"48859729558090231","cluster_message":"error occurred\nerror found","cluster_with_numbers":true}} {"update":{"_index":"2","_id":9}} -{"doc":{"cluster_id":"53490850438321651","cluster_message":"error occured \r\n error found","cluster_with_numbers":true}} +{"doc":{"cluster_id":"48859729558090231","cluster_message":"error occurred\nerror found","cluster_with_numbers":true}} diff --git a/test_res/fixtures/cluster_update_all_the_same_es_update_with_prefix.json b/test_res/fixtures/cluster_update_all_the_same_es_update_with_prefix.json index 1cd74584..5d8df2d4 100644 --- a/test_res/fixtures/cluster_update_all_the_same_es_update_with_prefix.json +++ b/test_res/fixtures/cluster_update_all_the_same_es_update_with_prefix.json @@ -1,6 +1,6 @@ {"update":{"_index":"rp_2","_id":4}} -{"doc":{"cluster_id":"53490850438321651","cluster_message":"error occured \r\n error found","cluster_with_numbers":true}} +{"doc":{"cluster_id":"48859729558090231","cluster_message":"error occurred\nerror found","cluster_with_numbers":true}} {"update":{"_index":"rp_2","_id":5}} -{"doc":{"cluster_id":"53490850438321651","cluster_message":"error occured \r\n error found","cluster_with_numbers":true}} 
+{"doc":{"cluster_id":"48859729558090231","cluster_message":"error occurred\nerror found","cluster_with_numbers":true}} {"update":{"_index":"rp_2","_id":9}} -{"doc":{"cluster_id":"53490850438321651","cluster_message":"error occured \r\n error found","cluster_with_numbers":true}} +{"doc":{"cluster_id":"48859729558090231","cluster_message":"error occurred\nerror found","cluster_with_numbers":true}} diff --git a/test_res/fixtures/cluster_update_all_the_same_es_with_different_errors.json b/test_res/fixtures/cluster_update_all_the_same_es_with_different_errors.json index 35009fe9..7db37595 100644 --- a/test_res/fixtures/cluster_update_all_the_same_es_with_different_errors.json +++ b/test_res/fixtures/cluster_update_all_the_same_es_with_different_errors.json @@ -1,6 +1,6 @@ {"update":{"_index":"2","_id":4}} -{"doc":{"cluster_id":"66538501077545981","cluster_message":"AssertionError error occured \r\n error found","cluster_with_numbers":true}} +{"doc":{"cluster_id":"37711525315085941","cluster_message":"AssertionError error occurred\nerror found","cluster_with_numbers":true}} {"update":{"_index":"2","_id":5}} -{"doc":{"cluster_id":"30071099716448071","cluster_message":"AssertionError status code: 500 error occured \r\n error found","cluster_with_numbers":true}} +{"doc":{"cluster_id":"48851059259117511","cluster_message":"AssertionError status code SPECIALNUMBER error occurred\nerror found","cluster_with_numbers":true}} {"update":{"_index":"2","_id":9}} -{"doc":{"cluster_id":"59521687023339221","cluster_message":"NoSuchElementException error occured \r\n error found","cluster_with_numbers":true}} +{"doc":{"cluster_id":"90988898127574211","cluster_message":"NoSuchElementException error occurred\nerror found","cluster_with_numbers":true}} diff --git a/test_res/fixtures/cluster_update_es_update.json b/test_res/fixtures/cluster_update_es_update.json index 0c5aa07d..b8dbd284 100644 --- a/test_res/fixtures/cluster_update_es_update.json +++ b/test_res/fixtures/cluster_update_es_update.json @@ -1,8 +1,8 @@ {"update":{"_index":"2","_id":4}} -{"doc":{"cluster_id":"123","cluster_message":"error occured \n error found \n error mined","cluster_with_numbers":true}} +{"doc":{"cluster_id":"123","cluster_message":"error occurred \n error found \n error mined","cluster_with_numbers":true}} {"update":{"_index":"2","_id":5}} -{"doc":{"cluster_id":"123","cluster_message":"error occured \n error found \n error mined","cluster_with_numbers":true}} +{"doc":{"cluster_id":"123","cluster_message":"error occurred \n error found \n error mined","cluster_with_numbers":true}} {"update":{"_index":"2","_id":111}} -{"doc":{"cluster_id":"123","cluster_message":"error occured \n error found \n error mined","cluster_with_numbers":true}} +{"doc":{"cluster_id":"123","cluster_message":"error occurred \n error found \n error mined","cluster_with_numbers":true}} {"update":{"_index":"2","_id":9}} -{"doc":{"cluster_id":"2474938495021661","cluster_message":"error occured \r\n error found \r\n assert query","cluster_with_numbers":true}} +{"doc":{"cluster_id":"44972330576749361","cluster_message":"error occurred\nerror found\nassert query","cluster_with_numbers":true}} diff --git a/test_res/fixtures/cluster_update_small_logs.json b/test_res/fixtures/cluster_update_small_logs.json index 9c2d7b91..e9000d36 100644 --- a/test_res/fixtures/cluster_update_small_logs.json +++ b/test_res/fixtures/cluster_update_small_logs.json @@ -1,12 +1,12 @@ {"update":{"_index":"2","_id":3}} -{"doc":{"cluster_id":"78342974021039661","cluster_message":"error occured 
twice \r\nAssertionError error occured \r\n error found","cluster_with_numbers":true}} +{"doc":{"cluster_id":"60604459849884091","cluster_message":"error occurred twice\nAssertionError error occurred\nerror found","cluster_with_numbers":true}} {"update":{"_index":"2","_id":4}} -{"doc":{"cluster_id":"78342974021039661","cluster_message":"error occured twice \r\nAssertionError error occured \r\n error found","cluster_with_numbers":true}} +{"doc":{"cluster_id":"60604459849884091","cluster_message":"error occurred twice\nAssertionError error occurred\nerror found","cluster_with_numbers":true}} {"update":{"_index":"2","_id":5}} -{"doc":{"cluster_id":"37054331802624341","cluster_message":"AssertionError status code: 500 error occured","cluster_with_numbers":true}} +{"doc":{"cluster_id":"9398573272102061","cluster_message":"AssertionError status code SPECIALNUMBER error occurred","cluster_with_numbers":true}} {"update":{"_index":"2","_id":9}} -{"doc":{"cluster_id":"16492834929015971","cluster_message":"NoSuchElementException error occured \r\n error found \r\n assert query","cluster_with_numbers":true}} +{"doc":{"cluster_id":"86465058569810291","cluster_message":"NoSuchElementException error occurred\nerror found\nassert query","cluster_with_numbers":true}} {"update":{"_index":"2","_id":"4_m"}} -{"doc":{"cluster_id":"78342974021039661","cluster_message":"error occured twice \r\nAssertionError error occured \r\n error found","cluster_with_numbers":true}} +{"doc":{"cluster_id":"60604459849884091","cluster_message":"error occurred twice\nAssertionError error occurred\nerror found","cluster_with_numbers":true}} {"update":{"_index":"2","_id":"5_m"}} -{"doc":{"cluster_id":"37054331802624341","cluster_message":"AssertionError status code: 500 error occured","cluster_with_numbers":true}} +{"doc":{"cluster_id":"9398573272102061","cluster_message":"AssertionError status code SPECIALNUMBER error occurred","cluster_with_numbers":true}} diff --git a/test_res/fixtures/example_logs.json b/test_res/fixtures/example_logs.json index 0bb15744..1e2d4223 100644 --- a/test_res/fixtures/example_logs.json +++ b/test_res/fixtures/example_logs.json @@ -5,6 +5,12 @@ "detected_message": "org.openqa.selenium.TimeoutException: Expected condition failed: waiting for visibility of element located by By.xpath: //*[(translate(@id, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = 'g2.userdashboardspage.expandcollapsebutton') and not(ancestor::div[contains(@style,'display: none')])] (tried for 10 second(s) with 500 milliseconds interval)", "stacktrace": "at org.openqa.selenium.support.ui.WebDriverWait.timeoutException(WebDriverWait.java:113) \n at org.openqa.selenium.support.ui.FluentWait.until(FluentWait.java:283) \n at com.mscs.emr.test.functional.BaseTestPage.waitUntilElementVisibleBeta(BaseTestPage.java:2340) \n at com.mscs.emr.test.functional.BaseTestPage.waitUntilElementVisibleBeta(BaseTestPage.java:2328) \n at com.mscs.emr.test.functional.g2.pages.dashboard.UserDashboardTabPage.openOrCloseLibraryPanel(UserDashboardTabPage.java:52) \n at com.mscs.emr.test.functional.g2.pages.dashboard.UserDashboardTabPage.addVisitListWidgetToDashboard(UserDashboardTabPage.java:96) \n at com.mscs.emr.test.functional.g2.test.chargecapture.ChargeCaptureReportTest.test4ChargeInterfaceProceduresFollowedByDrugsChargeReportHeaderStatusNeedsRelease(ChargeCaptureReportTest.java:5360) \n at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) \n at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source) \n at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source) \n at java.lang.reflect.Method.invoke(Unknown Source) \n at org.testng.internal.MethodInvocationHelper.invokeMethod(MethodInvocationHelper.java:124) \n at org.testng.internal.MethodInvocationHelper$1.runTestMethod(MethodInvocationHelper.java:230) \n at com.mscs.emr.test.functional.InvokedMethodListener.lambda$0(InvokedMethodListener.java:276) \n at com.mscs.emr.ejb.session.adminTools.TransactionManager.doWork(TransactionManager.java:101) \n at com.mscs.emr.ejb.session.adminTools.TransactionManager.doWork(TransactionManager.java:82) \n at com.mscs.emr.test.functional.InvokedMethodListener.run(InvokedMethodListener.java:255) \n at org.testng.internal.MethodInvocationHelper.invokeHookable(MethodInvocationHelper.java:242) \n at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source) \n at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source) \n at java.lang.Thread.run(Unknown Source)" }, + { + "language": "java", + "log": "com.epam.ta.reportportal.exception.ReportPortalException: Impossible interact with integration. There are no analyzer services are deployed.\n\tat com.epam.ta.reportportal.commons.validation.ErrorTypeBasedRuleValidator.verify(ErrorTypeBasedRuleValidator.java:32)\n\tat com.epam.ta.reportportal.core.launch.cluster.pipeline.data.AnalyzerClusterDataProvider.provide(AnalyzerClusterDataProvider.java:47)\n\tat com.epam.ta.reportportal.core.launch.cluster.pipeline.SaveClusterDataPartProvider.lambda.provide.0(SaveClusterDataPartProvider.java:44)\n\tat java.base/java.util.Optional.flatMap(Optional.java:289)\n\tat com.epam.ta.reportportal.core.launch.cluster.pipeline.SaveClusterDataPartProvider.provide(SaveClusterDataPartProvider.java:44)\n\tat com.epam.ta.reportportal.core.launch.cluster.pipeline.SaveClusterDataPartProvider.provide(SaveClusterDataPartProvider.java:30)\n\tat com.epam.ta.reportportal.pipeline.PipelineConstructor.lambda.construct.0(PipelineConstructor.java:34)\n\tat java.base/java.util.stream.ReferencePipeline.3.1.accept(ReferencePipeline.java:197)\n\tat java.base/java.util.AbstractList.RandomAccessSpliterator.forEachRemaining(AbstractList.java:722)\n\tat java.base/java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:509)\n\tat java.base/java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:499)\n\tat java.base/java.util.stream.ReduceOps.ReduceOp.evaluateSequential(ReduceOps.java:921)\n\tat java.base/java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)\n\tat java.base/java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:682)\n\tat com.epam.ta.reportportal.pipeline.PipelineConstructor.construct(PipelineConstructor.java:34)\n\tat com.epam.ta.reportportal.core.launch.cluster.UniqueErrorGenerator.generateClusters(UniqueErrorGenerator.java:78)\n\tat com.epam.ta.reportportal.core.launch.cluster.UniqueErrorGenerator.generate(UniqueErrorGenerator.java:60)\n\tat com.epam.ta.reportportal.core.events.handler.item.TestItemUniqueErrorAnalysisRunner.handle(TestItemUniqueErrorAnalysisRunner.java:66)\n\tat com.epam.ta.reportportal.core.events.handler.item.TestItemUniqueErrorAnalysisRunner.handle(TestItemUniqueErrorAnalysisRunner.java:37)\n\tat com.epam.ta.reportportal.core.events.subscriber.impl.delegate.ProjectConfigDelegatingSubscriber.lambda.handleEvent.0(ProjectConfigDelegatingSubscriber.java:44)\n\tat java.base/java.lang.Iterable.forEach(Iterable.java:75)\n\tat 
com.epam.ta.reportportal.core.events.subscriber.impl.delegate.ProjectConfigDelegatingSubscriber.handleEvent(ProjectConfigDelegatingSubscriber.java:44)\n\tat com.epam.ta.reportportal.core.events.subscriber.impl.delegate.ProjectConfigDelegatingSubscriber.handleEvent(ProjectConfigDelegatingSubscriber.java:29)\n\tat com.epam.ta.reportportal.core.events.listener.TestItemIssueResolvedEventListener.lambda.onApplicationEvent.0(TestItemIssueResolvedEventListener.java:39)\n\tat java.base/java.lang.Iterable.forEach(Iterable.java:75)\n\tat com.epam.ta.reportportal.core.events.listener.TestItemIssueResolvedEventListener.onApplicationEvent(TestItemIssueResolvedEventListener.java:39)\n\tat org.springframework.cglib.proxy.MethodProxy.invoke(MethodProxy.java:218)\n\tat org.springframework.aop.framework.CglibAopProxy.CglibMethodInvocation.invokeJoinpoint(CglibAopProxy.java:793)\n\tat org.springframework.aop.framework.ReflectiveMethodInvocation.proceed(ReflectiveMethodInvocation.java:163)\n\tat org.springframework.aop.framework.CglibAopProxy.CglibMethodInvocation.proceed(CglibAopProxy.java:763)\n\tat org.springframework.aop.interceptor.AsyncExecutionInterceptor.lambda.invoke.0(AsyncExecutionInterceptor.java:115)\n\tat java.base/java.util.concurrent.FutureTask.run(FutureTask.java:317)\n\tat java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)\n\tat java.base/java.util.concurrent.ThreadPoolExecutor.Worker.run(ThreadPoolExecutor.java:642)\n\tat java.base/java.lang.Thread.run(Thread.java:1583)\n", + "detected_message": "com.epam.ta.reportportal.exception.ReportPortalException: Impossible interact with integration. There are no analyzer services are deployed.\n", + "stacktrace": "\tat com.epam.ta.reportportal.commons.validation.ErrorTypeBasedRuleValidator.verify(ErrorTypeBasedRuleValidator.java:32)\n\tat com.epam.ta.reportportal.core.launch.cluster.pipeline.data.AnalyzerClusterDataProvider.provide(AnalyzerClusterDataProvider.java:47)\n\tat com.epam.ta.reportportal.core.launch.cluster.pipeline.SaveClusterDataPartProvider.lambda.provide.0(SaveClusterDataPartProvider.java:44)\n\tat java.base/java.util.Optional.flatMap(Optional.java:289)\n\tat com.epam.ta.reportportal.core.launch.cluster.pipeline.SaveClusterDataPartProvider.provide(SaveClusterDataPartProvider.java:44)\n\tat com.epam.ta.reportportal.core.launch.cluster.pipeline.SaveClusterDataPartProvider.provide(SaveClusterDataPartProvider.java:30)\n\tat com.epam.ta.reportportal.pipeline.PipelineConstructor.lambda.construct.0(PipelineConstructor.java:34)\n\tat java.base/java.util.stream.ReferencePipeline.3.1.accept(ReferencePipeline.java:197)\n\tat java.base/java.util.AbstractList.RandomAccessSpliterator.forEachRemaining(AbstractList.java:722)\n\tat java.base/java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:509)\n\tat java.base/java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:499)\n\tat java.base/java.util.stream.ReduceOps.ReduceOp.evaluateSequential(ReduceOps.java:921)\n\tat java.base/java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)\n\tat java.base/java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:682)\n\tat com.epam.ta.reportportal.pipeline.PipelineConstructor.construct(PipelineConstructor.java:34)\n\tat com.epam.ta.reportportal.core.launch.cluster.UniqueErrorGenerator.generateClusters(UniqueErrorGenerator.java:78)\n\tat com.epam.ta.reportportal.core.launch.cluster.UniqueErrorGenerator.generate(UniqueErrorGenerator.java:60)\n\tat 
com.epam.ta.reportportal.core.events.handler.item.TestItemUniqueErrorAnalysisRunner.handle(TestItemUniqueErrorAnalysisRunner.java:66)\n\tat com.epam.ta.reportportal.core.events.handler.item.TestItemUniqueErrorAnalysisRunner.handle(TestItemUniqueErrorAnalysisRunner.java:37)\n\tat com.epam.ta.reportportal.core.events.subscriber.impl.delegate.ProjectConfigDelegatingSubscriber.lambda.handleEvent.0(ProjectConfigDelegatingSubscriber.java:44)\n\tat java.base/java.lang.Iterable.forEach(Iterable.java:75)\n\tat com.epam.ta.reportportal.core.events.subscriber.impl.delegate.ProjectConfigDelegatingSubscriber.handleEvent(ProjectConfigDelegatingSubscriber.java:44)\n\tat com.epam.ta.reportportal.core.events.subscriber.impl.delegate.ProjectConfigDelegatingSubscriber.handleEvent(ProjectConfigDelegatingSubscriber.java:29)\n\tat com.epam.ta.reportportal.core.events.listener.TestItemIssueResolvedEventListener.lambda.onApplicationEvent.0(TestItemIssueResolvedEventListener.java:39)\n\tat java.base/java.lang.Iterable.forEach(Iterable.java:75)\n\tat com.epam.ta.reportportal.core.events.listener.TestItemIssueResolvedEventListener.onApplicationEvent(TestItemIssueResolvedEventListener.java:39)\n\tat org.springframework.cglib.proxy.MethodProxy.invoke(MethodProxy.java:218)\n\tat org.springframework.aop.framework.CglibAopProxy.CglibMethodInvocation.invokeJoinpoint(CglibAopProxy.java:793)\n\tat org.springframework.aop.framework.ReflectiveMethodInvocation.proceed(ReflectiveMethodInvocation.java:163)\n\tat org.springframework.aop.framework.CglibAopProxy.CglibMethodInvocation.proceed(CglibAopProxy.java:763)\n\tat org.springframework.aop.interceptor.AsyncExecutionInterceptor.lambda.invoke.0(AsyncExecutionInterceptor.java:115)\n\tat java.base/java.util.concurrent.FutureTask.run(FutureTask.java:317)\n\tat java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)\n\tat java.base/java.util.concurrent.ThreadPoolExecutor.Worker.run(ThreadPoolExecutor.java:642)\n\tat java.base/java.lang.Thread.run(Thread.java:1583)" + }, { "language": "c#", "log": "System.NullReferenceException : Object reference not set to an instance of an object.\nat Aram.Gff.Tests.Core.Functions.Functions.SelectListItemByName(RadComboBox list, String itemName, Boolean exactSearch, Boolean waitProgressBar, Boolean startWith) in C:\\swarm\\workspace\\US EN ExpensesOnceTrans STD\\Aram.Gff.Tests\\Core\\Functions\\Functions.cs:line 260 \n at ARAM_GFF.Extensions.SelectListItemByName(RadComboBox comboBox, String value, Boolean exactSearch, Boolean expandAll, Boolean waitProgressBar) in C:\\swarm\\workspace\\US EN ExpensesOnceTrans STD\\Aram.Gff.Tests\\Core\\Extensions.cs:line 532 \n at Aram.Gff.Tests.Steps.Expenses.OtherPayrollEstimatesFormSteps.SelectType(OtherPayrollEstimatesType otherEndingEstimatesType) in C:\\swarm\\workspace\\US EN ExpensesOnceTrans STD\\Aram.Gff.Tests\\Steps\\Expenses\\OtherPayrollEstimatesSteps.cs:line 132 \n at Aram.Gff.Tests.Expenses.OtherPayrollEstimates.AddDraftOtherPayrollEstimatesTest(GFFSite gffSite, OtherPayrollEstimatesType payrollEstimatesType) in C:\\swarm\\workspace\\US EN ExpensesOnceTrans STD\\Aram.Gff.Tests\\Test\\Expenses\\OtherPayrollEstimates.cs:line 46", diff --git a/test_res/fixtures/index_logs_rq.json b/test_res/fixtures/index_logs_rq.json index 82aeebf5..38d18f83 100644 --- a/test_res/fixtures/index_logs_rq.json +++ b/test_res/fixtures/index_logs_rq.json @@ -1,2 +1,2 @@ {"index":{"_index":"idx2","_type":"log","_id":"1_m"}} -{"issue_type":"AB001","launch_name":"Launch 
1","launch_number":0,"log_level":40000,"original_message_lines":1,"original_message_words_number":2,"message":"","test_item":1,"test_item_name":"first test","start_time":"2020-01-15 10:57:43","unique_id":"unique1","test_case_hash":-1126886180,"detected_message":"","detected_message_with_numbers":"","only_numbers":"12","merged_small_logs":"message http localhost admin java.lang.noclassdeffounderror","stacktrace":"","urls":"http localhost admin","paths":"","message_params":"","potential_status_codes":"","found_exceptions":"java.lang.noclassdeffounderror","found_exceptions_extended":"java.lang.noclassdeffounderror lang.noclassdeffounderror noclassdeffounderror","found_tests_and_methods":"","stacktrace_extended":"","message_extended":"","detected_message_extended":"","detected_message_without_params_extended":"message java.lang lang.noclassdeffounderror noclassdeffounderror","message_without_params_extended":"","message_without_params_and_brackets":"","detected_message_without_params_and_brackets":"","whole_message":"Message 12 \r\n java.lang lang.NoClassDefFoundError\r\n","is_merged":true} +{"issue_type":"AB001","launch_name":"Launch 1","launch_number":0,"log_level":40000,"original_message_lines":1,"original_message_words_number":2,"message":"","test_item":1,"test_item_name":"first test","start_time":"2020-01-15 10:57:43","unique_id":"unique1","test_case_hash":-1126886180,"detected_message":"","detected_message_with_numbers":"","only_numbers":"12","merged_small_logs":"message specialnumber http localhost admin java.lang.noclassdeffounderror","stacktrace":"","urls":"http localhost admin","paths":"","message_params":"","potential_status_codes":"","found_exceptions":"java.lang.noclassdeffounderror","found_exceptions_extended":"java.lang.noclassdeffounderror lang.noclassdeffounderror noclassdeffounderror","found_tests_and_methods":"","stacktrace_extended":"","message_extended":"","detected_message_extended":"","detected_message_without_params_extended":"message specialnumber java.lang lang.noclassdeffounderror noclassdeffounderror","message_without_params_extended":"","message_without_params_and_brackets":"","detected_message_without_params_and_brackets":"","whole_message":"Message SPECIALNUMBER \n java.lang lang.NoClassDefFoundError\n","is_merged":true} diff --git a/test_res/fixtures/index_logs_rq_big_messages.json b/test_res/fixtures/index_logs_rq_big_messages.json index 46dc71f8..eae5fbd0 100644 --- a/test_res/fixtures/index_logs_rq_big_messages.json +++ b/test_res/fixtures/index_logs_rq_big_messages.json @@ -1,4 +1,4 @@ {"index":{"_index":"2","_id":1}} -{"launch_id":1234567892,"launch_name":"Launch with test items with logs","launch_number":0,"launch_start_time":"2020-01-15 10:57:43","test_item":1,"test_item_name":"first test","unique_id":"unique1","cluster_id":"0","cluster_message":"","test_case_hash":-1126886180,"is_auto_analyzed":false,"issue_type":"ti001","log_time":"2020-01-15 10:57:43","log_level":40000,"original_message_lines":5,"original_message_words_number":9,"message":"Message \r\n Message 'prod_en' /src/prod/results.html \r\n java.lang.NoClassDefFoundError\r\n de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator \r","is_merged":false,"start_time":"2020-01-15 10:57:43","merged_small_logs":"","detected_message":"Message \r\n Message 'prod_en' /src/prod/results.html \r\n java.lang.NoClassDefFoundError\r\n de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator \r","detected_message_with_numbers":"Message 1 \r\n Message 2 \r\n Message 3 
'prod_en' /src/prod/results.html \r\n java.lang.NoClassDefFoundError\r\n de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator \r","stacktrace":"","only_numbers":"1 2 3","found_exceptions":"java.lang.NoClassDefFoundError","whole_message":"Message 1 \r\n Message 2 \r\n Message 3 'prod_en' \r\n java.lang.NoClassDefFoundError\r\n de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator \n ","potential_status_codes":"","found_tests_and_methods":"","cluster_with_numbers":false,"urls":"","paths":"/src/prod/results.html","message_params":"prod_en","found_exceptions_extended":"java.lang.NoClassDefFoundError lang.NoClassDefFoundError NoClassDefFoundError","detected_message_extended":"Message \r\n Message 'prod_en' /src/prod/results.html \r\n java.lang.NoClassDefFoundError lang.NoClassDefFoundError NoClassDefFoundError \r\n de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator impl.MandatoryAttributesValidator MandatoryAttributesValidator \r","detected_message_without_params_extended":"Message \r\n java.lang.NoClassDefFoundError lang.NoClassDefFoundError NoClassDefFoundError \r\n de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator impl.MandatoryAttributesValidator MandatoryAttributesValidator ","stacktrace_extended":"","message_extended":"Message \r\n Message 'prod_en' /src/prod/results.html \r\n java.lang.NoClassDefFoundError lang.NoClassDefFoundError NoClassDefFoundError \r\n de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator impl.MandatoryAttributesValidator MandatoryAttributesValidator \r","message_without_params_extended":"Message \r\n java.lang.NoClassDefFoundError lang.NoClassDefFoundError NoClassDefFoundError \r\n de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator impl.MandatoryAttributesValidator MandatoryAttributesValidator ","detected_message_without_params_and_brackets":"Message \r\n java.lang.NoClassDefFoundError\r\n de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator","message_without_params_and_brackets":"Message \r\n java.lang.NoClassDefFoundError\r\n de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator"} +{"launch_id":1234567892,"launch_name":"Launch with test items with logs","launch_number":0,"launch_start_time":"2020-01-15 10:57:43","test_item":1,"test_item_name":"first test","unique_id":"unique1","cluster_id":"0","cluster_message":"","test_case_hash":-1126886180,"is_auto_analyzed":false,"issue_type":"ti001","log_time":"2020-01-15 10:57:43","log_level":40000,"original_message":"Message 1 \n Message 2 \n Message 3 'prod_en' /src/prod/results.html \n java.lang.NoClassDefFoundError\n de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator$1@31ca8ab4 \n ca.canadiantire.steps.hybris.ws.WebserviceHybrisCustomerAndCartSteps$$EnhancerByCGLIB$$84837ae7.CGLIB$add_products_to_cart$16()","original_message_lines":5,"original_message_words_number":9,"message":"Message SPECIALNUMBER\n Message SPECIALNUMBER 'prod_en' /src/prod/results.html\n java.lang.NoClassDefFoundError\n de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator.SPECIALNUMBER","is_merged":false,"start_time":"2020-01-15 10:57:43","merged_small_logs":"","detected_message":"Message SPECIALNUMBER\n Message SPECIALNUMBER 'prod_en' /src/prod/results.html\n java.lang.NoClassDefFoundError\n 
de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator.SPECIALNUMBER","detected_message_with_numbers":"Message 1\n Message 2\n Message 3 'prod_en' /src/prod/results.html\n java.lang.NoClassDefFoundError\n de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator.1","stacktrace":"","only_numbers":"1 2 3","found_exceptions":"java.lang.NoClassDefFoundError","whole_message":"Message SPECIALNUMBER\nMessage SPECIALNUMBER\nMessage SPECIALNUMBER prod en src prod results html\njava lang NoClassDefFoundError\nde hybris platform servicelayer interceptor impl MandatoryAttributesValidator SPECIALNUMBER\n","potential_status_codes":"","found_tests_and_methods":"","cluster_with_numbers":false,"urls":"","paths":"/src/prod/results.html","message_params":"prod_en","found_exceptions_extended":"java.lang.NoClassDefFoundError lang.NoClassDefFoundError NoClassDefFoundError","detected_message_extended":"Message 1\n Message 2\n Message 3 'prod_en' /src/prod/results.html\n java.lang.NoClassDefFoundError lang.NoClassDefFoundError NoClassDefFoundError \n de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator.1","detected_message_without_params_extended":"Message SPECIALNUMBER\nMessage SPECIALNUMBER prod en src prod results html\njava lang NoClassDefFoundError\nde hybris platform servicelayer interceptor impl MandatoryAttributesValidator SPECIALNUMBER","stacktrace_extended":"","message_extended":"Message SPECIALNUMBER\n Message SPECIALNUMBER 'prod_en' /src/prod/results.html\n java.lang.NoClassDefFoundError lang.NoClassDefFoundError NoClassDefFoundError \n de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator.SPECIALNUMBER MandatoryAttributesValidator.SPECIALNUMBER SPECIALNUMBER ","message_without_params_extended":"Message SPECIALNUMBER\n Message SPECIALNUMBER prod en src prod results html\n java lang NoClassDefFoundError\n de hybris platform servicelayer interceptor impl MandatoryAttributesValidator SPECIALNUMBER","detected_message_without_params_and_brackets":"Message SPECIALNUMBER\nMessage SPECIALNUMBER prod en src prod results html\njava lang NoClassDefFoundError\nde hybris platform servicelayer interceptor impl MandatoryAttributesValidator SPECIALNUMBER","message_without_params_and_brackets":"Message SPECIALNUMBER\n Message SPECIALNUMBER prod en src prod results html\n java lang NoClassDefFoundError\n de hybris platform servicelayer interceptor impl MandatoryAttributesValidator SPECIALNUMBER"} {"index":{"_index":"2","_id":2}} -{"launch_id":1234567892,"launch_name":"Launch with test items with logs","launch_number":0,"launch_start_time":"2020-01-15 10:57:43","test_item":1,"test_item_name":"first test","unique_id":"unique1","cluster_id":"0","cluster_message":"","test_case_hash":-1126886180,"is_auto_analyzed":false,"issue_type":"ti001","log_time":"2020-01-15 10:57:43","log_level":40000,"original_message_lines":4,"original_message_words_number":7,"message":"Message \r\n Message http : localhost/admin \r\n java.lang.NoClassDefFoundError\r","is_merged":false,"start_time":"2020-01-15 10:57:43","merged_small_logs":"","detected_message":"Message \r\n Message http : localhost/admin \r\n java.lang.NoClassDefFoundError\r","detected_message_with_numbers":"Message 2 \r\n Message 4 \r\n Message 5 http : localhost/admin \r\n java.lang.NoClassDefFoundError\r","stacktrace":"","only_numbers":"2 4 5","found_exceptions":"java.lang.NoClassDefFoundError","whole_message":"Message 2 \r\n Message 4 \r\n Message 5 \r\n java.lang.NoClassDefFoundError \n 
","potential_status_codes":"","found_tests_and_methods":"","cluster_with_numbers":false,"urls":"http:localhost/admin","paths":"","message_params":"","found_exceptions_extended":"java.lang.NoClassDefFoundError lang.NoClassDefFoundError NoClassDefFoundError","detected_message_extended":"Message \r\n Message http : localhost/admin \r\n java.lang.NoClassDefFoundError lang.NoClassDefFoundError NoClassDefFoundError \r","detected_message_without_params_extended":"Message \r\n java.lang.NoClassDefFoundError lang.NoClassDefFoundError NoClassDefFoundError ","stacktrace_extended":"","message_extended":"Message \r\n Message http : localhost/admin \r\n java.lang.NoClassDefFoundError lang.NoClassDefFoundError NoClassDefFoundError \r","message_without_params_extended":"Message \r\n java.lang.NoClassDefFoundError lang.NoClassDefFoundError NoClassDefFoundError ","detected_message_without_params_and_brackets":"Message \r\n java.lang.NoClassDefFoundError","message_without_params_and_brackets":"Message \r\n java.lang.NoClassDefFoundError"} +{"launch_id":1234567892,"launch_name":"Launch with test items with logs","launch_number":0,"launch_start_time":"2020-01-15 10:57:43","test_item":1,"test_item_name":"first test","unique_id":"unique1","cluster_id":"0","cluster_message":"","test_case_hash":-1126886180,"is_auto_analyzed":false,"issue_type":"ti001","log_time":"2020-01-15 10:57:43","log_level":40000,"original_message":"Message 2 \n Message 4 \n Message 5 http:localhost/admin \n java.lang.NoClassDefFoundError\n For documentation on this error, please visit https://www.seleniumhq.org/exceptions/stale_element_reference.html","original_message_lines":4,"original_message_words_number":7,"message":"Message SPECIALNUMBER\n Message SPECIALNUMBER http : localhost/admin\n java.lang.NoClassDefFoundError","is_merged":false,"start_time":"2020-01-15 10:57:43","merged_small_logs":"","detected_message":"Message SPECIALNUMBER\n Message SPECIALNUMBER http : localhost/admin\n java.lang.NoClassDefFoundError","detected_message_with_numbers":"Message 2\n Message 4\n Message 5 http : localhost/admin\n java.lang.NoClassDefFoundError","stacktrace":"","only_numbers":"2 4 5","found_exceptions":"java.lang.NoClassDefFoundError","whole_message":"Message SPECIALNUMBER\nMessage SPECIALNUMBER\nMessage SPECIALNUMBER http localhost admin\njava lang NoClassDefFoundError\n","potential_status_codes":"","found_tests_and_methods":"","cluster_with_numbers":false,"urls":"","paths":"","message_params":"","found_exceptions_extended":"java.lang.NoClassDefFoundError lang.NoClassDefFoundError NoClassDefFoundError","detected_message_extended":"Message 2\n Message 4\n Message 5 http : localhost/admin\n java.lang.NoClassDefFoundError lang.NoClassDefFoundError NoClassDefFoundError ","detected_message_without_params_extended":"Message SPECIALNUMBER\nMessage SPECIALNUMBER http localhost admin\njava lang NoClassDefFoundError","stacktrace_extended":"","message_extended":"Message SPECIALNUMBER\n Message SPECIALNUMBER http : localhost/admin\n java.lang.NoClassDefFoundError lang.NoClassDefFoundError NoClassDefFoundError ","message_without_params_extended":"Message SPECIALNUMBER\n Message SPECIALNUMBER http localhost admin\n java lang NoClassDefFoundError","detected_message_without_params_and_brackets":"Message SPECIALNUMBER\nMessage SPECIALNUMBER http localhost admin\njava lang NoClassDefFoundError","message_without_params_and_brackets":"Message SPECIALNUMBER\n Message SPECIALNUMBER http localhost admin\n java lang NoClassDefFoundError"} diff --git 
a/test_res/fixtures/index_logs_rq_big_messages_with_clusters.json b/test_res/fixtures/index_logs_rq_big_messages_with_clusters.json index d1aa4367..8c6c5f09 100644 --- a/test_res/fixtures/index_logs_rq_big_messages_with_clusters.json +++ b/test_res/fixtures/index_logs_rq_big_messages_with_clusters.json @@ -1,4 +1,4 @@ {"index":{"_index":"2","_id":1}} -{"launch_id":1234567892,"launch_name":"Launch with test items with logs","launch_number":0,"launch_start_time":"2020-01-15 10:57:43","test_item":1,"test_item_name":"first test","unique_id":"unique1","cluster_id":"2727777272727727721","cluster_message":"","test_case_hash":-1126886180,"is_auto_analyzed":false,"issue_type":"ti001","log_time":"2020-01-15 10:57:43","log_level":40000,"original_message_lines":5,"original_message_words_number":9,"message":"Message \r\n Message 'prod_en' /src/prod/results.html \r\n java.lang.NoClassDefFoundError\r\n de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator \r","is_merged":false,"start_time":"2020-01-15 10:57:43","merged_small_logs":"","detected_message":"Message \r\n Message 'prod_en' /src/prod/results.html \r\n java.lang.NoClassDefFoundError\r\n de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator \r","detected_message_with_numbers":"Message 1 \r\n Message 2 \r\n Message 3 'prod_en' /src/prod/results.html \r\n java.lang.NoClassDefFoundError\r\n de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator \r","stacktrace":"","only_numbers":"1 2 3","found_exceptions":"java.lang.NoClassDefFoundError","whole_message":"Message 1 \r\n Message 2 \r\n Message 3 'prod_en' \r\n java.lang.NoClassDefFoundError\r\n de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator \n ","potential_status_codes":"","found_tests_and_methods":"","cluster_with_numbers":true,"urls":"","paths":"/src/prod/results.html","message_params":"prod_en","found_exceptions_extended":"java.lang.NoClassDefFoundError lang.NoClassDefFoundError NoClassDefFoundError","detected_message_extended":"Message \r\n Message 'prod_en' /src/prod/results.html \r\n java.lang.NoClassDefFoundError lang.NoClassDefFoundError NoClassDefFoundError \r\n de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator impl.MandatoryAttributesValidator MandatoryAttributesValidator \r","detected_message_without_params_extended":"Message \r\n java.lang.NoClassDefFoundError lang.NoClassDefFoundError NoClassDefFoundError \r\n de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator impl.MandatoryAttributesValidator MandatoryAttributesValidator ","stacktrace_extended":"","message_extended":"Message \r\n Message 'prod_en' /src/prod/results.html \r\n java.lang.NoClassDefFoundError lang.NoClassDefFoundError NoClassDefFoundError \r\n de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator impl.MandatoryAttributesValidator MandatoryAttributesValidator \r","message_without_params_extended":"Message \r\n java.lang.NoClassDefFoundError lang.NoClassDefFoundError NoClassDefFoundError \r\n de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator impl.MandatoryAttributesValidator MandatoryAttributesValidator ","detected_message_without_params_and_brackets":"Message \r\n java.lang.NoClassDefFoundError\r\n de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator","message_without_params_and_brackets":"Message \r\n java.lang.NoClassDefFoundError\r\n 
de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator"} +{"launch_id":1234567892,"launch_name":"Launch with test items with logs","launch_number":0,"launch_start_time":"2020-01-15 10:57:43","test_item":1,"test_item_name":"first test","unique_id":"unique1","cluster_id":"2727777272727727721","cluster_message":"","test_case_hash":-1126886180,"is_auto_analyzed":false,"issue_type":"ti001","log_time":"2020-01-15 10:57:43","log_level":40000,"original_message":"Message 1 \n Message 2 \n Message 3 'prod_en' /src/prod/results.html \n java.lang.NoClassDefFoundError\n de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator$1@31ca8ab4 \n ca.canadiantire.steps.hybris.ws.WebserviceHybrisCustomerAndCartSteps$$EnhancerByCGLIB$$84837ae7.CGLIB$add_products_to_cart$16()","original_message_lines":5,"original_message_words_number":9,"message":"Message SPECIALNUMBER\n Message SPECIALNUMBER 'prod_en' /src/prod/results.html\n java.lang.NoClassDefFoundError\n de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator.SPECIALNUMBER","is_merged":false,"start_time":"2020-01-15 10:57:43","merged_small_logs":"","detected_message":"Message SPECIALNUMBER\n Message SPECIALNUMBER 'prod_en' /src/prod/results.html\n java.lang.NoClassDefFoundError\n de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator.SPECIALNUMBER","detected_message_with_numbers":"Message 1\n Message 2\n Message 3 'prod_en' /src/prod/results.html\n java.lang.NoClassDefFoundError\n de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator.1","stacktrace":"","only_numbers":"1 2 3","found_exceptions":"java.lang.NoClassDefFoundError","whole_message":"Message SPECIALNUMBER\nMessage SPECIALNUMBER\nMessage SPECIALNUMBER prod en src prod results html\njava lang NoClassDefFoundError\nde hybris platform servicelayer interceptor impl MandatoryAttributesValidator SPECIALNUMBER\n","potential_status_codes":"","found_tests_and_methods":"","cluster_with_numbers":true,"urls":"","paths":"/src/prod/results.html","message_params":"prod_en","found_exceptions_extended":"java.lang.NoClassDefFoundError lang.NoClassDefFoundError NoClassDefFoundError","detected_message_extended":"Message 1\n Message 2\n Message 3 'prod_en' /src/prod/results.html\n java.lang.NoClassDefFoundError lang.NoClassDefFoundError NoClassDefFoundError \n de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator.1","detected_message_without_params_extended":"Message SPECIALNUMBER\nMessage SPECIALNUMBER prod en src prod results html\njava lang NoClassDefFoundError\nde hybris platform servicelayer interceptor impl MandatoryAttributesValidator SPECIALNUMBER","stacktrace_extended":"","message_extended":"Message SPECIALNUMBER\n Message SPECIALNUMBER 'prod_en' /src/prod/results.html\n java.lang.NoClassDefFoundError lang.NoClassDefFoundError NoClassDefFoundError \n de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator.SPECIALNUMBER MandatoryAttributesValidator.SPECIALNUMBER SPECIALNUMBER ","message_without_params_extended":"Message SPECIALNUMBER\n Message SPECIALNUMBER prod en src prod results html\n java lang NoClassDefFoundError\n de hybris platform servicelayer interceptor impl MandatoryAttributesValidator SPECIALNUMBER","detected_message_without_params_and_brackets":"Message SPECIALNUMBER\nMessage SPECIALNUMBER prod en src prod results html\njava lang NoClassDefFoundError\nde hybris platform servicelayer interceptor impl MandatoryAttributesValidator 
SPECIALNUMBER","message_without_params_and_brackets":"Message SPECIALNUMBER\n Message SPECIALNUMBER prod en src prod results html\n java lang NoClassDefFoundError\n de hybris platform servicelayer interceptor impl MandatoryAttributesValidator SPECIALNUMBER"} {"index":{"_index":"2","_id":2}} -{"launch_id":1234567892,"launch_name":"Launch with test items with logs","launch_number":0,"launch_start_time":"2020-01-15 10:57:43","test_item":1,"test_item_name":"first test","unique_id":"unique1","cluster_id":"0","cluster_message":"","test_case_hash":-1126886180,"is_auto_analyzed":false,"issue_type":"ti001","log_time":"2020-01-15 10:57:43","log_level":40000,"original_message_lines":4,"original_message_words_number":7,"message":"Message \r\n Message http : localhost/admin \r\n java.lang.NoClassDefFoundError\r","is_merged":false,"start_time":"2020-01-15 10:57:43","merged_small_logs":"","detected_message":"Message \r\n Message http : localhost/admin \r\n java.lang.NoClassDefFoundError\r","detected_message_with_numbers":"Message 2 \r\n Message 4 \r\n Message 5 http : localhost/admin \r\n java.lang.NoClassDefFoundError\r","stacktrace":"","only_numbers":"2 4 5","found_exceptions":"java.lang.NoClassDefFoundError","whole_message":"Message 2 \r\n Message 4 \r\n Message 5 \r\n java.lang.NoClassDefFoundError \n ","potential_status_codes":"","found_tests_and_methods":"","cluster_with_numbers":false,"urls":"http:localhost/admin","paths":"","message_params":"","found_exceptions_extended":"java.lang.NoClassDefFoundError lang.NoClassDefFoundError NoClassDefFoundError","detected_message_extended":"Message \r\n Message http : localhost/admin \r\n java.lang.NoClassDefFoundError lang.NoClassDefFoundError NoClassDefFoundError \r","detected_message_without_params_extended":"Message \r\n java.lang.NoClassDefFoundError lang.NoClassDefFoundError NoClassDefFoundError ","stacktrace_extended":"","message_extended":"Message \r\n Message http : localhost/admin \r\n java.lang.NoClassDefFoundError lang.NoClassDefFoundError NoClassDefFoundError \r","message_without_params_extended":"Message \r\n java.lang.NoClassDefFoundError lang.NoClassDefFoundError NoClassDefFoundError ","detected_message_without_params_and_brackets":"Message \r\n java.lang.NoClassDefFoundError","message_without_params_and_brackets":"Message \r\n java.lang.NoClassDefFoundError"} +{"launch_id":1234567892,"launch_name":"Launch with test items with logs","launch_number":0,"launch_start_time":"2020-01-15 10:57:43","test_item":1,"test_item_name":"first test","unique_id":"unique1","cluster_id":"0","cluster_message":"","test_case_hash":-1126886180,"is_auto_analyzed":false,"issue_type":"ti001","log_time":"2020-01-15 10:57:43","log_level":40000,"original_message":"Message 2 \n Message 4 \n Message 5 http:localhost/admin \n java.lang.NoClassDefFoundError\n For documentation on this error, please visit https://www.seleniumhq.org/exceptions/stale_element_reference.html","original_message_lines":4,"original_message_words_number":7,"message":"Message SPECIALNUMBER\n Message SPECIALNUMBER http : localhost/admin\n java.lang.NoClassDefFoundError","is_merged":false,"start_time":"2020-01-15 10:57:43","merged_small_logs":"","detected_message":"Message SPECIALNUMBER\n Message SPECIALNUMBER http : localhost/admin\n java.lang.NoClassDefFoundError","detected_message_with_numbers":"Message 2\n Message 4\n Message 5 http : localhost/admin\n java.lang.NoClassDefFoundError","stacktrace":"","only_numbers":"2 4 5","found_exceptions":"java.lang.NoClassDefFoundError","whole_message":"Message 
SPECIALNUMBER\nMessage SPECIALNUMBER\nMessage SPECIALNUMBER http localhost admin\njava lang NoClassDefFoundError\n","potential_status_codes":"","found_tests_and_methods":"","cluster_with_numbers":false,"urls":"","paths":"","message_params":"","found_exceptions_extended":"java.lang.NoClassDefFoundError lang.NoClassDefFoundError NoClassDefFoundError","detected_message_extended":"Message 2\n Message 4\n Message 5 http : localhost/admin\n java.lang.NoClassDefFoundError lang.NoClassDefFoundError NoClassDefFoundError ","detected_message_without_params_extended":"Message SPECIALNUMBER\nMessage SPECIALNUMBER http localhost admin\njava lang NoClassDefFoundError","stacktrace_extended":"","message_extended":"Message SPECIALNUMBER\n Message SPECIALNUMBER http : localhost/admin\n java.lang.NoClassDefFoundError lang.NoClassDefFoundError NoClassDefFoundError ","message_without_params_extended":"Message SPECIALNUMBER\n Message SPECIALNUMBER http localhost admin\n java lang NoClassDefFoundError","detected_message_without_params_and_brackets":"Message SPECIALNUMBER\nMessage SPECIALNUMBER http localhost admin\njava lang NoClassDefFoundError","message_without_params_and_brackets":"Message SPECIALNUMBER\n Message SPECIALNUMBER http localhost admin\n java lang NoClassDefFoundError"} diff --git a/test_res/fixtures/index_logs_rq_different_log_level.json b/test_res/fixtures/index_logs_rq_different_log_level.json index 6f25271e..688ee101 100644 --- a/test_res/fixtures/index_logs_rq_different_log_level.json +++ b/test_res/fixtures/index_logs_rq_different_log_level.json @@ -1,2 +1,2 @@ {"index":{"_index":"2","_id":1}} -{"launch_id":1234567892,"launch_name":"Launch with test items with logs","launch_number":0,"launch_start_time":"2020-01-15 10:57:43","test_item":1,"test_item_name":"first test","unique_id":"unique1","cluster_id":"0","cluster_message":"","test_case_hash":-1126886180,"is_auto_analyzed":false,"issue_type":"ti001","log_time":"2020-01-15 10:57:43","log_level":40000,"original_message_lines":3,"original_message_words_number":8,"message":" Message \n java.lang.reflect.Method.invoke(Method.java : ) \n message error caused by exception","is_merged":false,"start_time":"2020-01-15 10:57:43","merged_small_logs":"","detected_message":" Message \n message error caused by exception","detected_message_with_numbers":" Message 1 \n message error caused by exception","stacktrace":" java.lang.reflect.Method.invoke(Method.java : ) ","only_numbers":"1","found_exceptions":"","whole_message":"Message 1 \n message error caused by exception \n java.lang.reflect.Method.invoke(Method.java:) ","potential_status_codes":"","found_tests_and_methods":"","cluster_with_numbers":false,"urls":"","paths":"","message_params":"","found_exceptions_extended":"","detected_message_extended":" Message \n message error caused by exception","detected_message_without_params_extended":"Message \n message error caused by exception","stacktrace_extended":" java.lang.reflect.Method.invoke Method.invoke invoke (Method.java : ) ","message_extended":" Message \n java.lang.reflect.Method.invoke Method.invoke invoke (Method.java : ) \n message error caused by exception","message_without_params_extended":"Message \n java.lang.reflect.Method.invoke Method.invoke invoke (Method.java : ) \n message error caused by exception","detected_message_without_params_and_brackets":"Message \n message error caused by exception","message_without_params_and_brackets":"Message \n java.lang.reflect.Method.invoke \n message error caused by exception"} 
+{"launch_id":1234567892,"launch_name":"Launch with test items with logs","launch_number":0,"launch_start_time":"2020-01-15 10:57:43","test_item":1,"test_item_name":"first test","unique_id":"unique1","cluster_id":"0","cluster_message":"","test_case_hash":-1126886180,"is_auto_analyzed":false,"issue_type":"ti001","log_time":"2020-01-15 10:57:43","log_level":40000,"original_message":"Message 1 \n java.lang.reflect.Method.invoke(Method.java:498) \n message error caused by exception\n ... 34 more","original_message_lines":3,"original_message_words_number":8,"message":" Message SPECIALNUMBER \n java.lang.reflect.Method.invoke(Method.java : SPECIALNUMBER)\n message error caused by exception","is_merged":false,"start_time":"2020-01-15 10:57:43","merged_small_logs":"","detected_message":" Message SPECIALNUMBER \n message error caused by exception","detected_message_with_numbers":" Message 1 \n message error caused by exception","stacktrace":" java.lang.reflect.Method.invoke","only_numbers":"1","found_exceptions":"","whole_message":" Message SPECIALNUMBER\nmessage error caused by exception\n java.lang.reflect.Method.invoke","potential_status_codes":"","found_tests_and_methods":"","cluster_with_numbers":false,"urls":"","paths":"","message_params":"","found_exceptions_extended":"","detected_message_extended":" Message 1 \n message error caused by exception","detected_message_without_params_extended":" Message SPECIALNUMBER\nmessage error caused by exception","stacktrace_extended":" java.lang.reflect.Method.invoke Method.invoke invoke ","message_extended":" Message SPECIALNUMBER \n java.lang.reflect.Method.invoke Method.invoke invoke (Method.java : SPECIALNUMBER)\n message error caused by exception","message_without_params_extended":" Message SPECIALNUMBER \n java lang reflect Method invoke Method java SPECIALNUMBER \n message error caused by exception","detected_message_without_params_and_brackets":" Message SPECIALNUMBER\nmessage error caused by exception","message_without_params_and_brackets":" Message SPECIALNUMBER \n java lang reflect Method invoke Method java SPECIALNUMBER \n message error caused by exception"} diff --git a/test_res/fixtures/index_logs_rq_different_log_level_merged.json b/test_res/fixtures/index_logs_rq_different_log_level_merged.json index 82aeebf5..38d18f83 100644 --- a/test_res/fixtures/index_logs_rq_different_log_level_merged.json +++ b/test_res/fixtures/index_logs_rq_different_log_level_merged.json @@ -1,2 +1,2 @@ {"index":{"_index":"idx2","_type":"log","_id":"1_m"}} -{"issue_type":"AB001","launch_name":"Launch 1","launch_number":0,"log_level":40000,"original_message_lines":1,"original_message_words_number":2,"message":"","test_item":1,"test_item_name":"first test","start_time":"2020-01-15 10:57:43","unique_id":"unique1","test_case_hash":-1126886180,"detected_message":"","detected_message_with_numbers":"","only_numbers":"12","merged_small_logs":"message http localhost admin java.lang.noclassdeffounderror","stacktrace":"","urls":"http localhost admin","paths":"","message_params":"","potential_status_codes":"","found_exceptions":"java.lang.noclassdeffounderror","found_exceptions_extended":"java.lang.noclassdeffounderror lang.noclassdeffounderror noclassdeffounderror","found_tests_and_methods":"","stacktrace_extended":"","message_extended":"","detected_message_extended":"","detected_message_without_params_extended":"message java.lang lang.noclassdeffounderror 
noclassdeffounderror","message_without_params_extended":"","message_without_params_and_brackets":"","detected_message_without_params_and_brackets":"","whole_message":"Message 12 \r\n java.lang lang.NoClassDefFoundError\r\n","is_merged":true} +{"issue_type":"AB001","launch_name":"Launch 1","launch_number":0,"log_level":40000,"original_message_lines":1,"original_message_words_number":2,"message":"","test_item":1,"test_item_name":"first test","start_time":"2020-01-15 10:57:43","unique_id":"unique1","test_case_hash":-1126886180,"detected_message":"","detected_message_with_numbers":"","only_numbers":"12","merged_small_logs":"message specialnumber http localhost admin java.lang.noclassdeffounderror","stacktrace":"","urls":"http localhost admin","paths":"","message_params":"","potential_status_codes":"","found_exceptions":"java.lang.noclassdeffounderror","found_exceptions_extended":"java.lang.noclassdeffounderror lang.noclassdeffounderror noclassdeffounderror","found_tests_and_methods":"","stacktrace_extended":"","message_extended":"","detected_message_extended":"","detected_message_without_params_extended":"message specialnumber java.lang lang.noclassdeffounderror noclassdeffounderror","message_without_params_extended":"","message_without_params_and_brackets":"","detected_message_without_params_and_brackets":"","whole_message":"Message SPECIALNUMBER \n java.lang lang.NoClassDefFoundError\n","is_merged":true} diff --git a/test_res/fixtures/index_logs_rq_different_log_level_with_prefix.json b/test_res/fixtures/index_logs_rq_different_log_level_with_prefix.json index 3dfaf418..56e9ac11 100644 --- a/test_res/fixtures/index_logs_rq_different_log_level_with_prefix.json +++ b/test_res/fixtures/index_logs_rq_different_log_level_with_prefix.json @@ -1,2 +1,2 @@ {"index":{"_index":"rp_2","_id":1}} -{"launch_id":1234567892,"launch_name":"Launch with test items with logs","launch_number":0,"launch_start_time":"2020-01-15 10:57:43","test_item":1,"test_item_name":"first test","unique_id":"unique1","cluster_id":"0","cluster_message":"","test_case_hash":-1126886180,"is_auto_analyzed":false,"issue_type":"ti001","log_time":"2020-01-15 10:57:43","log_level":40000,"original_message_lines":3,"original_message_words_number":8,"message":" Message \n java.lang.reflect.Method.invoke(Method.java : ) \n message error caused by exception","is_merged":false,"start_time":"2020-01-15 10:57:43","merged_small_logs":"","detected_message":" Message \n message error caused by exception","detected_message_with_numbers":" Message 1 \n message error caused by exception","stacktrace":" java.lang.reflect.Method.invoke(Method.java : ) ","only_numbers":"1","found_exceptions":"","whole_message":"Message 1 \n message error caused by exception \n java.lang.reflect.Method.invoke(Method.java:) ","potential_status_codes":"","found_tests_and_methods":"","cluster_with_numbers":false,"urls":"","paths":"","message_params":"","found_exceptions_extended":"","detected_message_extended":" Message \n message error caused by exception","detected_message_without_params_extended":"Message \n message error caused by exception","stacktrace_extended":" java.lang.reflect.Method.invoke Method.invoke invoke (Method.java : ) ","message_extended":" Message \n java.lang.reflect.Method.invoke Method.invoke invoke (Method.java : ) \n message error caused by exception","message_without_params_extended":"Message \n java.lang.reflect.Method.invoke Method.invoke invoke (Method.java : ) \n message error caused by 
exception","detected_message_without_params_and_brackets":"Message \n message error caused by exception","message_without_params_and_brackets":"Message \n java.lang.reflect.Method.invoke \n message error caused by exception"} +{"launch_id":1234567892,"launch_name":"Launch with test items with logs","launch_number":0,"launch_start_time":"2020-01-15 10:57:43","test_item":1,"test_item_name":"first test","unique_id":"unique1","cluster_id":"0","cluster_message":"","test_case_hash":-1126886180,"is_auto_analyzed":false,"issue_type":"ti001","log_time":"2020-01-15 10:57:43","log_level":40000,"original_message":"Message 1 \n java.lang.reflect.Method.invoke(Method.java:498) \n message error caused by exception\n ... 34 more","original_message_lines":3,"original_message_words_number":8,"message":" Message SPECIALNUMBER \n java.lang.reflect.Method.invoke(Method.java : SPECIALNUMBER)\n message error caused by exception","is_merged":false,"start_time":"2020-01-15 10:57:43","merged_small_logs":"","detected_message":" Message SPECIALNUMBER \n message error caused by exception","detected_message_with_numbers":" Message 1 \n message error caused by exception","stacktrace":" java.lang.reflect.Method.invoke","only_numbers":"1","found_exceptions":"","whole_message":" Message SPECIALNUMBER\nmessage error caused by exception\n java.lang.reflect.Method.invoke","potential_status_codes":"","found_tests_and_methods":"","cluster_with_numbers":false,"urls":"","paths":"","message_params":"","found_exceptions_extended":"","detected_message_extended":" Message 1 \n message error caused by exception","detected_message_without_params_extended":" Message SPECIALNUMBER\nmessage error caused by exception","stacktrace_extended":" java.lang.reflect.Method.invoke Method.invoke invoke ","message_extended":" Message SPECIALNUMBER \n java.lang.reflect.Method.invoke Method.invoke invoke (Method.java : SPECIALNUMBER)\n message error caused by exception","message_without_params_extended":" Message SPECIALNUMBER \n java lang reflect Method invoke Method java SPECIALNUMBER \n message error caused by exception","detected_message_without_params_and_brackets":" Message SPECIALNUMBER\nmessage error caused by exception","message_without_params_and_brackets":" Message SPECIALNUMBER \n java lang reflect Method invoke Method java SPECIALNUMBER \n message error caused by exception"} diff --git a/test_res/fixtures/launch_w_items_clustering.json b/test_res/fixtures/launch_w_items_clustering.json index 8345d274..d043600d 100644 --- a/test_res/fixtures/launch_w_items_clustering.json +++ b/test_res/fixtures/launch_w_items_clustering.json @@ -21,10 +21,10 @@ "logs": [ {"logId": 3, "logLevel": 40000, - "message": "error occured"}, + "message": "error occurred"}, {"logId": 4, "logLevel": 40000, - "message": "error occured \r\n error found \r\n error mined"}] + "message": "error occurred \n error found \n error mined"}] }, {"testItemId": 5, "uniqueId": "df1", @@ -36,7 +36,7 @@ "logs": [ {"logId": 5, "logLevel": 40000, - "message": "error occured \r\n error found \r\n error mined"}] + "message": "error occurred \n error found \n error mined"}] }, {"testItemId": 6, "uniqueId": "df2", @@ -48,6 +48,6 @@ "logs": [ {"logId": 9, "logLevel": 40000, - "message": "error occured \r\n error found \r\n assert query"}] + "message": "error occurred \n error found \n assert query"}] }] } \ No newline at end of file diff --git a/test_res/fixtures/launch_w_items_clustering_with_different_errors.json b/test_res/fixtures/launch_w_items_clustering_with_different_errors.json 
index 898eddcb..ebd16191 100644 --- a/test_res/fixtures/launch_w_items_clustering_with_different_errors.json +++ b/test_res/fixtures/launch_w_items_clustering_with_different_errors.json @@ -21,10 +21,10 @@ "logs": [ {"logId": 3, "logLevel": 40000, - "message": "error occured"}, + "message": "error occurred"}, {"logId": 4, "logLevel": 40000, - "message": "AssertionError error occured \r\n error found \r\n error mined"}] + "message": "AssertionError error occurred \n error found \n error mined"}] }, {"testItemId": 5, "uniqueId": "df1", @@ -36,7 +36,7 @@ "logs": [ {"logId": 5, "logLevel": 40000, - "message": "AssertionError status code: 500 error occured \r\n error found \r\n error mined"}] + "message": "AssertionError status code: 500 error occurred \n error found \n error mined"}] }, {"testItemId": 6, "uniqueId": "df2", @@ -48,6 +48,6 @@ "logs": [ {"logId": 9, "logLevel": 40000, - "message": "NoSuchElementException error occured \r\n error found \r\n assert query"}] + "message": "NoSuchElementException error occurred \n error found \n assert query"}] }] } \ No newline at end of file diff --git a/test_res/fixtures/launch_w_small_logs_for_clustering.json b/test_res/fixtures/launch_w_small_logs_for_clustering.json index 0addceae..f57fc000 100644 --- a/test_res/fixtures/launch_w_small_logs_for_clustering.json +++ b/test_res/fixtures/launch_w_small_logs_for_clustering.json @@ -21,10 +21,10 @@ "logs": [ {"logId": 3, "logLevel": 40000, - "message": "error occured twice"}, + "message": "error occurred twice"}, {"logId": 4, "logLevel": 40000, - "message": "AssertionError error occured \r\n error found"}] + "message": "AssertionError error occurred \n error found"}] }, {"testItemId": 5, "uniqueId": "df1", @@ -36,7 +36,7 @@ "logs": [ {"logId": 5, "logLevel": 40000, - "message": "AssertionError status code: 500 error occured"}] + "message": "AssertionError status code: 500 error occurred"}] }, {"testItemId": 6, "uniqueId": "df2", @@ -48,6 +48,6 @@ "logs": [ {"logId": 9, "logLevel": 40000, - "message": "NoSuchElementException error occured \r\n error found \r\n assert query"}] + "message": "NoSuchElementException error occurred \n error found \n assert query"}] }] } \ No newline at end of file diff --git a/test_res/fixtures/launch_w_test_items_w_logs.json b/test_res/fixtures/launch_w_test_items_w_logs.json index b1382aec..bf0f0b20 100644 --- a/test_res/fixtures/launch_w_test_items_w_logs.json +++ b/test_res/fixtures/launch_w_test_items_w_logs.json @@ -17,13 +17,13 @@ "logId": 1, "logLevel": 40000, "logTime": [2020,1,15,10,57,43], - "message": "Message 1 \r\n Message 2 \r\n Message 3 'prod_en' /src/prod/results.html \r\n java.lang.NoClassDefFoundError\r\n de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator$1@31ca8ab4 \r\n ca.canadiantire.steps.hybris.ws.WebserviceHybrisCustomerAndCartSteps$$EnhancerByCGLIB$$84837ae7.CGLIB$add_products_to_cart$16()" + "message": "Message 1 \n Message 2 \n Message 3 'prod_en' /src/prod/results.html \n java.lang.NoClassDefFoundError\n de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator$1@31ca8ab4 \n ca.canadiantire.steps.hybris.ws.WebserviceHybrisCustomerAndCartSteps$$EnhancerByCGLIB$$84837ae7.CGLIB$add_products_to_cart$16()" }, { "logId": 2, "logLevel": 40000, "logTime": [2020,1,15,10,57,43], - "message": "Message 2 \r\n Message 4 \r\n Message 5 http:localhost/admin \r\n java.lang.NoClassDefFoundError\r\n For documentation on this error, please visit https://www.seleniumhq.org/exceptions/stale_element_reference.html" + 
"message": "Message 2 \n Message 4 \n Message 5 http:localhost/admin \n java.lang.NoClassDefFoundError\n For documentation on this error, please visit https://www.seleniumhq.org/exceptions/stale_element_reference.html" } ] } diff --git a/test_res/fixtures/launch_w_test_items_w_logs_to_be_merged.json b/test_res/fixtures/launch_w_test_items_w_logs_to_be_merged.json index 175f9e0b..3a84848b 100644 --- a/test_res/fixtures/launch_w_test_items_w_logs_to_be_merged.json +++ b/test_res/fixtures/launch_w_test_items_w_logs_to_be_merged.json @@ -18,7 +18,7 @@ { "logId": 3, "logLevel": 40000, - "message": "Message 3 \r\n Message 4" + "message": "Message 3 \n Message 4" } ] } diff --git a/test_res/fixtures/launch_w_test_items_w_logs_with_clusters.json b/test_res/fixtures/launch_w_test_items_w_logs_with_clusters.json index fcffffb5..0806d369 100644 --- a/test_res/fixtures/launch_w_test_items_w_logs_with_clusters.json +++ b/test_res/fixtures/launch_w_test_items_w_logs_with_clusters.json @@ -18,18 +18,18 @@ "logLevel": 40000, "logTime": [2020,1,15,10,57,43], "clusterId": 2727777272727727721, - "message": "Message 1 \r\n Message 2 \r\n Message 3 'prod_en' /src/prod/results.html \r\n java.lang.NoClassDefFoundError\r\n de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator$1@31ca8ab4 \r\n ca.canadiantire.steps.hybris.ws.WebserviceHybrisCustomerAndCartSteps$$EnhancerByCGLIB$$84837ae7.CGLIB$add_products_to_cart$16()" + "message": "Message 1 \n Message 2 \n Message 3 'prod_en' /src/prod/results.html \n java.lang.NoClassDefFoundError\n de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator$1@31ca8ab4 \n ca.canadiantire.steps.hybris.ws.WebserviceHybrisCustomerAndCartSteps$$EnhancerByCGLIB$$84837ae7.CGLIB$add_products_to_cart$16()" }, { "logId": 2, "logLevel": 40000, "logTime": [2020,1,15,10,57,43], - "message": "Message 2 \r\n Message 4 \r\n Message 5 http:localhost/admin \r\n java.lang.NoClassDefFoundError\r\n For documentation on this error, please visit https://www.seleniumhq.org/exceptions/stale_element_reference.html" + "message": "Message 2 \n Message 4 \n Message 5 http:localhost/admin \n java.lang.NoClassDefFoundError\n For documentation on this error, please visit https://www.seleniumhq.org/exceptions/stale_element_reference.html" } ] } ], "clusters": { - "272777727272772772": "Message 1 \r\n Message 2" + "272777727272772772": "Message 1 \n Message 2" } }] \ No newline at end of file diff --git a/test_res/fixtures/one_hit_search_rs.json b/test_res/fixtures/one_hit_search_rs.json index 0497bb86..e5ff0a72 100644 --- a/test_res/fixtures/one_hit_search_rs.json +++ b/test_res/fixtures/one_hit_search_rs.json @@ -26,14 +26,14 @@ "log_level": 40000, "original_message_lines": 1, "original_message_words_number": 2, - "message": "Message http : localhost/admin \r\n java.lang.NoClassDefFoundError\r", + "message": "Message SPECIALNUMBER http : localhost/admin \n java.lang.NoClassDefFoundError\r", "test_item": 1, "test_item_name": "first test", "start_time": "2020-01-15 10:57:43", "unique_id": "unique1", "test_case_hash": -1126886180, - "detected_message": "Message http : localhost/admin \r\n java.lang.NoClassDefFoundError\r", - "detected_message_with_numbers": "Message 12 http : localhost/admin \r\n java.lang.NoClassDefFoundError\r", + "detected_message": "Message SPECIALNUMBER http : localhost/admin \n java.lang.NoClassDefFoundError\r", + "detected_message_with_numbers": "Message 12 http : localhost/admin \n java.lang.NoClassDefFoundError\r", "only_numbers": "12", 
"merged_small_logs": "", "stacktrace": "", @@ -45,13 +45,13 @@ "found_exceptions_extended": "java.lang.noclassdeffounderror lang.noclassdeffounderror noclassdeffounderror", "found_tests_and_methods": "", "stacktrace_extended": "", - "message_extended": "Message http : localhost/admin \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", - "detected_message_extended": "Message http : localhost/admin \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", - "detected_message_without_params_extended": "Message \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", - "message_without_params_extended": "Message \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", - "message_without_params_and_brackets": "Message 12 \r\n java.lang lang.NoClassDefFoundError", - "detected_message_without_params_and_brackets": "Message 12 \r\n java.lang lang.NoClassDefFoundError", - "whole_message": "Message 12 \r\n java.lang lang.NoClassDefFoundError" + "message_extended": "Message SPECIALNUMBER http : localhost/admin \n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", + "detected_message_extended": "Message 12 http : localhost/admin \n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", + "detected_message_without_params_extended": "Message SPECIALNUMBER\n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", + "message_without_params_extended": "Message SPECIALNUMBER\n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", + "message_without_params_and_brackets": "Message SPECIALNUMBER\n java.lang lang.NoClassDefFoundError", + "detected_message_without_params_and_brackets": "Message SPECIALNUMBER\n java.lang lang.NoClassDefFoundError", + "whole_message": "Message SPECIALNUMBER \n java.lang lang.NoClassDefFoundError" } } ] diff --git a/test_res/fixtures/one_hit_search_rs_clustering.json b/test_res/fixtures/one_hit_search_rs_clustering.json index 19ea7fd4..d39b37fb 100644 --- a/test_res/fixtures/one_hit_search_rs_clustering.json +++ b/test_res/fixtures/one_hit_search_rs_clustering.json @@ -26,14 +26,14 @@ "log_level": 40000, "original_message_lines": 1, "original_message_words_number": 2, - "message": "error occured \n error found \n error mined", - "whole_message": "error occured \n error found \n error mined", + "message": "error occurred \n error found \n error mined", + "whole_message": "error occurred \n error found \n error mined", "test_item": 12, "start_time": "2020-01-15 10:57:43", "unique_id": "unique1", "test_case_hash": -1126886180, - "detected_message": "error occured \n error found \n error mined", - "detected_message_with_numbers": "error occured \n error found \n error mined", + "detected_message": "error occurred \n error found \n error mined", + "detected_message_with_numbers": "error occurred \n error found \n error mined", "only_numbers": "", "merged_small_logs": "", "is_merged": "False", @@ -41,16 +41,16 @@ "urls": "", "paths": "", "cluster_id": "123", - "cluster_message": "error occured \n error found \n error mined", + "cluster_message": "error occurred \n error found \n error mined", "message_params": "", "found_exceptions": "", "potential_status_codes": "", "found_exceptions_extended": "", "stacktrace_extended": "", - "message_extended": "error occured \n error found \n error mined", - "detected_message_extended": "error occured \n error found \n error mined", - "detected_message_without_params_extended": "error occured \n error found \n error mined", - "message_without_params_extended": "error occured \n error 
found \n error mined" + "message_extended": "error occurred \n error found \n error mined", + "detected_message_extended": "error occurred \n error found \n error mined", + "detected_message_without_params_extended": "error occurred \n error found \n error mined", + "message_without_params_extended": "error occurred \n error found \n error mined" } } ] diff --git a/test_res/fixtures/one_hit_search_rs_search_logs.json b/test_res/fixtures/one_hit_search_rs_search_logs.json index 74e97cd3..21a83bc5 100644 --- a/test_res/fixtures/one_hit_search_rs_search_logs.json +++ b/test_res/fixtures/one_hit_search_rs_search_logs.json @@ -23,14 +23,14 @@ "issue_type": "AB001", "launch_name": "Launch 1", "log_level": 40000, - "merged_small_logs": "error occured once", + "merged_small_logs": "error occurred once", "message": "", "test_item": 1, "unique_id": "unique1", "test_case_hash": -1126886180, "stacktrace": "", - "detected_message": "error occured once", - "detected_message_with_numbers": "error occured once", + "detected_message": "error occurred once", + "detected_message_with_numbers": "error occurred once", "only_numbers": "", "potential_status_codes": "" } diff --git a/test_res/fixtures/search_logs_rq_first_group.json b/test_res/fixtures/search_logs_rq_first_group.json index 27c236c2..bb56d322 100644 --- a/test_res/fixtures/search_logs_rq_first_group.json +++ b/test_res/fixtures/search_logs_rq_first_group.json @@ -1 +1 @@ -{"query": {"function_score": {"boost_mode": "multiply", "functions": [{"exp": {"start_time": {"decay": 0.95, "offset": "1d", "origin": "2021-10-18 17:00:00", "scale": "7d"}}}, {"script_score": {"script": {"source": "0.2"}}}], "query": {"bool": {"filter": [{"range": {"log_level": {"gte": 40000}}}, {"exists": {"field": "issue_type"}}, {"term": {"is_merged": false}}, {"term": {"cluster_with_numbers": true}}], "must": [{"wildcard": {"cluster_message": "*"}}, {"more_like_this": {"boost": 1.0, "fields": ["whole_message"], "like": "error occured error found error mined", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "5<95%"}}], "must_not": [{"term": {"test_item": {"boost": 1.0, "value": 2}}}], "should": [{"term": {"launch_id": 1}}, {"term": {"launch_name": "Launch name"}}]}}, "score_mode": "max"}}, "size": 10} \ No newline at end of file +{"query": {"function_score": {"boost_mode": "multiply", "functions": [{"exp": {"start_time": {"decay": 0.95, "offset": "1d", "origin": "2021-10-18 17:00:00", "scale": "7d"}}}, {"script_score": {"script": {"source": "0.2"}}}], "query": {"bool": {"filter": [{"range": {"log_level": {"gte": 40000}}}, {"exists": {"field": "issue_type"}}, {"term": {"is_merged": false}}, {"term": {"cluster_with_numbers": true}}], "must": [{"wildcard": {"cluster_message": "*"}}, {"more_like_this": {"boost": 1.0, "fields": ["whole_message"], "like": "error occurred error found error mined", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "5<95%"}}], "must_not": [{"term": {"test_item": {"boost": 1.0, "value": 2}}}], "should": [{"term": {"launch_id": 1}}, {"term": {"launch_name": "Launch name"}}]}}, "score_mode": "max"}}, "size": 10} \ No newline at end of file diff --git a/test_res/fixtures/search_logs_rq_first_group_2lines.json b/test_res/fixtures/search_logs_rq_first_group_2lines.json index 44c07774..12852009 100644 --- a/test_res/fixtures/search_logs_rq_first_group_2lines.json +++ b/test_res/fixtures/search_logs_rq_first_group_2lines.json @@ -1 +1 @@ -{"query": {"function_score": {"boost_mode": 
"multiply", "functions": [{"exp": {"start_time": {"decay": 0.95, "offset": "1d", "origin": "2021-10-18 17:00:00", "scale": "7d"}}}, {"script_score": {"script": {"source": "0.2"}}}], "query": {"bool": {"filter": [{"range": {"log_level": {"gte": 40000}}}, {"exists": {"field": "issue_type"}}, {"term": {"is_merged": false}}, {"term": {"cluster_with_numbers": true}}], "must": [{"wildcard": {"cluster_message": "*"}}, {"more_like_this": {"boost": 1.0, "fields": ["whole_message"], "like": "error occured error found", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "5<95%"}}], "must_not": [{"term": {"test_item": {"boost": 1.0, "value": 2}}}], "should": [{"term": {"launch_id": 1}}, {"term": {"launch_name": "Launch name"}}]}}, "score_mode": "max"}}, "size": 10} \ No newline at end of file +{"query": {"function_score": {"boost_mode": "multiply", "functions": [{"exp": {"start_time": {"decay": 0.95, "offset": "1d", "origin": "2021-10-18 17:00:00", "scale": "7d"}}}, {"script_score": {"script": {"source": "0.2"}}}], "query": {"bool": {"filter": [{"range": {"log_level": {"gte": 40000}}}, {"exists": {"field": "issue_type"}}, {"term": {"is_merged": false}}, {"term": {"cluster_with_numbers": true}}], "must": [{"wildcard": {"cluster_message": "*"}}, {"more_like_this": {"boost": 1.0, "fields": ["whole_message"], "like": "error occurred error found", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "5<95%"}}], "must_not": [{"term": {"test_item": {"boost": 1.0, "value": 2}}}], "should": [{"term": {"launch_id": 1}}, {"term": {"launch_name": "Launch name"}}]}}, "score_mode": "max"}}, "size": 10} \ No newline at end of file diff --git a/test_res/fixtures/search_logs_rq_first_group_2lines_not_for_update.json b/test_res/fixtures/search_logs_rq_first_group_2lines_not_for_update.json index de690b28..367475e8 100644 --- a/test_res/fixtures/search_logs_rq_first_group_2lines_not_for_update.json +++ b/test_res/fixtures/search_logs_rq_first_group_2lines_not_for_update.json @@ -1 +1 @@ -{"query": {"function_score": {"boost_mode": "multiply", "functions": [{"exp": {"start_time": {"decay": 0.95, "offset": "1d", "origin": "2021-10-18 17:00:00", "scale": "7d"}}}, {"script_score": {"script": {"source": "0.2"}}}], "query": {"bool": {"filter": [{"range": {"log_level": {"gte": 40000}}}, {"exists": {"field": "issue_type"}}, {"term": {"is_merged": false}}, {"term": {"cluster_with_numbers": true}}], "must": [{"wildcard": {"cluster_message": "*"}}, {"more_like_this": {"boost": 1.0, "fields": ["whole_message"], "like": "error occured error found", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "5<95%"}}], "must_not": [{"term": {"test_item": {"boost": 1.0, "value": 2}}}, {"term": {"launch_id": 1}}], "should": [{"term": {"launch_name": "Launch name"}}]}}, "score_mode": "max"}}, "size": 10} \ No newline at end of file +{"query": {"function_score": {"boost_mode": "multiply", "functions": [{"exp": {"start_time": {"decay": 0.95, "offset": "1d", "origin": "2021-10-18 17:00:00", "scale": "7d"}}}, {"script_score": {"script": {"source": "0.2"}}}], "query": {"bool": {"filter": [{"range": {"log_level": {"gte": 40000}}}, {"exists": {"field": "issue_type"}}, {"term": {"is_merged": false}}, {"term": {"cluster_with_numbers": true}}], "must": [{"wildcard": {"cluster_message": "*"}}, {"more_like_this": {"boost": 1.0, "fields": ["whole_message"], "like": "error occurred error found", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, 
"minimum_should_match": "5<95%"}}], "must_not": [{"term": {"test_item": {"boost": 1.0, "value": 2}}}, {"term": {"launch_id": 1}}], "should": [{"term": {"launch_name": "Launch name"}}]}}, "score_mode": "max"}}, "size": 10} \ No newline at end of file diff --git a/test_res/fixtures/search_logs_rq_first_group_assertion_error.json b/test_res/fixtures/search_logs_rq_first_group_assertion_error.json index b76cf338..82587e9c 100644 --- a/test_res/fixtures/search_logs_rq_first_group_assertion_error.json +++ b/test_res/fixtures/search_logs_rq_first_group_assertion_error.json @@ -1 +1 @@ -{"query": {"function_score": {"boost_mode": "multiply", "functions": [{"exp": {"start_time": {"decay": 0.95, "offset": "1d", "origin": "2021-10-18 17:00:00", "scale": "7d"}}}, {"script_score": {"script": {"source": "0.2"}}}], "query": {"bool": {"filter": [{"range": {"log_level": {"gte": 40000}}}, {"exists": {"field": "issue_type"}}, {"term": {"is_merged": false}}, {"term": {"cluster_with_numbers": true}}], "must": [{"wildcard": {"cluster_message": "*"}}, {"more_like_this": {"boost": 1.0, "fields": ["whole_message"], "like": "assertionerror error occured error found", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "5<95%"}}, {"more_like_this": {"boost": 1.0, "fields": ["found_exceptions"], "like": "AssertionError", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "1"}}], "must_not": [{"term": {"test_item": {"boost": 1.0, "value": 2}}}, {"term": {"launch_id": 1}}], "should": [{"term": {"launch_name": "Launch name"}}]}}, "score_mode": "max"}}, "size": 10} \ No newline at end of file +{"query": {"function_score": {"boost_mode": "multiply", "functions": [{"exp": {"start_time": {"decay": 0.95, "offset": "1d", "origin": "2021-10-18 17:00:00", "scale": "7d"}}}, {"script_score": {"script": {"source": "0.2"}}}], "query": {"bool": {"filter": [{"range": {"log_level": {"gte": 40000}}}, {"exists": {"field": "issue_type"}}, {"term": {"is_merged": false}}, {"term": {"cluster_with_numbers": true}}], "must": [{"wildcard": {"cluster_message": "*"}}, {"more_like_this": {"boost": 1.0, "fields": ["whole_message"], "like": "assertionerror error occurred error found", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "5<95%"}}, {"more_like_this": {"boost": 1.0, "fields": ["found_exceptions"], "like": "AssertionError", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "1"}}], "must_not": [{"term": {"test_item": {"boost": 1.0, "value": 2}}}, {"term": {"launch_id": 1}}], "should": [{"term": {"launch_name": "Launch name"}}]}}, "score_mode": "max"}}, "size": 10} \ No newline at end of file diff --git a/test_res/fixtures/search_logs_rq_first_group_assertion_error_status_code.json b/test_res/fixtures/search_logs_rq_first_group_assertion_error_status_code.json index b61b09fc..fd722182 100644 --- a/test_res/fixtures/search_logs_rq_first_group_assertion_error_status_code.json +++ b/test_res/fixtures/search_logs_rq_first_group_assertion_error_status_code.json @@ -1 +1 @@ -{"query": {"function_score": {"boost_mode": "multiply", "functions": [{"exp": {"start_time": {"decay": 0.95, "offset": "1d", "origin": "2021-10-18 17:00:00", "scale": "7d"}}}, {"script_score": {"script": {"source": "0.2"}}}], "query": {"bool": {"filter": [{"range": {"log_level": {"gte": 40000}}}, {"exists": {"field": "issue_type"}}, {"term": {"is_merged": false}}, {"term": {"cluster_with_numbers": true}}], "must": [{"wildcard": 
{"cluster_message": "*"}}, {"more_like_this": {"boost": 1.0, "fields": ["whole_message"], "like": "assertionerror status code 500 error occured error found", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "5<86%"}}, {"more_like_this": {"boost": 1.0, "fields": ["found_exceptions"], "like": "AssertionError", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "1"}}, {"more_like_this": {"boost": 1.0, "fields": ["potential_status_codes"], "like": "500", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "1"}}], "must_not": [{"term": {"test_item": {"boost": 1.0, "value": 5}}}, {"term": {"launch_id": 1}}], "should": [{"term": {"launch_name": "Launch name"}}]}}, "score_mode": "max"}}, "size": 10} \ No newline at end of file +{"query": {"function_score": {"boost_mode": "multiply", "functions": [{"exp": {"start_time": {"decay": 0.95, "offset": "1d", "origin": "2021-10-18 17:00:00", "scale": "7d"}}}, {"script_score": {"script": {"source": "0.2"}}}], "query": {"bool": {"filter": [{"range": {"log_level": {"gte": 40000}}}, {"exists": {"field": "issue_type"}}, {"term": {"is_merged": false}}, {"term": {"cluster_with_numbers": true}}], "must": [{"wildcard": {"cluster_message": "*"}}, {"more_like_this": {"boost": 1.0, "fields": ["whole_message"], "like": "assertionerror status code specialnumber error occurred error found", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "5<86%"}}, {"more_like_this": {"boost": 1.0, "fields": ["found_exceptions"], "like": "AssertionError", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "1"}}, {"more_like_this": {"boost": 1.0, "fields": ["potential_status_codes"], "like": "500", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "1"}}], "must_not": [{"term": {"test_item": {"boost": 1.0, "value": 5}}}, {"term": {"launch_id": 1}}], "should": [{"term": {"launch_name": "Launch name"}}]}}, "score_mode": "max"}}, "size": 10} \ No newline at end of file diff --git a/test_res/fixtures/search_logs_rq_first_group_no_such_element.json b/test_res/fixtures/search_logs_rq_first_group_no_such_element.json index 1dbc6aed..67c87e14 100644 --- a/test_res/fixtures/search_logs_rq_first_group_no_such_element.json +++ b/test_res/fixtures/search_logs_rq_first_group_no_such_element.json @@ -1 +1 @@ -{"query": {"function_score": {"boost_mode": "multiply", "functions": [{"exp": {"start_time": {"decay": 0.95, "offset": "1d", "origin": "2021-10-18 17:00:00", "scale": "7d"}}}, {"script_score": {"script": {"source": "0.2"}}}], "query": {"bool": {"filter": [{"range": {"log_level": {"gte": 40000}}}, {"exists": {"field": "issue_type"}}, {"term": {"is_merged": false}}, {"term": {"cluster_with_numbers": true}}], "must": [{"wildcard": {"cluster_message": "*"}}, {"more_like_this": {"boost": 1.0, "fields": ["whole_message"], "like": "nosuchelementexception error occured error found", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "5<95%"}}, {"more_like_this": {"boost": 1.0, "fields": ["found_exceptions"], "like": "NoSuchElementException", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "1"}}], "must_not": [{"term": {"test_item": {"boost": 1.0, "value": 6}}}, {"term": {"launch_id": 1}}], "should": [{"term": {"launch_name": "Launch name"}}]}}, "score_mode": "max"}}, "size": 10} \ No newline at end of file +{"query": 
{"function_score": {"boost_mode": "multiply", "functions": [{"exp": {"start_time": {"decay": 0.95, "offset": "1d", "origin": "2021-10-18 17:00:00", "scale": "7d"}}}, {"script_score": {"script": {"source": "0.2"}}}], "query": {"bool": {"filter": [{"range": {"log_level": {"gte": 40000}}}, {"exists": {"field": "issue_type"}}, {"term": {"is_merged": false}}, {"term": {"cluster_with_numbers": true}}], "must": [{"wildcard": {"cluster_message": "*"}}, {"more_like_this": {"boost": 1.0, "fields": ["whole_message"], "like": "nosuchelementexception error occurred error found", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "5<95%"}}, {"more_like_this": {"boost": 1.0, "fields": ["found_exceptions"], "like": "NoSuchElementException", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "1"}}], "must_not": [{"term": {"test_item": {"boost": 1.0, "value": 6}}}, {"term": {"launch_id": 1}}], "should": [{"term": {"launch_name": "Launch name"}}]}}, "score_mode": "max"}}, "size": 10} \ No newline at end of file diff --git a/test_res/fixtures/search_logs_rq_first_group_no_such_element_all_log_lines.json b/test_res/fixtures/search_logs_rq_first_group_no_such_element_all_log_lines.json index 2d4e756d..138731de 100644 --- a/test_res/fixtures/search_logs_rq_first_group_no_such_element_all_log_lines.json +++ b/test_res/fixtures/search_logs_rq_first_group_no_such_element_all_log_lines.json @@ -1 +1 @@ -{"query": {"function_score": {"boost_mode": "multiply", "functions": [{"exp": {"start_time": {"decay": 0.95, "offset": "1d", "origin": "2021-10-18 17:00:00", "scale": "7d"}}}, {"script_score": {"script": {"source": "0.2"}}}], "query": {"bool": {"filter": [{"range": {"log_level": {"gte": 40000}}}, {"exists": {"field": "issue_type"}}, {"term": {"is_merged": false}}, {"term": {"cluster_with_numbers": true}}], "must": [{"wildcard": {"cluster_message": "*"}}, {"more_like_this": {"boost": 1.0, "fields": ["whole_message"], "like": "nosuchelementexception error occured error found assert query", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "5<83%"}}, {"more_like_this": {"boost": 1.0, "fields": ["found_exceptions"], "like": "NoSuchElementException", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "1"}}], "must_not": [{"term": {"test_item": {"boost": 1.0, "value": 6}}}, {"term": {"launch_id": 1}}], "should": [{"term": {"launch_name": "Launch name"}}]}}, "score_mode": "max"}}, "size": 10} \ No newline at end of file +{"query": {"function_score": {"boost_mode": "multiply", "functions": [{"exp": {"start_time": {"decay": 0.95, "offset": "1d", "origin": "2021-10-18 17:00:00", "scale": "7d"}}}, {"script_score": {"script": {"source": "0.2"}}}], "query": {"bool": {"filter": [{"range": {"log_level": {"gte": 40000}}}, {"exists": {"field": "issue_type"}}, {"term": {"is_merged": false}}, {"term": {"cluster_with_numbers": true}}], "must": [{"wildcard": {"cluster_message": "*"}}, {"more_like_this": {"boost": 1.0, "fields": ["whole_message"], "like": "nosuchelementexception error occurred error found assert query", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "5<83%"}}, {"more_like_this": {"boost": 1.0, "fields": ["found_exceptions"], "like": "NoSuchElementException", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "1"}}], "must_not": [{"term": {"test_item": {"boost": 1.0, "value": 6}}}, {"term": {"launch_id": 1}}], 
"should": [{"term": {"launch_name": "Launch name"}}]}}, "score_mode": "max"}}, "size": 10} \ No newline at end of file diff --git a/test_res/fixtures/search_logs_rq_first_group_not_for_update.json b/test_res/fixtures/search_logs_rq_first_group_not_for_update.json index 20dc016d..39c9ff5d 100644 --- a/test_res/fixtures/search_logs_rq_first_group_not_for_update.json +++ b/test_res/fixtures/search_logs_rq_first_group_not_for_update.json @@ -1 +1 @@ -{"query": {"function_score": {"boost_mode": "multiply", "functions": [{"exp": {"start_time": {"decay": 0.95, "offset": "1d", "origin": "2021-10-18 17:00:00", "scale": "7d"}}}, {"script_score": {"script": {"source": "0.2"}}}], "query": {"bool": {"filter": [{"range": {"log_level": {"gte": 40000}}}, {"exists": {"field": "issue_type"}}, {"term": {"is_merged": false}}, {"term": {"cluster_with_numbers": true}}], "must": [{"wildcard": {"cluster_message": "*"}}, {"more_like_this": {"boost": 1.0, "fields": ["whole_message"], "like": "error occured error found error mined", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "5<95%"}}], "must_not": [{"term": {"test_item": {"boost": 1.0, "value": 2}}}, {"term": {"launch_id": 1}}], "should": [{"term": {"launch_name": "Launch name"}}]}}, "score_mode": "max"}}, "size": 10} \ No newline at end of file +{"query": {"function_score": {"boost_mode": "multiply", "functions": [{"exp": {"start_time": {"decay": 0.95, "offset": "1d", "origin": "2021-10-18 17:00:00", "scale": "7d"}}}, {"script_score": {"script": {"source": "0.2"}}}], "query": {"bool": {"filter": [{"range": {"log_level": {"gte": 40000}}}, {"exists": {"field": "issue_type"}}, {"term": {"is_merged": false}}, {"term": {"cluster_with_numbers": true}}], "must": [{"wildcard": {"cluster_message": "*"}}, {"more_like_this": {"boost": 1.0, "fields": ["whole_message"], "like": "error occurred error found error mined", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "5<95%"}}], "must_not": [{"term": {"test_item": {"boost": 1.0, "value": 2}}}, {"term": {"launch_id": 1}}], "should": [{"term": {"launch_name": "Launch name"}}]}}, "score_mode": "max"}}, "size": 10} \ No newline at end of file diff --git a/test_res/fixtures/search_logs_rq_first_group_small_logs.json b/test_res/fixtures/search_logs_rq_first_group_small_logs.json index 46d9479c..2e7f79c5 100644 --- a/test_res/fixtures/search_logs_rq_first_group_small_logs.json +++ b/test_res/fixtures/search_logs_rq_first_group_small_logs.json @@ -1 +1 @@ -{"query": {"function_score": {"boost_mode": "multiply", "functions": [{"exp": {"start_time": {"decay": 0.95, "offset": "1d", "origin": "2021-10-18 17:00:00", "scale": "7d"}}}, {"script_score": {"script": {"source": "0.2"}}}], "query": {"bool": {"filter": [{"range": {"log_level": {"gte": 40000}}}, {"exists": {"field": "issue_type"}}, {"term": {"is_merged": true}}, {"term": {"cluster_with_numbers": true}}], "must": [{"wildcard": {"cluster_message": "*"}}, {"more_like_this": {"boost": 1.0, "fields": ["whole_message"], "like": "error occured twice assertionerror error occured error found", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "5<80%"}}, {"more_like_this": {"boost": 1.0, "fields": ["found_exceptions"], "like": "assertionerror", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "1"}}], "must_not": [{"term": {"test_item": {"boost": 1.0, "value": 2}}}, {"term": {"launch_id": 1}}], "should": [{"term": {"launch_name": "Launch 
name"}}]}}, "score_mode": "max"}}, "size": 10} \ No newline at end of file +{"query": {"function_score": {"boost_mode": "multiply", "functions": [{"exp": {"start_time": {"decay": 0.95, "offset": "1d", "origin": "2021-10-18 17:00:00", "scale": "7d"}}}, {"script_score": {"script": {"source": "0.2"}}}], "query": {"bool": {"filter": [{"range": {"log_level": {"gte": 40000}}}, {"exists": {"field": "issue_type"}}, {"term": {"is_merged": true}}, {"term": {"cluster_with_numbers": true}}], "must": [{"wildcard": {"cluster_message": "*"}}, {"more_like_this": {"boost": 1.0, "fields": ["whole_message"], "like": "error occurred twice assertionerror error occurred error found", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "5<80%"}}, {"more_like_this": {"boost": 1.0, "fields": ["found_exceptions"], "like": "assertionerror", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "1"}}], "must_not": [{"term": {"test_item": {"boost": 1.0, "value": 2}}}, {"term": {"launch_id": 1}}], "should": [{"term": {"launch_name": "Launch name"}}]}}, "score_mode": "max"}}, "size": 10} \ No newline at end of file diff --git a/test_res/fixtures/search_logs_rq_not_found.json b/test_res/fixtures/search_logs_rq_not_found.json index f20109ff..938e2f6c 100644 --- a/test_res/fixtures/search_logs_rq_not_found.json +++ b/test_res/fixtures/search_logs_rq_not_found.json @@ -1 +1 @@ -{"_source": ["message", "test_item", "detected_message", "stacktrace", "potential_status_codes", "merged_small_logs"], "query": {"bool": {"filter": [{"range": {"log_level": {"gte": 40000}}}, {"exists": {"field": "issue_type"}}, {"term": {"is_merged": true}}], "must": [{"bool": {"should": [{"wildcard": {"issue_type": "TI*"}}, {"wildcard": {"issue_type": "ti*"}}]}}, {"terms": {"launch_id": [1]}}, {"more_like_this": {"boost": 1.0, "fields": ["merged_small_logs"], "like": "error occured", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "5<95%"}}], "must_not": [{"term": {"test_item": {"boost": 1.0, "value": 3}}}, {"wildcard": {"message": "*"}}], "should": [{"term": {"is_auto_analyzed": {"boost": 1.0, "value": "false"}}}]}}, "size": 1000, "sort": "_doc"} \ No newline at end of file +{"_source": ["message", "test_item", "detected_message", "stacktrace", "potential_status_codes", "merged_small_logs"], "query": {"bool": {"filter": [{"range": {"log_level": {"gte": 40000}}}, {"exists": {"field": "issue_type"}}, {"term": {"is_merged": true}}], "must": [{"bool": {"should": [{"wildcard": {"issue_type": "TI*"}}, {"wildcard": {"issue_type": "ti*"}}]}}, {"terms": {"launch_id": [1]}}, {"more_like_this": {"boost": 1.0, "fields": ["merged_small_logs"], "like": "error occurred", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "5<95%"}}], "must_not": [{"term": {"test_item": {"boost": 1.0, "value": 3}}}, {"wildcard": {"message": "*"}}], "should": [{"term": {"is_auto_analyzed": {"boost": 1.0, "value": "false"}}}]}}, "size": 1000, "sort": "_doc"} \ No newline at end of file diff --git a/test_res/fixtures/search_logs_rq_second_group.json b/test_res/fixtures/search_logs_rq_second_group.json index c0449c9b..1cbb4206 100644 --- a/test_res/fixtures/search_logs_rq_second_group.json +++ b/test_res/fixtures/search_logs_rq_second_group.json @@ -1 +1 @@ -{"query": {"function_score": {"boost_mode": "multiply", "functions": [{"exp": {"start_time": {"decay": 0.95, "offset": "1d", "origin": "2021-10-18 17:00:00", "scale": "7d"}}}, {"script_score": 
{"script": {"source": "0.2"}}}], "query": {"bool": {"filter": [{"range": {"log_level": {"gte": 40000}}}, {"exists": {"field": "issue_type"}}, {"term": {"is_merged": false}}, {"term": {"cluster_with_numbers": true}}], "must": [{"wildcard": {"cluster_message": "*"}}, {"more_like_this": {"boost": 1.0, "fields": ["whole_message"], "like": "error occured error found assert query", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "5<80%"}}], "must_not": [{"term": {"test_item": {"boost": 1.0, "value": 6}}}], "should": [{"term": {"launch_id": 1}}, {"term": {"launch_name": "Launch name"}}]}}, "score_mode": "max"}}, "size": 10} \ No newline at end of file +{"query": {"function_score": {"boost_mode": "multiply", "functions": [{"exp": {"start_time": {"decay": 0.95, "offset": "1d", "origin": "2021-10-18 17:00:00", "scale": "7d"}}}, {"script_score": {"script": {"source": "0.2"}}}], "query": {"bool": {"filter": [{"range": {"log_level": {"gte": 40000}}}, {"exists": {"field": "issue_type"}}, {"term": {"is_merged": false}}, {"term": {"cluster_with_numbers": true}}], "must": [{"wildcard": {"cluster_message": "*"}}, {"more_like_this": {"boost": 1.0, "fields": ["whole_message"], "like": "error occurred error found assert query", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "5<80%"}}], "must_not": [{"term": {"test_item": {"boost": 1.0, "value": 6}}}], "should": [{"term": {"launch_id": 1}}, {"term": {"launch_name": "Launch name"}}]}}, "score_mode": "max"}}, "size": 10} \ No newline at end of file diff --git a/test_res/fixtures/search_logs_rq_second_group_not_for_update.json b/test_res/fixtures/search_logs_rq_second_group_not_for_update.json index bc05e615..e0abe548 100644 --- a/test_res/fixtures/search_logs_rq_second_group_not_for_update.json +++ b/test_res/fixtures/search_logs_rq_second_group_not_for_update.json @@ -1 +1 @@ -{"query": {"function_score": {"boost_mode": "multiply", "functions": [{"exp": {"start_time": {"decay": 0.95, "offset": "1d", "origin": "2021-10-18 17:00:00", "scale": "7d"}}}, {"script_score": {"script": {"source": "0.2"}}}], "query": {"bool": {"filter": [{"range": {"log_level": {"gte": 40000}}}, {"exists": {"field": "issue_type"}}, {"term": {"is_merged": false}}, {"term": {"cluster_with_numbers": true}}], "must": [{"wildcard": {"cluster_message": "*"}}, {"more_like_this": {"boost": 1.0, "fields": ["whole_message"], "like": "error occured error found assert query", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "5<80%"}}], "must_not": [{"term": {"test_item": {"boost": 1.0, "value": 6}}}, {"term": {"launch_id": 1}}], "should": [{"term": {"launch_name": "Launch name"}}]}}, "score_mode": "max"}}, "size": 10} \ No newline at end of file +{"query": {"function_score": {"boost_mode": "multiply", "functions": [{"exp": {"start_time": {"decay": 0.95, "offset": "1d", "origin": "2021-10-18 17:00:00", "scale": "7d"}}}, {"script_score": {"script": {"source": "0.2"}}}], "query": {"bool": {"filter": [{"range": {"log_level": {"gte": 40000}}}, {"exists": {"field": "issue_type"}}, {"term": {"is_merged": false}}, {"term": {"cluster_with_numbers": true}}], "must": [{"wildcard": {"cluster_message": "*"}}, {"more_like_this": {"boost": 1.0, "fields": ["whole_message"], "like": "error occurred error found assert query", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "5<80%"}}], "must_not": [{"term": {"test_item": {"boost": 1.0, "value": 6}}}, {"term": 
{"launch_id": 1}}], "should": [{"term": {"launch_name": "Launch name"}}]}}, "score_mode": "max"}}, "size": 10} \ No newline at end of file diff --git a/test_res/fixtures/search_logs_rq_second_group_small_logs.json b/test_res/fixtures/search_logs_rq_second_group_small_logs.json index a32b4d77..cdec6135 100644 --- a/test_res/fixtures/search_logs_rq_second_group_small_logs.json +++ b/test_res/fixtures/search_logs_rq_second_group_small_logs.json @@ -1 +1 @@ -{"query": {"function_score": {"boost_mode": "multiply", "functions": [{"exp": {"start_time": {"decay": 0.95, "offset": "1d", "origin": "2021-10-18 17:00:00", "scale": "7d"}}}, {"script_score": {"script": {"source": "0.2"}}}], "query": {"bool": {"filter": [{"range": {"log_level": {"gte": 40000}}}, {"exists": {"field": "issue_type"}}, {"term": {"is_merged": true}}, {"term": {"cluster_with_numbers": true}}], "must": [{"wildcard": {"cluster_message": "*"}}, {"more_like_this": {"boost": 1.0, "fields": ["whole_message"], "like": "assertionerror status code 500 error occured", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "5<83%"}}, {"more_like_this": {"boost": 1.0, "fields": ["found_exceptions"], "like": "assertionerror", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "1"}}, {"more_like_this": {"boost": 1.0, "fields": ["potential_status_codes"], "like": "500", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "1"}}], "must_not": [{"term": {"test_item": {"boost": 1.0, "value": 5}}}, {"term": {"launch_id": 1}}], "should": [{"term": {"launch_name": "Launch name"}}]}}, "score_mode": "max"}}, "size": 10} \ No newline at end of file +{"size": 10, "query": {"function_score": {"query": {"bool": {"filter": [{"range": {"log_level": {"gte": 40000}}}, {"exists": {"field": "issue_type"}}, {"term": {"is_merged": true}}, {"term": {"cluster_with_numbers": true}}], "must_not": [{"term": {"test_item": {"value": 5, "boost": 1.0}}}, {"term": {"launch_id": 1}}], "should": [{"term": {"launch_name": "Launch name"}}], "must": [{"wildcard": {"cluster_message": "*"}}, {"more_like_this": {"fields": ["whole_message"], "like": "assertionerror status code specialnumber error occurred", "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "5<83%", "max_query_terms": 50, "boost": 1.0}}, {"more_like_this": {"fields": ["found_exceptions"], "like": "assertionerror", "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "1", "max_query_terms": 50, "boost": 1.0}}, {"more_like_this": {"fields": ["potential_status_codes"], "like": "500", "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "1", "max_query_terms": 50, "boost": 1.0}}]}}, "functions": [{"exp": {"start_time": {"origin": "2021-10-18 17:00:00", "scale": "7d", "offset": "1d", "decay": 0.95}}}, {"script_score": {"script": {"source": "0.2"}}}], "score_mode": "max", "boost_mode": "multiply"}}} \ No newline at end of file diff --git a/test_res/fixtures/search_logs_rq_with_status_codes.json b/test_res/fixtures/search_logs_rq_with_status_codes.json index 1a959c8d..ca93fec0 100644 --- a/test_res/fixtures/search_logs_rq_with_status_codes.json +++ b/test_res/fixtures/search_logs_rq_with_status_codes.json @@ -1 +1 @@ -{"_source": ["message", "test_item", "detected_message", "stacktrace", "potential_status_codes", "merged_small_logs"], "query": {"bool": {"filter": [{"range": {"log_level": {"gte": 40000}}}, {"exists": {"field": "issue_type"}}, {"term": {"is_merged": true}}], "must": [{"bool": 
{"should": [{"wildcard": {"issue_type": "TI*"}}, {"wildcard": {"issue_type": "ti*"}}]}}, {"terms": {"launch_id": [1]}}, {"more_like_this": {"boost": 1.0, "fields": ["merged_small_logs"], "like": "error occured status code got", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "5<80%"}}, {"more_like_this": {"boost": 1.0, "fields": ["potential_status_codes"], "like": "500 200", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "2"}}], "must_not": [{"term": {"test_item": {"boost": 1.0, "value": 3}}}, {"wildcard": {"message": "*"}}], "should": [{"term": {"is_auto_analyzed": {"boost": 1.0, "value": "false"}}}]}}, "size": 1000, "sort": "_doc"} \ No newline at end of file +{"_source": ["message", "test_item", "detected_message", "stacktrace", "potential_status_codes", "merged_small_logs"], "size": 1000, "query": {"bool": {"filter": [{"range": {"log_level": {"gte": 40000}}}, {"exists": {"field": "issue_type"}}, {"term": {"is_merged": true}}], "must_not": [{"term": {"test_item": {"value": 3, "boost": 1.0}}}, {"wildcard": {"message": "*"}}], "must": [{"bool": {"should": [{"wildcard": {"issue_type": "TI*"}}, {"wildcard": {"issue_type": "ti*"}}]}}, {"terms": {"launch_id": [1]}}, {"more_like_this": {"fields": ["merged_small_logs"], "like": "error occurred status code specialnumber got", "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "5<83%", "max_query_terms": 50, "boost": 1.0}}, {"more_like_this": {"fields": ["potential_status_codes"], "like": "500 200", "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "2", "max_query_terms": 50, "boost": 1.0}}], "should": [{"term": {"is_auto_analyzed": {"value": "false", "boost": 1.0}}}]}}, "sort": "_doc"} \ No newline at end of file diff --git a/test_res/fixtures/search_rq_another_log.json b/test_res/fixtures/search_rq_another_log.json index 54ee59d3..f0a41f6f 100644 --- a/test_res/fixtures/search_rq_another_log.json +++ b/test_res/fixtures/search_rq_another_log.json @@ -1 +1 @@ -{"query": {"bool": {"filter": [{"range": {"log_level": {"gte": 40000}}}, {"exists": {"field": "issue_type"}}, {"term": {"is_merged": true}}], "must": [{"more_like_this": {"boost": 2.0, "fields": ["message"], "like": "Message \r\n Message \r\n Message ", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "5<80%"}}], "must_not": [{"wildcard": {"issue_type": "TI*"}}, {"wildcard": {"issue_type": "ti*"}}, {"wildcard": {"issue_type": "nd*"}}, {"wildcard": {"issue_type": "ND*"}}, {"term": {"test_item": 1}}], "should": [{"term": {"test_case_hash": {"boost": 2, "value": -1126886180}}}, {"term": {"is_auto_analyzed": {"boost": 2, "value": "false"}}}, {"term": {"launch_name": {"boost": 2, "value": "Launch with test items with logs"}}}, {"more_like_this": {"boost": 0.5, "fields": ["merged_small_logs"], "like": "", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "5<80%"}}, {"more_like_this": {"boost": 4.0, "fields": ["detected_message"], "like": "Message \r\n Message \r\n Message ", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "5<80%"}}, {"more_like_this": {"boost": 4.0, "fields": ["only_numbers"], "like": "2 4 5", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "1"}}]}}, "size": 10, "sort": ["_score", {"start_time": "desc"}]} \ No newline at end of file +{"query": {"bool": {"filter": [{"range": {"log_level": {"gte": 40000}}}, {"exists": {"field": 
"issue_type"}}, {"term": {"is_merged": true}}], "must": [{"more_like_this": {"boost": 2.0, "fields": ["message"], "like": "Message \n Message \n Message ", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "5<80%"}}], "must_not": [{"wildcard": {"issue_type": "TI*"}}, {"wildcard": {"issue_type": "ti*"}}, {"wildcard": {"issue_type": "nd*"}}, {"wildcard": {"issue_type": "ND*"}}, {"term": {"test_item": 1}}], "should": [{"term": {"test_case_hash": {"boost": 2, "value": -1126886180}}}, {"term": {"is_auto_analyzed": {"boost": 2, "value": "false"}}}, {"term": {"launch_name": {"boost": 2, "value": "Launch with test items with logs"}}}, {"more_like_this": {"boost": 0.5, "fields": ["merged_small_logs"], "like": "", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "5<80%"}}, {"more_like_this": {"boost": 4.0, "fields": ["detected_message"], "like": "Message \n Message \n Message ", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "5<80%"}}, {"more_like_this": {"boost": 4.0, "fields": ["only_numbers"], "like": "2 4 5", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "1"}}]}}, "size": 10, "sort": ["_score", {"start_time": "desc"}]} \ No newline at end of file diff --git a/test_res/fixtures/search_rq_filtered.json b/test_res/fixtures/search_rq_filtered.json index 165c96fa..9af00ae5 100644 --- a/test_res/fixtures/search_rq_filtered.json +++ b/test_res/fixtures/search_rq_filtered.json @@ -1 +1 @@ -{"query": {"bool": {"filter": [{"range": {"log_level": {"gte": 40000}}}, {"exists": {"field": "issue_type"}}, {"term": {"is_merged": true}}], "must": [{"more_like_this": {"boost": 2.0, "fields": ["message"], "like": "Message AB \r\n Message AB \r\n Message AB", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "5<80%"}}], "must_not": [{"wildcard": {"issue_type": "TI*"}}, {"wildcard": {"issue_type": "ti*"}}, {"wildcard": {"issue_type": "nd*"}}, {"wildcard": {"issue_type": "ND*"}}, {"term": {"test_item": 1}}], "should": [{"term": {"test_case_hash": {"boost": 2, "value": -1126886180}}}, {"term": {"is_auto_analyzed": {"boost": 2, "value": "false"}}}, {"term": {"launch_name": {"boost": 2, "value": "Launch with test items with logs"}}}, {"more_like_this": {"boost": 0.5, "fields": ["merged_small_logs"], "like": "", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "5<80%"}}, {"more_like_this": {"boost": 4.0, "fields": ["detected_message"], "like": "Message AB \r\n Message AB \r\n Message AB", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "5<80%"}}, {"more_like_this": {"boost": 4.0, "fields": ["only_numbers"], "like": "", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "1"}}]}}, "size": 10, "sort": ["_score", {"start_time": "desc"}]} \ No newline at end of file +{"query": {"bool": {"filter": [{"range": {"log_level": {"gte": 40000}}}, {"exists": {"field": "issue_type"}}, {"term": {"is_merged": true}}], "must": [{"more_like_this": {"boost": 2.0, "fields": ["message"], "like": "Message AB \n Message AB \n Message AB", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "5<80%"}}], "must_not": [{"wildcard": {"issue_type": "TI*"}}, {"wildcard": {"issue_type": "ti*"}}, {"wildcard": {"issue_type": "nd*"}}, {"wildcard": {"issue_type": "ND*"}}, {"term": {"test_item": 1}}], "should": [{"term": 
{"test_case_hash": {"boost": 2, "value": -1126886180}}}, {"term": {"is_auto_analyzed": {"boost": 2, "value": "false"}}}, {"term": {"launch_name": {"boost": 2, "value": "Launch with test items with logs"}}}, {"more_like_this": {"boost": 0.5, "fields": ["merged_small_logs"], "like": "", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "5<80%"}}, {"more_like_this": {"boost": 4.0, "fields": ["detected_message"], "like": "Message AB \n Message AB \n Message AB", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "5<80%"}}, {"more_like_this": {"boost": 4.0, "fields": ["only_numbers"], "like": "", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "1"}}]}}, "size": 10, "sort": ["_score", {"start_time": "desc"}]} \ No newline at end of file diff --git a/test_res/fixtures/search_rq_third.json b/test_res/fixtures/search_rq_third.json index b3258c86..034e1cba 100644 --- a/test_res/fixtures/search_rq_third.json +++ b/test_res/fixtures/search_rq_third.json @@ -1 +1 @@ -{"query": {"bool": {"filter": [{"range": {"log_level": {"gte": 40000}}}, {"exists": {"field": "issue_type"}}, {"term": {"is_merged": false}}], "must": [{"more_like_this": {"boost": 4.0, "fields": ["detected_message_without_params_and_brackets"], "like": "Message 1\r\n Message 2\r\n Message 3", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "5<60%"}}], "must_not": [{"term": {"issue_type": "ti001"}}, {"term": {"test_item": 123}}, {"wildcard": {"stacktrace_extended": "*"}}], "should": [{"term": {"test_case_hash": {"boost": 2, "value": -1126886180}}}, {"term": {"is_auto_analyzed": {"boost": 2, "value": "false"}}}, {"term": {"launch_name": {"boost": 2, "value": "Launch with test items with logs"}}}, {"more_like_this": {"boost": 0.5, "fields": ["merged_small_logs"], "like": "", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "5<80%"}}, {"more_like_this": {"boost": 2.0, "fields": ["detected_message_without_params_extended"], "like": "Message \r", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "1"}}, {"more_like_this": {"boost": 2.0, "fields": ["only_numbers"], "like": "1 2 3", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "1"}}]}}, "size": 10, "sort": ["_score", {"start_time": "desc"}]} \ No newline at end of file +{"query": {"bool": {"filter": [{"range": {"log_level": {"gte": 40000}}}, {"exists": {"field": "issue_type"}}, {"term": {"is_merged": false}}], "must": [{"more_like_this": {"boost": 4.0, "fields": ["detected_message_without_params_and_brackets"], "like": "Message 1\n Message 2\n Message 3", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "5<60%"}}], "must_not": [{"term": {"issue_type": "ti001"}}, {"term": {"test_item": 123}}, {"wildcard": {"stacktrace_extended": "*"}}], "should": [{"term": {"test_case_hash": {"boost": 2, "value": -1126886180}}}, {"term": {"is_auto_analyzed": {"boost": 2, "value": "false"}}}, {"term": {"launch_name": {"boost": 2, "value": "Launch with test items with logs"}}}, {"more_like_this": {"boost": 0.5, "fields": ["merged_small_logs"], "like": "", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "5<80%"}}, {"more_like_this": {"boost": 2.0, "fields": ["detected_message_without_params_extended"], "like": "Message \r", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, 
"minimum_should_match": "1"}}, {"more_like_this": {"boost": 2.0, "fields": ["only_numbers"], "like": "1 2 3", "max_query_terms": 50, "min_doc_freq": 1, "min_term_freq": 1, "minimum_should_match": "1"}}]}}, "size": 10, "sort": ["_score", {"start_time": "desc"}]} \ No newline at end of file diff --git a/test_res/fixtures/suggest_boost_model_results.json b/test_res/fixtures/suggest_boost_model_results.json index dc3de7a3..4970532a 100644 --- a/test_res/fixtures/suggest_boost_model_results.json +++ b/test_res/fixtures/suggest_boost_model_results.json @@ -23,13 +23,21 @@ 1.0, 1.0, 1.0, + 0.89, 1.0, 1.0, - 0.88, 1.0, 1.0, 1.0, 0.0, + 0.0, + 0.75, + 1.0, + 0.29, + 0.0, + 1.0, + 1.0, + 0.0, 1.0, 0.8, 1.0, @@ -40,10 +48,7 @@ 1 ], [ - [ - 0.000761866569519043, - 0.999238133430481 - ] + [0.281916081905365, 0.718083918094635] ] ], "1": [ @@ -70,9 +75,17 @@ 1.0, 1.0, 1.0, + 0.89, + 1.0, + 1.0, 1.0, 1.0, - 0.88, + 1.0, + 0.0, + 0.0, + 0.75, + 1.0, + 0.58, 1.0, 0.0, 0.0, @@ -96,18 +109,26 @@ 0.0, 0.82, 0.58, - 0.47, + 0.43, 0.0, 0.49, 0.49, 0.49, 1.0, 1.0, + 1.0, 0.89, + 0.89, + 0.88, + 0.0, + 1.0, 0.0, 1.0, - 0.03, + 0.0, + 0.37, 0.68, + 0.29, + 0.0, 1.0, 1.0, 0.0, @@ -117,105 +138,103 @@ 1.0 ] ], + [1, 0], [ - 1, - 0 - ], - [ - [ - 0.06949764490127563, - 0.9305023550987244 - ], - [ - 0.9558268189430237, - 0.04417315497994423 - ] + [0.3793805241584778, 0.6206194758415222], + [0.9997096061706543, 0.0002903861168306321] ] ], "2": [ [ [ 1.0, - 1.0, - 1.0, - 1.0, - 0.67, - 0.5, - 0.99, - 0.0, - 1.0, - 0.0, - 0.0, - 0.82, - 0.87, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 0.88, - 1.0, - 0.0, - 0.0, - 1.0, - 0.82, - 0.8, - 1.0, - 2.0 + 1.0, + 1.0, + 1.0, + 0.67, + 0.5, + 0.99, + 0.0, + 1.0, + 0.0, + 0.0, + 0.82, + 0.87, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 0.89, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 0.0, + 0.0, + 0.75, + 1.0, + 0.58, + 1.0, + 0.0, + 0.0, + 1.0, + 0.82, + 0.8, + 1.0, + 2.0 ], [ 0.49, - 0.5, - 0.5, - 0.5, - 0.33, - 0.5, - 0.91, - 0.0, - 1.0, - 0.0, - 0.0, - 0.82, - 0.58, - 0.47, - 0.0, - 0.49, - 0.49, - 0.49, - 1.0, - 1.0, - 0.89, - 0.0, - 1.0, - 0.03, - 0.68, - 1.0, - 1.0, - 0.0, - 0.82, - 0.8, - 0.5, - 2.0 + 0.5, + 0.5, + 0.5, + 0.33, + 0.5, + 0.91, + 0.0, + 1.0, + 0.0, + 0.0, + 0.82, + 0.58, + 0.43, + 0.0, + 0.49, + 0.49, + 0.49, + 1.0, + 1.0, + 1.0, + 0.89, + 0.89, + 0.88, + 0.0, + 1.0, + 0.0, + 1.0, + 0.0, + 0.37, + 0.68, + 0.29, + 0.0, + 1.0, + 1.0, + 0.0, + 0.82, + 0.8, + 0.5, + 2.0 ] ], + [1, 0], [ - 1, - 0 - ], - [ - [ - 0.047748863697052, - 0.952251136302948 - ], - [ - 0.9549607634544373, - 0.045039255172014236 - ] + [0.3091089129447937, 0.6908910870552063], + [0.9995285272598267, 0.00047146008000709116] ] ], "3": [ @@ -240,14 +259,22 @@ 1.0, 1.0, 1.0, + 0.0, + 1.0, + 1.0, 1.0, 1.0, 1.0, 1.0, - 0.57, 1.0, 0.0, 0.0, + 0.91, + 1.0, + 1.0, + 0.0, + 0.0, + 0.0, 1.0, 1.0, 1.0, @@ -276,9 +303,17 @@ 1.0, 1.0, 1.0, + 1.0, + 1.0, + 1.0, + 0.0, + 1.0, 0.0, 1.0, 0.0, + 0.33, + 1.0, + 1.0, 1.0, 0.0, 0.0, @@ -289,19 +324,10 @@ 1.0 ] ], + [0, 0], [ - 1, - 1 - ], - [ - [ - 0.0011691451072692871, - 0.9988308548927307 - ], - [ - 0.002290487289428711, - 0.9977095127105713 - ] + [0.815524697303772, 0.18447527289390564], + [0.9980536699295044, 0.001946336473338306] ] ], "4": [ @@ -326,13 +352,21 @@ 1.0, 1.0, 1.0, + 1.0, 0.0, + 0.89, 1.0, 1.0, 1.0, - 0.88, 1.0, 1.0, + 0.0, + 0.0, + 0.75, + 1.0, + 0.29, + 0.0, + 1.0, 1.0, 0.0, 1.0, @@ -341,14 +375,9 @@ 1.0 ] ], + [1], [ - 1 - ], - [ - [ - 0.0006948709487915039, - 0.9993051290512085 - ] + [0.281916081905365, 
0.718083918094635] ] ], "5": [ @@ -373,11 +402,19 @@ 1.0, 1.0, 1.0, + 1.0, 0.0, + 0.89, + 1.0, 1.0, 1.0, 1.0, - 0.88, + 1.0, + 0.0, + 0.0, + 0.75, + 1.0, + 0.58, 1.0, 0.0, 0.0, @@ -401,18 +438,26 @@ 0.0, 0.82, 0.58, - 0.47, + 0.43, 0.0, 0.49, 0.49, 0.49, 1.0, + 1.0, 0.0, 0.89, + 0.89, + 0.88, 0.0, 1.0, - 0.03, + 0.0, + 1.0, + 0.0, + 0.37, 0.68, + 0.29, + 0.0, 1.0, 1.0, 0.0, @@ -422,19 +467,10 @@ 1.0 ] ], + [1, 0], [ - 1, - 0 - ], - [ - [ - 0.06376421451568604, - 0.936235785484314 - ], - [ - 0.9525938630104065, - 0.0474061444401741 - ] + [0.3793805241584778, 0.6206194758415222], + [0.9997096061706543, 0.0002903861168306321] ] ], "6": [ @@ -459,11 +495,19 @@ 1.0, 1.0, 1.0, + 1.0, 0.0, + 0.89, 1.0, 1.0, 1.0, - 0.88, + 1.0, + 1.0, + 0.0, + 0.0, + 0.75, + 1.0, + 0.58, 1.0, 0.0, 0.0, @@ -487,18 +531,26 @@ 0.0, 0.82, 0.58, - 0.47, + 0.43, 0.0, 0.49, 0.49, 0.49, 1.0, + 1.0, 0.0, 0.89, + 0.89, + 0.88, 0.0, 1.0, - 0.03, + 0.0, + 1.0, + 0.0, + 0.37, 0.68, + 0.29, + 0.0, 1.0, 1.0, 0.0, @@ -508,19 +560,10 @@ 2.0 ] ], + [1, 0], [ - 1, - 0 - ], - [ - [ - 0.043725430965423584, - 0.9562745690345764 - ], - [ - 0.9516675472259521, - 0.04833242669701576 - ] + [0.3091089129447937, 0.6908910870552063], + [0.9995285272598267, 0.00047146008000709116] ] ], "7": [ @@ -546,13 +589,21 @@ 1.0, 1.0, 0.0, + 0.0, + 1.0, + 1.0, 1.0, 1.0, 1.0, - 0.57, 1.0, 0.0, 0.0, + 0.91, + 1.0, + 1.0, + 0.0, + 0.0, + 0.0, 1.0, 1.0, 1.0, @@ -579,13 +630,21 @@ 0.49, 0.49, 1.0, + 1.0, 0.0, 1.0, + 1.0, + 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, + 0.33, + 1.0, + 1.0, + 1.0, + 0.0, 0.0, 1.0, 0.82, @@ -594,19 +653,10 @@ 1.0 ] ], + [0, 0], [ - 1, - 1 - ], - [ - [ - 0.0010661482810974121, - 0.9989338517189026 - ], - [ - 0.002039194107055664, - 0.9979608058929443 - ] + [0.815524697303772, 0.18447527289390564], + [0.9980536699295044, 0.001946336473338306] ] ] } diff --git a/test_res/fixtures/suggest_test_item_info_w_logs.json b/test_res/fixtures/suggest_test_item_info_w_logs.json index fc4b99d7..765f02a7 100644 --- a/test_res/fixtures/suggest_test_item_info_w_logs.json +++ b/test_res/fixtures/suggest_test_item_info_w_logs.json @@ -8,7 +8,7 @@ "project": 1, "logs": [{ "logId": 178, - "message": "Message 1\r\n Message 2\r\n Message 3", + "message": "Message 1\n Message 2\n Message 3", "logLevel": 40000 }] } \ No newline at end of file diff --git a/test_res/fixtures/suggest_test_item_info_w_merged_logs.json b/test_res/fixtures/suggest_test_item_info_w_merged_logs.json index ff3339fb..2a339607 100644 --- a/test_res/fixtures/suggest_test_item_info_w_merged_logs.json +++ b/test_res/fixtures/suggest_test_item_info_w_merged_logs.json @@ -8,7 +8,7 @@ "project": 1, "logs": [{ "logId": 178, - "message": "Message 1\r\n Message 2", + "message": "Message 1\n Message 2", "logLevel": 40000 }] } \ No newline at end of file diff --git a/test_res/fixtures/three_hits_search_rs.json b/test_res/fixtures/three_hits_search_rs.json index 8e49846a..77387ff2 100644 --- a/test_res/fixtures/three_hits_search_rs.json +++ b/test_res/fixtures/three_hits_search_rs.json @@ -25,13 +25,13 @@ "log_level": 40000, "original_message_lines": 1, "original_message_words_number": 2, - "message": "Message http : localhost/admin \r\n java.lang.NoClassDefFoundError\r", + "message": "Message SPECIALNUMBER http : localhost/admin \n java.lang.NoClassDefFoundError\r", "test_item": 1, "start_time": "2020-01-15 10:57:43", "unique_id": "unique4", "test_case_hash": 4, - "detected_message": "Message http : localhost/admin \r\n java.lang.NoClassDefFoundError\r", - "detected_message_with_numbers": "Message 211 http 
: localhost/admin \r\n java.lang.NoClassDefFoundError\r", + "detected_message": "Message SPECIALNUMBER http : localhost/admin \n java.lang.NoClassDefFoundError\r", + "detected_message_with_numbers": "Message 211 http : localhost/admin \n java.lang.NoClassDefFoundError\r", "only_numbers": "211", "merged_small_logs": "", "stacktrace": "", @@ -43,12 +43,12 @@ "found_tests_and_methods": "", "stacktrace_extended": "", "potential_status_codes": "", - "message_extended": "Message http : localhost/admin \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", - "detected_message_extended": "Message http : localhost/admin \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", - "detected_message_without_params_extended": "Message \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", - "message_without_params_extended": "Message \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", - "detected_message_without_params_and_brackets": "Message 211 \r\n java.lang lang.NoClassDefFoundError", - "message_without_params_and_brackets": "Message 211 \r\n java.lang lang.NoClassDefFoundError" + "message_extended": "Message SPECIALNUMBER http : localhost/admin \n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", + "detected_message_extended": "Message 211 http : localhost/admin \n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", + "detected_message_without_params_extended": "Message SPECIALNUMBER \n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", + "message_without_params_extended": "Message SPECIALNUMBER \n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", + "detected_message_without_params_and_brackets": "Message SPECIALNUMBER \n java.lang lang.NoClassDefFoundError", + "message_without_params_and_brackets": "Message SPECIALNUMBER \n java.lang lang.NoClassDefFoundError" } }, { @@ -62,13 +62,13 @@ "log_level": 40000, "original_message_lines": 1, "original_message_words_number": 2, - "message": "Message http : localhost/admin \r\n java.lang.NoClassDefFoundError\r", + "message": "Message SPECIALNUMBER http : localhost/admin \n java.lang.NoClassDefFoundError\r", "test_item": 2, "start_time": "2020-01-15 10:57:43", "unique_id": "unique5", "test_case_hash": 5, - "detected_message": "Message http : localhost/admin \r\n java.lang.NoClassDefFoundError\r", - "detected_message_with_numbers": "Message 234 http : localhost/admin \r\n java.lang.NoClassDefFoundError\r", + "detected_message": "Message SPECIALNUMBER http : localhost/admin \n java.lang.NoClassDefFoundError\r", + "detected_message_with_numbers": "Message 234 http : localhost/admin \n java.lang.NoClassDefFoundError\r", "only_numbers": "234", "merged_small_logs": "", "stacktrace": "", @@ -80,12 +80,12 @@ "found_tests_and_methods": "", "stacktrace_extended": "", "potential_status_codes": "", - "message_extended": "Message http : localhost/admin \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", - "detected_message_extended": "Message http : localhost/admin \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", - "detected_message_without_params_extended": "Message \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", - "message_without_params_extended": "Message \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", - "detected_message_without_params_and_brackets": "Message 234 \r\n java.lang lang.NoClassDefFoundError", - "message_without_params_and_brackets": "Message 234 \r\n java.lang lang.NoClassDefFoundError" + "message_extended": 
"Message SPECIALNUMBER http : localhost/admin \n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", + "detected_message_extended": "Message 234 http : localhost/admin \n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", + "detected_message_without_params_extended": "Message SPECIALNUMBER \n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", + "message_without_params_extended": "Message SPECIALNUMBER \n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", + "detected_message_without_params_and_brackets": "Message SPECIALNUMBER \n java.lang lang.NoClassDefFoundError", + "message_without_params_and_brackets": "Message SPECIALNUMBER \n java.lang lang.NoClassDefFoundError" } }, { @@ -99,13 +99,13 @@ "log_level": 40000, "original_message_lines": 1, "original_message_words_number": 2, - "message": "Message http : localhost/admin \r\n java.lang.NoClassDefFoundError\r", + "message": "Message SPECIALNUMBER http : localhost/admin \n java.lang.NoClassDefFoundError\r", "test_item": 3, "start_time": "2020-01-15 10:57:43", "unique_id": "unique6", "test_case_hash": 6, - "detected_message": "Message http : localhost/admin \r\n java.lang.NoClassDefFoundError\r", - "detected_message_with_numbers": "Message 45 http : localhost/admin \r\n java.lang.NoClassDefFoundError\r", + "detected_message": "Message SPECIALNUMBER http : localhost/admin \n java.lang.NoClassDefFoundError\r", + "detected_message_with_numbers": "Message 45 http : localhost/admin \n java.lang.NoClassDefFoundError\r", "only_numbers": "45", "merged_small_logs": "", "stacktrace": "", @@ -117,12 +117,12 @@ "found_exceptions_extended": "java.lang.noclassdeffounderror lang.noclassdeffounderror noclassdeffounderror", "found_tests_and_methods": "", "stacktrace_extended": "", - "message_extended": "Message http : localhost/admin \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", - "detected_message_extended": "Message http : localhost/admin \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", - "detected_message_without_params_extended": "Message \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", - "message_without_params_extended": "Message \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", - "detected_message_without_params_and_brackets": "Message 45 \r\n java.lang lang.NoClassDefFoundError", - "message_without_params_and_brackets": "Message 45 \r\n java.lang lang.NoClassDefFoundError" + "message_extended": "Message SPECIALNUMBER http : localhost/admin \n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", + "detected_message_extended": "Message 45 http : localhost/admin \n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", + "detected_message_without_params_extended": "Message SPECIALNUMBER \n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", + "message_without_params_extended": "Message SPECIALNUMBER \n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", + "detected_message_without_params_and_brackets": "Message SPECIALNUMBER \n java.lang lang.NoClassDefFoundError", + "message_without_params_and_brackets": "Message SPECIALNUMBER \n java.lang lang.NoClassDefFoundError" } } ] diff --git a/test_res/fixtures/three_hits_search_rs_with_duplicate.json b/test_res/fixtures/three_hits_search_rs_with_duplicate.json index 127a457c..2f0fdd5f 100644 --- a/test_res/fixtures/three_hits_search_rs_with_duplicate.json +++ b/test_res/fixtures/three_hits_search_rs_with_duplicate.json @@ -24,13 +24,13 @@ "log_level": 40000, 
"original_message_lines": 1, "original_message_words_number": 2, - "message": "Message http : localhost/admin \r\n java.lang.NoClassDefFoundError\r", + "message": "Message SPECIALNUMBER http : localhost/admin \n java.lang.NoClassDefFoundError\r", "test_item": 3, "start_time": "2020-01-17 10:57:43", "unique_id": "unique1", "test_case_hash": -1126886180, - "detected_message": "Message http : localhost/admin \r\n java.lang.NoClassDefFoundError\r", - "detected_message_with_numbers": "Message 211 http : localhost/admin \r\n java.lang.NoClassDefFoundError\r", + "detected_message": "Message SPECIALNUMBER http : localhost/admin \n java.lang.NoClassDefFoundError\r", + "detected_message_with_numbers": "Message 211 http : localhost/admin \n java.lang.NoClassDefFoundError\r", "only_numbers": "211", "merged_small_logs": "", "stacktrace": "", @@ -40,12 +40,12 @@ "found_exceptions": "java.lang.noclassdeffounderror", "found_exceptions_extended": "java.lang.noclassdeffounderror lang.noclassdeffounderror noclassdeffounderror", "stacktrace_extended": "", - "message_extended": "Message http : localhost/admin \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", - "detected_message_extended": "Message http : localhost/admin \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", - "detected_message_without_params_extended": "Message \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", - "message_without_params_extended": "Message \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", - "detected_message_without_params_and_brackets": "Message 211\r\n java.lang lang.NoClassDefFoundError", - "message_without_params_and_brackets": "Message 211\r\n java.lang lang.NoClassDefFoundError" + "message_extended": "Message SPECIALNUMBER http : localhost/admin \n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", + "detected_message_extended": "Message 211 http : localhost/admin \n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", + "detected_message_without_params_extended": "Message SPECIALNUMBER \n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", + "message_without_params_extended": "Message SPECIALNUMBER \n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", + "detected_message_without_params_and_brackets": "Message SPECIALNUMBER\n java.lang lang.NoClassDefFoundError", + "message_without_params_and_brackets": "Message SPECIALNUMBER\n java.lang lang.NoClassDefFoundError" } }, { @@ -59,13 +59,13 @@ "log_level": 40000, "original_message_lines": 1, "original_message_words_number": 2, - "message": "Message http : localhost/admin \r\n java.lang.NoClassDefFoundError\r", + "message": "Message SPECIALNUMBER http : localhost/admin \n java.lang.NoClassDefFoundError\r", "test_item": 2, "start_time": "2020-01-15 10:57:43", "unique_id": "unique1", "test_case_hash": -1126886180, - "detected_message": "Message http : localhost/admin \r\n java.lang.NoClassDefFoundError\r", - "detected_message_with_numbers": "Message 234 http : localhost/admin \r\n java.lang.NoClassDefFoundError\r", + "detected_message": "Message SPECIALNUMBER http : localhost/admin \n java.lang.NoClassDefFoundError\r", + "detected_message_with_numbers": "Message 234 http : localhost/admin \n java.lang.NoClassDefFoundError\r", "only_numbers": "234", "merged_small_logs": "", "stacktrace": "", @@ -75,12 +75,12 @@ "found_exceptions": "java.lang.noclassdeffounderror", "found_exceptions_extended": "java.lang.noclassdeffounderror lang.noclassdeffounderror noclassdeffounderror", 
"stacktrace_extended": "", - "message_extended": "Message http : localhost/admin \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", - "detected_message_extended": "Message http : localhost/admin \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", - "detected_message_without_params_extended": "Message \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", - "message_without_params_extended": "Message \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", - "detected_message_without_params_and_brackets": "Message 234\r\n java.lang lang.NoClassDefFoundError", - "message_without_params_and_brackets": "Message 234\r\n java.lang lang.NoClassDefFoundError" + "message_extended": "Message SPECIALNUMBER http : localhost/admin \n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", + "detected_message_extended": "Message 234 http : localhost/admin \n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", + "detected_message_without_params_extended": "Message SPECIALNUMBER \n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", + "message_without_params_extended": "Message SPECIALNUMBER \n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", + "detected_message_without_params_and_brackets": "Message SPECIALNUMBER\n java.lang lang.NoClassDefFoundError", + "message_without_params_and_brackets": "Message SPECIALNUMBER\n java.lang lang.NoClassDefFoundError" } }, { @@ -94,13 +94,13 @@ "log_level": 40000, "original_message_lines": 1, "original_message_words_number": 2, - "message": "Message http : localhost/admin \r\n java.lang.NoClassDefFoundError\r", + "message": "Message SPECIALNUMBER http : localhost/admin \n java.lang.NoClassDefFoundError\r", "test_item": 1, "start_time": "2020-01-15 10:57:43", "unique_id": "unique1", "test_case_hash": -1126886180, - "detected_message": "Message http : localhost/admin \r\n java.lang.NoClassDefFoundError\r", - "detected_message_with_numbers": "Message 234 http : localhost/admin \r\n java.lang.NoClassDefFoundError\r", + "detected_message": "Message SPECIALNUMBER http : localhost/admin \n java.lang.NoClassDefFoundError\r", + "detected_message_with_numbers": "Message 234 http : localhost/admin \n java.lang.NoClassDefFoundError\r", "only_numbers": "234", "merged_small_logs": "", "stacktrace": "", @@ -110,12 +110,12 @@ "found_exceptions": "java.lang.noclassdeffounderror", "found_exceptions_extended": "java.lang.noclassdeffounderror lang.noclassdeffounderror noclassdeffounderror", "stacktrace_extended": "", - "message_extended": "Message http : localhost/admin \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", - "detected_message_extended": "Message http : localhost/admin \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", - "detected_message_without_params_extended": "Message \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", - "message_without_params_extended": "Message \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", - "detected_message_without_params_and_brackets": "Message 234\r\n java.lang lang.NoClassDefFoundError", - "message_without_params_and_brackets": "Message 234\r\n java.lang lang.NoClassDefFoundError" + "message_extended": "Message SPECIALNUMBER http : localhost/admin \n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", + "detected_message_extended": "Message 234 http : localhost/admin \n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", + "detected_message_without_params_extended": "Message 
SPECIALNUMBER\n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", + "message_without_params_extended": "Message SPECIALNUMBER \n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", + "detected_message_without_params_and_brackets": "Message SPECIALNUMBER\n java.lang lang.NoClassDefFoundError", + "message_without_params_and_brackets": "Message SPECIALNUMBER\n java.lang lang.NoClassDefFoundError" } } ] diff --git a/test_res/fixtures/three_hits_search_rs_with_one_unique_id.json b/test_res/fixtures/three_hits_search_rs_with_one_unique_id.json index 1158b6e9..3bcb96ef 100644 --- a/test_res/fixtures/three_hits_search_rs_with_one_unique_id.json +++ b/test_res/fixtures/three_hits_search_rs_with_one_unique_id.json @@ -24,13 +24,13 @@ "log_level": 40000, "original_message_lines": 1, "original_message_words_number": 2, - "message": "Message http : localhost/admin \r\n java.lang.NoClassDefFoundError\r", + "message": "Message http : localhost/admin \n java.lang.NoClassDefFoundError\r", "test_item": 1, "start_time": "2020-01-15 10:57:43", "unique_id": "unique4", "test_case_hash": 4, - "detected_message": "Message http : localhost/admin \r\n java.lang.NoClassDefFoundError\r", - "detected_message_with_numbers": "Message 211 http : localhost/admin \r\n java.lang.NoClassDefFoundError\r", + "detected_message": "Message http : localhost/admin \n java.lang.NoClassDefFoundError\r", + "detected_message_with_numbers": "Message 211 http : localhost/admin \n java.lang.NoClassDefFoundError\r", "only_numbers": "211", "merged_small_logs": "", "stacktrace": "", @@ -41,10 +41,10 @@ "found_exceptions_extended": "java.lang.noclassdeffounderror lang.noclassdeffounderror noclassdeffounderror", "stacktrace_extended": "", "potential_status_codes": "", - "message_extended": "Message http : localhost/admin \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", - "detected_message_extended": "Message http : localhost/admin \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", - "detected_message_without_params_extended": "Message \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", - "message_without_params_extended": "Message \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError " + "message_extended": "Message http : localhost/admin \n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", + "detected_message_extended": "Message http : localhost/admin \n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", + "detected_message_without_params_extended": "Message \n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", + "message_without_params_extended": "Message \n java.lang lang.NoClassDefFoundError NoClassDefFoundError " } }, { @@ -58,13 +58,13 @@ "log_level": 40000, "original_message_lines": 1, "original_message_words_number": 2, - "message": "Message http : localhost/admin \r\n java.lang.NoClassDefFoundError\r", + "message": "Message http : localhost/admin \n java.lang.NoClassDefFoundError\r", "test_item": 2, "start_time": "2020-01-14 10:57:43", "unique_id": "unique4", "test_case_hash": 4, - "detected_message": "Message http : localhost/admin \r\n java.lang.NoClassDefFoundError\r", - "detected_message_with_numbers": "Message 234 http : localhost/admin \r\n java.lang.NoClassDefFoundError\r", + "detected_message": "Message http : localhost/admin \n java.lang.NoClassDefFoundError\r", + "detected_message_with_numbers": "Message 234 http : localhost/admin \n java.lang.NoClassDefFoundError\r", "only_numbers": "234", "merged_small_logs": "", 
"stacktrace": "", @@ -75,10 +75,10 @@ "found_exceptions_extended": "java.lang.noclassdeffounderror lang.noclassdeffounderror noclassdeffounderror", "stacktrace_extended": "", "potential_status_codes": "", - "message_extended": "Message http : localhost/admin \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", - "detected_message_extended": "Message http : localhost/admin \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", - "detected_message_without_params_extended": "Message \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", - "message_without_params_extended": "Message \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError " + "message_extended": "Message http : localhost/admin \n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", + "detected_message_extended": "Message http : localhost/admin \n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", + "detected_message_without_params_extended": "Message \n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", + "message_without_params_extended": "Message \n java.lang lang.NoClassDefFoundError NoClassDefFoundError " } }, { @@ -92,13 +92,13 @@ "log_level": 40000, "original_message_lines": 1, "original_message_words_number": 2, - "message": "Message http : localhost/admin \r\n java.lang.NoClassDefFoundError\r", + "message": "Message http : localhost/admin \n java.lang.NoClassDefFoundError\r", "test_item": 3, "start_time": "2020-01-13 10:57:43", "unique_id": "unique4", "test_case_hash": 4, - "detected_message": "Message http : localhost/admin \r\n java.lang.NoClassDefFoundError\r", - "detected_message_with_numbers": "Message 45 http : localhost/admin \r\n java.lang.NoClassDefFoundError\r", + "detected_message": "Message http : localhost/admin \n java.lang.NoClassDefFoundError\r", + "detected_message_with_numbers": "Message 45 http : localhost/admin \n java.lang.NoClassDefFoundError\r", "only_numbers": "45", "merged_small_logs": "", "stacktrace": "", @@ -109,10 +109,10 @@ "found_exceptions_extended": "java.lang.noclassdeffounderror lang.noclassdeffounderror noclassdeffounderror", "stacktrace_extended": "", "potential_status_codes": "", - "message_extended": "Message http : localhost/admin \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", - "detected_message_extended": "Message http : localhost/admin \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", - "detected_message_without_params_extended": "Message \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", - "message_without_params_extended": "Message \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError " + "message_extended": "Message http : localhost/admin \n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", + "detected_message_extended": "Message http : localhost/admin \n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", + "detected_message_without_params_extended": "Message \n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", + "message_without_params_extended": "Message \n java.lang lang.NoClassDefFoundError NoClassDefFoundError " } } ] diff --git a/test_res/fixtures/three_hits_with_no_defect.json b/test_res/fixtures/three_hits_with_no_defect.json index b5c1dc37..2cf001c3 100644 --- a/test_res/fixtures/three_hits_with_no_defect.json +++ b/test_res/fixtures/three_hits_with_no_defect.json @@ -24,13 +24,13 @@ "log_level": 40000, "original_message_lines": 1, "original_message_words_number": 2, - "message": "Message http : localhost/admin 
\r\n java.lang.NoClassDefFoundError\r", + "message": "Message http : localhost/admin \n java.lang.NoClassDefFoundError\r", "test_item": 1, "start_time": "2020-01-15 10:57:43", "unique_id": "unique4", "test_case_hash": 4, - "detected_message": "Message http : localhost/admin \r\n java.lang.NoClassDefFoundError\r", - "detected_message_with_numbers": "Message 211 http : localhost/admin \r\n java.lang.NoClassDefFoundError\r", + "detected_message": "Message http : localhost/admin \n java.lang.NoClassDefFoundError\r", + "detected_message_with_numbers": "Message 211 http : localhost/admin \n java.lang.NoClassDefFoundError\r", "only_numbers": "211", "merged_small_logs": "", "stacktrace": "", @@ -41,12 +41,12 @@ "found_exceptions_extended": "java.lang.noclassdeffounderror lang.noclassdeffounderror noclassdeffounderror", "stacktrace_extended": "", "potential_status_codes": "", - "message_extended": "Message http : localhost/admin \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", - "detected_message_extended": "Message http : localhost/admin \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", - "detected_message_without_params_extended": "Message \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", - "message_without_params_extended": "Message \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", - "detected_message_without_params_and_brackets": "Message 211 \r\n java.lang lang.NoClassDefFoundError", - "message_without_params_and_brackets": "Message 211 \r\n java.lang lang.NoClassDefFoundError" + "message_extended": "Message http : localhost/admin \n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", + "detected_message_extended": "Message http : localhost/admin \n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", + "detected_message_without_params_extended": "Message \n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", + "message_without_params_extended": "Message \n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", + "detected_message_without_params_and_brackets": "Message 211 \n java.lang lang.NoClassDefFoundError", + "message_without_params_and_brackets": "Message 211 \n java.lang lang.NoClassDefFoundError" } }, { @@ -60,13 +60,13 @@ "log_level": 40000, "original_message_lines": 1, "original_message_words_number": 2, - "message": "Message http : localhost/admin \r\n java.lang.NoClassDefFoundError\r", + "message": "Message http : localhost/admin \n java.lang.NoClassDefFoundError\r", "test_item": 2, "start_time": "2020-01-15 10:57:43", "unique_id": "unique5", "test_case_hash": 5, - "detected_message": "Message http : localhost/admin \r\n java.lang.NoClassDefFoundError\r", - "detected_message_with_numbers": "Message 234 http : localhost/admin \r\n java.lang.NoClassDefFoundError\r", + "detected_message": "Message http : localhost/admin \n java.lang.NoClassDefFoundError\r", + "detected_message_with_numbers": "Message 234 http : localhost/admin \n java.lang.NoClassDefFoundError\r", "only_numbers": "234", "merged_small_logs": "", "stacktrace": "", @@ -77,12 +77,12 @@ "found_exceptions_extended": "java.lang.noclassdeffounderror lang.noclassdeffounderror noclassdeffounderror", "stacktrace_extended": "", "potential_status_codes": "", - "message_extended": "Message http : localhost/admin \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", - "detected_message_extended": "Message http : localhost/admin \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", - 
"detected_message_without_params_extended": "Message \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", - "message_without_params_extended": "Message \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", - "detected_message_without_params_and_brackets": "Message 234 \r\n java.lang lang.NoClassDefFoundError", - "message_without_params_and_brackets": "Message 234 \r\n java.lang lang.NoClassDefFoundError" + "message_extended": "Message http : localhost/admin \n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", + "detected_message_extended": "Message http : localhost/admin \n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", + "detected_message_without_params_extended": "Message \n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", + "message_without_params_extended": "Message \n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", + "detected_message_without_params_and_brackets": "Message 234 \n java.lang lang.NoClassDefFoundError", + "message_without_params_and_brackets": "Message 234 \n java.lang lang.NoClassDefFoundError" } }, { @@ -96,13 +96,13 @@ "log_level": 40000, "original_message_lines": 1, "original_message_words_number": 2, - "message": "Message http : localhost/admin \r\n java.lang.NoClassDefFoundError\r", + "message": "Message http : localhost/admin \n java.lang.NoClassDefFoundError\r", "test_item": 3, "start_time": "2020-01-15 10:57:43", "unique_id": "unique6", "test_case_hash": 6, - "detected_message": "Message http : localhost/admin \r\n java.lang.NoClassDefFoundError\r", - "detected_message_with_numbers": "Message 45 http : localhost/admin \r\n java.lang.NoClassDefFoundError\r", + "detected_message": "Message http : localhost/admin \n java.lang.NoClassDefFoundError\r", + "detected_message_with_numbers": "Message 45 http : localhost/admin \n java.lang.NoClassDefFoundError\r", "only_numbers": "45", "merged_small_logs": "", "stacktrace": "", @@ -113,12 +113,12 @@ "found_exceptions": "java.lang.noclassdeffounderror", "found_exceptions_extended": "java.lang.noclassdeffounderror lang.noclassdeffounderror noclassdeffounderror", "stacktrace_extended": "", - "message_extended": "Message http : localhost/admin \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", - "detected_message_extended": "Message http : localhost/admin \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", - "detected_message_without_params_extended": "Message \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", - "message_without_params_extended": "Message \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", - "detected_message_without_params_and_brackets": "Message 45 \r\n java.lang lang.NoClassDefFoundError", - "message_without_params_and_brackets": "Message 45 \r\n java.lang lang.NoClassDefFoundError" + "message_extended": "Message http : localhost/admin \n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", + "detected_message_extended": "Message http : localhost/admin \n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", + "detected_message_without_params_extended": "Message \n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", + "message_without_params_extended": "Message \n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", + "detected_message_without_params_and_brackets": "Message 45 \n java.lang lang.NoClassDefFoundError", + "message_without_params_and_brackets": "Message 45 \n java.lang lang.NoClassDefFoundError" } } ] diff --git 
a/test_res/fixtures/two_hits_search_rs.json b/test_res/fixtures/two_hits_search_rs.json index 2ae9b5a9..27dc9354 100644 --- a/test_res/fixtures/two_hits_search_rs.json +++ b/test_res/fixtures/two_hits_search_rs.json @@ -24,13 +24,13 @@ "log_level": 40000, "original_message_lines": 1, "original_message_words_number": 2, - "message": "Message http : localhost/admin \r\n java.lang.NoClassDefFoundError\r", + "message": "Message SPECIALNUMBER http : localhost/admin \n java.lang.NoClassDefFoundError\r", "test_item": 1, "start_time": "2020-01-15 10:57:43", "unique_id": "unique2", "test_case_hash": -1126886181, - "detected_message": "Message http : localhost/admin \r\n java.lang.NoClassDefFoundError\r", - "detected_message_with_numbers": "Message 34 http : localhost/admin \r\n java.lang.NoClassDefFoundError\r", + "detected_message": "Message SPECIALNUMBER http : localhost/admin \n java.lang.NoClassDefFoundError\r", + "detected_message_with_numbers": "Message 34 http : localhost/admin \n java.lang.NoClassDefFoundError\r", "only_numbers": "34", "merged_small_logs": "", "stacktrace": "", @@ -42,12 +42,12 @@ "found_exceptions_extended": "java.lang.noclassdeffounderror lang.noclassdeffounderror noclassdeffounderror", "found_tests_and_methods": "", "stacktrace_extended": "", - "message_extended": "Message http : localhost/admin \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", - "detected_message_extended": "Message http : localhost/admin \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", - "detected_message_without_params_extended": "Message \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", - "message_without_params_extended": "Message \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", - "detected_message_without_params_and_brackets": "Message 34 \r\n java.lang lang.NoClassDefFoundError", - "message_without_params_and_brackets": "Message 34 \r\n java.lang lang.NoClassDefFoundError" + "message_extended": "Message SPECIALNUMBER http : localhost/admin \n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", + "detected_message_extended": "Message 34 http : localhost/admin \n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", + "detected_message_without_params_extended": "Message SPECIALNUMBER \n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", + "message_without_params_extended": "Message SPECIALNUMBER \n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", + "detected_message_without_params_and_brackets": "Message SPECIALNUMBER \n java.lang lang.NoClassDefFoundError", + "message_without_params_and_brackets": "Message SPECIALNUMBER \n java.lang lang.NoClassDefFoundError" } }, { @@ -83,8 +83,8 @@ "detected_message_extended": "Message", "detected_message_without_params_extended": "Message", "message_without_params_extended": "Message", - "message_without_params_and_brackets": "Message 111", - "detected_message_without_params_and_brackets": "Message 111" + "message_without_params_and_brackets": "Message SPECIALNUMBER", + "detected_message_without_params_and_brackets": "Message SPECIALNUMBER" } } ] diff --git a/test_res/fixtures/two_hits_search_rs_search_logs.json b/test_res/fixtures/two_hits_search_rs_search_logs.json index bae1e097..78299fdb 100644 --- a/test_res/fixtures/two_hits_search_rs_search_logs.json +++ b/test_res/fixtures/two_hits_search_rs_search_logs.json @@ -23,14 +23,14 @@ "issue_type": "AB001", "launch_name": "Launch 1", "log_level": 40000, - "merged_small_logs": "error occured once", + 
"merged_small_logs": "error occurred once", "message": "", "test_item": 1, "unique_id": "unique1", "test_case_hash": -1126886180, "stacktrace": "", - "detected_message": "error occured once", - "detected_message_with_numbers": "error occured once", + "detected_message": "error occurred once", + "detected_message_with_numbers": "error occurred once", "only_numbers": "", "potential_status_codes": "" } @@ -44,14 +44,14 @@ "issue_type": "PB001", "launch_name": "Launch 1", "log_level": 40000, - "merged_small_logs": "error occured twice", + "merged_small_logs": "error occurred twice", "message": "", "test_item": 3, "unique_id": "unique1", "test_case_hash": -1126886180, "stacktrace": "", - "detected_message": "error occured twice", - "detected_message_with_numbers": "error occured twice", + "detected_message": "error occurred twice", + "detected_message_with_numbers": "error occurred twice", "only_numbers": "", "potential_status_codes": "" } diff --git a/test_res/fixtures/two_hits_search_rs_search_logs_with_status_codes.json b/test_res/fixtures/two_hits_search_rs_search_logs_with_status_codes.json index b317aaf3..1b8d8306 100644 --- a/test_res/fixtures/two_hits_search_rs_search_logs_with_status_codes.json +++ b/test_res/fixtures/two_hits_search_rs_search_logs_with_status_codes.json @@ -23,14 +23,14 @@ "issue_type": "AB001", "launch_name": "Launch 1", "log_level": 40000, - "merged_small_logs": "error occured once status code: but got", + "merged_small_logs": "error occurred once status code: but got", "message": "", "test_item": 3, "unique_id": "unique1", "test_case_hash": -1126886180, "stacktrace": "", - "detected_message": "error occured once", - "detected_message_with_numbers": "error occured once", + "detected_message": "error occurred once", + "detected_message_with_numbers": "error occurred once", "only_numbers": "", "potential_status_codes": "500 204" } @@ -44,14 +44,14 @@ "issue_type": "PB001", "launch_name": "Launch 1", "log_level": 40000, - "merged_small_logs": "error occured once status code: but got", + "merged_small_logs": "error occurred once status code: but got", "message": "", "test_item": 1, "unique_id": "unique1", "test_case_hash": -1126886180, "stacktrace": "", - "detected_message": "error occured twice", - "detected_message_with_numbers": "error occured twice", + "detected_message": "error occurred twice", + "detected_message_with_numbers": "error occurred twice", "only_numbers": "", "potential_status_codes": "500 200" } diff --git a/test_res/fixtures/two_hits_search_rs_second_message.json b/test_res/fixtures/two_hits_search_rs_second_message.json index 0a648e1e..34097cec 100644 --- a/test_res/fixtures/two_hits_search_rs_second_message.json +++ b/test_res/fixtures/two_hits_search_rs_second_message.json @@ -24,13 +24,13 @@ "log_level": 40000, "original_message_lines": 1, "original_message_words_number": 2, - "message": "Message \r\n Message \r\n Message 'prod_en' /src/prod/results.html \r\n java.lang.NoClassDefFoundError\r\n de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator", + "message": "Message \n Message \n Message 'prod_en' /src/prod/results.html \n java.lang.NoClassDefFoundError\n de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator", "test_item": 1, "start_time": "2020-01-15 10:57:43", "unique_id": "unique2", "test_case_hash": -1126886181, - "detected_message": "Message \r\n Message \r\n Message 'prod_en' /src/prod/results.html \r\n java.lang.NoClassDefFoundError\r\n 
de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator", - "detected_message_with_numbers": "Message 1 \r\n Message 2 \r\n Message 3 'prod_en' /src/prod/results.html \r\n java.lang.NoClassDefFoundError\r\n de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator", + "detected_message": "Message \n Message \n Message 'prod_en' /src/prod/results.html \n java.lang.NoClassDefFoundError\n de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator", + "detected_message_with_numbers": "Message 1 \n Message 2 \n Message 3 'prod_en' /src/prod/results.html \n java.lang.NoClassDefFoundError\n de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator", "only_numbers": "34", "merged_small_logs": "", "stacktrace": "", @@ -42,12 +42,12 @@ "found_exceptions_extended": "java.lang.noclassdeffounderror lang.noclassdeffounderror noclassdeffounderror", "found_tests_and_methods": "", "stacktrace_extended": "", - "message_extended": "Message prod_en \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", - "detected_message_extended": "Message prod_en \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", - "detected_message_without_params_extended": "Message \r\n de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator java.lang lang.NoClassDefFoundError NoClassDefFoundError ", - "message_without_params_extended": "Message \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", - "detected_message_without_params_and_brackets": "Message \r\n java.lang lang.NoClassDefFoundError de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator", - "message_without_params_and_brackets": "Message \r\n java.lang lang.NoClassDefFoundError de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator" + "message_extended": "Message prod_en \n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", + "detected_message_extended": "Message prod_en \n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", + "detected_message_without_params_extended": "Message \n de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator java.lang lang.NoClassDefFoundError NoClassDefFoundError ", + "message_without_params_extended": "Message \n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", + "detected_message_without_params_and_brackets": "Message \n java.lang lang.NoClassDefFoundError de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator", + "message_without_params_and_brackets": "Message \n java.lang lang.NoClassDefFoundError de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator" } }, { diff --git a/test_res/fixtures/two_hits_search_with_big_messages_rs.json b/test_res/fixtures/two_hits_search_with_big_messages_rs.json index 9a0cd6b1..8a370dd6 100644 --- a/test_res/fixtures/two_hits_search_with_big_messages_rs.json +++ b/test_res/fixtures/two_hits_search_with_big_messages_rs.json @@ -23,13 +23,13 @@ "issue_type": "AB001", "launch_name": "Launch 1", "log_level": 40000, - "message": "Message AB \r\n Message AB \r\n Message AB", + "message": "Message AB \n Message AB \n Message AB", "original_message_lines": 3, "original_message_words_number": 2, "test_item": 1, "unique_id": "unique1", "test_case_hash": -1126886180, - "detected_message": "Message AB \r\n Message AB", + "detected_message": "Message AB \n Message AB", "merged_small_logs": "" } }, @@ -42,13 +42,13 @@ "issue_type": "PB001", 
"launch_name": "Launch 1", "log_level": 40000, - "message": "Message PB \r\n Message PB \r\n Message PB", + "message": "Message PB \n Message PB \n Message PB", "original_message_lines": 3, "original_message_words_number": 2, "test_item": 1, "unique_id": "unique1", "test_case_hash": -1126886180, - "detected_message": "Message AB \r\n Message AB", + "detected_message": "Message AB \n Message AB", "merged_small_logs": "" } } diff --git a/test_res/fixtures/two_hits_with_no_defect.json b/test_res/fixtures/two_hits_with_no_defect.json index 307f8f64..979ae485 100644 --- a/test_res/fixtures/two_hits_with_no_defect.json +++ b/test_res/fixtures/two_hits_with_no_defect.json @@ -24,13 +24,13 @@ "log_level": 40000, "original_message_lines": 1, "original_message_words_number": 2, - "message": "Message http : localhost/admin \r\n java.lang.NoClassDefFoundError\r", + "message": "Message http : localhost/admin \n java.lang.NoClassDefFoundError\r", "test_item": 34, "start_time": "2020-01-15 10:57:43", "unique_id": "unique3", "test_case_hash": -1126886182, - "detected_message": "Message http : localhost/admin \r\n java.lang.NoClassDefFoundError\r", - "detected_message_with_numbers": "Message 34 http : localhost/admin \r\n java.lang.NoClassDefFoundError\r", + "detected_message": "Message http : localhost/admin \n java.lang.NoClassDefFoundError\r", + "detected_message_with_numbers": "Message 34 http : localhost/admin \n java.lang.NoClassDefFoundError\r", "only_numbers": "34", "merged_small_logs": "", "stacktrace": "", @@ -41,12 +41,12 @@ "found_exceptions": "java.lang.noclassdeffounderror", "found_exceptions_extended": "java.lang.noclassdeffounderror lang.noclassdeffounderror noclassdeffounderror", "stacktrace_extended": "", - "message_extended": "Message http : localhost/admin \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", - "detected_message_extended": "Message http : localhost/admin \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", - "detected_message_without_params_extended": "Message \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", - "message_without_params_extended": "Message \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", - "detected_message_without_params_and_brackets": "Message 34 \r\n java.lang lang.NoClassDefFoundError", - "message_without_params_and_brackets": "Message 34 \r\n java.lang lang.NoClassDefFoundError" + "message_extended": "Message http : localhost/admin \n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", + "detected_message_extended": "Message http : localhost/admin \n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", + "detected_message_without_params_extended": "Message \n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", + "message_without_params_extended": "Message \n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", + "detected_message_without_params_and_brackets": "Message 34 \n java.lang lang.NoClassDefFoundError", + "message_without_params_and_brackets": "Message 34 \n java.lang lang.NoClassDefFoundError" } }, { @@ -60,13 +60,13 @@ "log_level": 40000, "original_message_lines": 1, "original_message_words_number": 2, - "message": "Message http : localhost/admin \r\n java.lang.NoClassDefFoundError\r", + "message": "Message http : localhost/admin \n java.lang.NoClassDefFoundError\r", "test_item": 23, "start_time": "2020-01-15 10:57:43", "unique_id": "unique3", "test_case_hash": -1126886182, - "detected_message": "Message http : localhost/admin \r\n 
java.lang.NoClassDefFoundError\r", - "detected_message_with_numbers": "Message 34 http : localhost/admin \r\n java.lang.NoClassDefFoundError\r", + "detected_message": "Message http : localhost/admin \n java.lang.NoClassDefFoundError\r", + "detected_message_with_numbers": "Message 34 http : localhost/admin \n java.lang.NoClassDefFoundError\r", "only_numbers": "34", "merged_small_logs": "", "stacktrace": "", @@ -77,12 +77,12 @@ "found_exceptions": "java.lang.noclassdeffounderror", "found_exceptions_extended": "java.lang.noclassdeffounderror lang.noclassdeffounderror noclassdeffounderror", "stacktrace_extended": "", - "message_extended": "Message http : localhost/admin \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", - "detected_message_extended": "Message http : localhost/admin \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", - "detected_message_without_params_extended": "Message \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", - "message_without_params_extended": "Message \r\n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", - "detected_message_without_params_and_brackets": "Message 34 \r\n java.lang lang.NoClassDefFoundError", - "message_without_params_and_brackets": "Message 34 \r\n java.lang lang.NoClassDefFoundError" + "message_extended": "Message http : localhost/admin \n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", + "detected_message_extended": "Message http : localhost/admin \n java.lang lang.NoClassDefFoundError NoClassDefFoundError\r", + "detected_message_without_params_extended": "Message \n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", + "message_without_params_extended": "Message \n java.lang lang.NoClassDefFoundError NoClassDefFoundError ", + "detected_message_without_params_and_brackets": "Message 34 \n java.lang lang.NoClassDefFoundError", + "message_without_params_and_brackets": "Message 34 \n java.lang lang.NoClassDefFoundError" } } ] diff --git a/test_res/test_logs/brackets_test.txt b/test_res/test_logs/brackets_test.txt new file mode 100644 index 00000000..399369ff --- /dev/null +++ b/test_res/test_logs/brackets_test.txt @@ -0,0 +1,5 @@ +[this is a test] +(this is a test) +{this is a test} + at com.epam.ta.reportportal.core.launch.cluster.pipeline.SaveClusterDataPartProvider.provide(SaveClusterDataPartProvider.java:30) + at com.epam.ta.reportportal.core.launch.cluster.pipeline.SaveClusterDataPartProvider.provide() diff --git a/test_res/test_logs/brackets_test_results.txt b/test_res/test_logs/brackets_test_results.txt new file mode 100644 index 00000000..b56441c3 --- /dev/null +++ b/test_res/test_logs/brackets_test_results.txt @@ -0,0 +1,5 @@ + + + + at com.epam.ta.reportportal.core.launch.cluster.pipeline.SaveClusterDataPartProvider.provide + at com.epam.ta.reportportal.core.launch.cluster.pipeline.SaveClusterDataPartProvider.provide diff --git a/test_res/test_logs/log_line_no_timestamp_current.txt b/test_res/test_logs/log_line_no_log_level.txt similarity index 80% rename from test_res/test_logs/log_line_no_timestamp_current.txt rename to test_res/test_logs/log_line_no_log_level.txt index 580f78de..946fcd6c 100644 --- a/test_res/test_logs/log_line_no_timestamp_current.txt +++ b/test_res/test_logs/log_line_no_log_level.txt @@ -1,6 +1,13 @@ 1 --- [ main] o.s.b.web.embedded.tomcat.TomcatStarter : Error starting Tomcat context. Exception: org.springframework.beans.factory.BeanCreationException. 
Message: Error creating bean with name 'servletEndpointRegistrar' defined in class path resource [org/springframework/boot/actuate/autoconfigure/endpoint/web/ServletEndpointManagementContextConfiguration$WebMvcServletEndpointManagementContextConfiguration.class]: Bean instantiation via factory method failed; nested exception is org.springframework.beans.BeanInstantiationException: Failed to instantiate [org.springframework.boot.actuate.endpoint.web.ServletEndpointRegistrar]: Factory method 'servletEndpointRegistrar' threw exception; nested exception is org.springframework.beans.factory.UnsatisfiedDependencyException: Error creating bean with name 'healthEndpoint' defined in class path resource [org/springframework/boot/actuate/autoconfigure/health/HealthEndpointConfiguration.class]: Unsatisfied dependency expressed through method 'healthEndpoint' parameter 0; nested exception is org.springframework.beans.factory.BeanCreationException: Error creating bean with name 'healthContributorRegistry' defined in class path resource [org/springframework/boot/actuate/autoconfigure/health/HealthEndpointConfiguration.class]: Bean instantiation via factory method failed; nested exception is org.springframework.beans.BeanInstantiationException: Failed to instantiate [org.springframework.boot.actuate.health.HealthContributorRegistry]: Factory method 'healthContributorRegistry' threw exception; nested exception is org.springframework.beans.factory.UnsatisfiedDependencyException: Error creating bean with name 'rabbitHealthContributor' defined in class path resource [org/springframework/boot/actuate/autoconfigure/amqp/RabbitHealthContributorAutoConfiguration.class]: Unsatisfied dependency expressed through method 'rabbitHealthContributor' parameter 0; nested exception is org.springframework.beans.factory.UnsatisfiedDependencyException: Error creating bean with name 'rabbitTemplate' defined in class path resource [com/epam/ta/reportportal/core/configs/rabbit/RabbitMqConfiguration.class]: Unsatisfied dependency expressed through method 'rabbitTemplate' parameter 0; nested exception is org.springframework.beans.factory.BeanCreationException: Error creating bean with name 'connectionFactory' defined in class path resource [com/epam/ta/reportportal/core/configs/rabbit/RabbitMqConfiguration.class]: Bean instantiation via factory method failed; nested exception is org.springframework.beans.BeanInstantiationException: Failed to instantiate [org.springframework.amqp.rabbit.connection.ConnectionFactory]: Factory method 'connectionFactory' threw exception; nested exception is com.epam.ta.reportportal.exception.ReportPortalException: Unclassified Report Portal Error -15:33:19,880 - INFO - analyzerApp - Starting waiting for AMQP connection +analyzerApp - Starting waiting for AMQP connection 1 --- [nio-8585-exec-3] c.e.t.r.c.e.rest.RestExceptionHandler : Resolved [com.epam.ta.reportportal.exception.ReportPortalException: Launch '1054' not found. Did you use correct Launch ID?] 
1 --- [ main] o.hibernate.annotations.common.Version : HCANN000001: Hibernate Commons Annotations {5.1.0.Final} 1 --- [ main] edFilterInvocationSecurityMetadataSource : Adding web access control expression [permitAll] for Ant [pattern='/api-docs/**'] -14:49:37,738 [TestNG-test=Json logging-1] INFO com.epam.reportportal.example.testng.log4j.logging.JsonLoggingTest - com.epam.reportportal.message.ReportPortalMessage@3ba778b0 +[TestNG-test=Json logging-1] INFO com.epam.reportportal.example.testng.log4j.logging.JsonLoggingTest - com.epam.reportportal.message.ReportPortalMessage@3ba778b0 +[ NotificationRestService] - Get notifications related entity by entityId [#22:1923] +[pool-1-thread-9 for channel id=171242] INFO - -- Status: SUCCESS +Page text for USD_PageSource was saved to file: DesktopClient_OutboundEmailGiven_ShouldBePossibleMoveEmailToWorkBin_PageSource_2024_02_19_14_23_29_749.xml +(Logger.java:41) - Verifying Health Notification(s) +base_page.ts:128:16 | waitForElementToBeEnabledSoft +Filtering dataframe by {'Member_ID': 'TST02677080-01', 'Population_ID': 'service_date_4'} +2024-03-02 03:46:05,366 - //button[@data-test='create-btn'] should be visible, 10 s. - FAIL diff --git a/test_res/test_logs/log_line_no_thread_id.txt b/test_res/test_logs/log_line_no_thread_id.txt new file mode 100644 index 00000000..dce91c01 --- /dev/null +++ b/test_res/test_logs/log_line_no_thread_id.txt @@ -0,0 +1,13 @@ +[ main] o.s.b.web.embedded.tomcat.TomcatStarter : Error starting Tomcat context. Exception: org.springframework.beans.factory.BeanCreationException. Message: Error creating bean with name 'servletEndpointRegistrar' defined in class path resource [org/springframework/boot/actuate/autoconfigure/endpoint/web/ServletEndpointManagementContextConfiguration$WebMvcServletEndpointManagementContextConfiguration.class]: Bean instantiation via factory method failed; nested exception is org.springframework.beans.BeanInstantiationException: Failed to instantiate [org.springframework.boot.actuate.endpoint.web.ServletEndpointRegistrar]: Factory method 'servletEndpointRegistrar' threw exception; nested exception is org.springframework.beans.factory.UnsatisfiedDependencyException: Error creating bean with name 'healthEndpoint' defined in class path resource [org/springframework/boot/actuate/autoconfigure/health/HealthEndpointConfiguration.class]: Unsatisfied dependency expressed through method 'healthEndpoint' parameter 0; nested exception is org.springframework.beans.factory.BeanCreationException: Error creating bean with name 'healthContributorRegistry' defined in class path resource [org/springframework/boot/actuate/autoconfigure/health/HealthEndpointConfiguration.class]: Bean instantiation via factory method failed; nested exception is org.springframework.beans.BeanInstantiationException: Failed to instantiate [org.springframework.boot.actuate.health.HealthContributorRegistry]: Factory method 'healthContributorRegistry' threw exception; nested exception is org.springframework.beans.factory.UnsatisfiedDependencyException: Error creating bean with name 'rabbitHealthContributor' defined in class path resource [org/springframework/boot/actuate/autoconfigure/amqp/RabbitHealthContributorAutoConfiguration.class]: Unsatisfied dependency expressed through method 'rabbitHealthContributor' parameter 0; nested exception is org.springframework.beans.factory.UnsatisfiedDependencyException: Error creating bean with name 'rabbitTemplate' defined in class path resource 
[com/epam/ta/reportportal/core/configs/rabbit/RabbitMqConfiguration.class]: Unsatisfied dependency expressed through method 'rabbitTemplate' parameter 0; nested exception is org.springframework.beans.factory.BeanCreationException: Error creating bean with name 'connectionFactory' defined in class path resource [com/epam/ta/reportportal/core/configs/rabbit/RabbitMqConfiguration.class]: Bean instantiation via factory method failed; nested exception is org.springframework.beans.BeanInstantiationException: Failed to instantiate [org.springframework.amqp.rabbit.connection.ConnectionFactory]: Factory method 'connectionFactory' threw exception; nested exception is com.epam.ta.reportportal.exception.ReportPortalException: Unclassified Report Portal Error +analyzerApp - Starting waiting for AMQP connection +[nio-8585-exec-3] c.e.t.r.c.e.rest.RestExceptionHandler : Resolved [com.epam.ta.reportportal.exception.ReportPortalException: Launch '1054' not found. Did you use correct Launch ID?] +[ main] o.hibernate.annotations.common.Version : HCANN000001: Hibernate Commons Annotations {5.1.0.Final} +[ main] edFilterInvocationSecurityMetadataSource : Adding web access control expression [permitAll] for Ant [pattern='/api-docs/**'] +[TestNG-test=Json logging-1] INFO com.epam.reportportal.example.testng.log4j.logging.JsonLoggingTest - com.epam.reportportal.message.ReportPortalMessage@3ba778b0 +[ NotificationRestService] - Get notifications related entity by entityId [#22:1923] +[pool-1-thread-9 for channel id=171242] INFO - -- Status: SUCCESS +Page text for USD_PageSource was saved to file: DesktopClient_OutboundEmailGiven_ShouldBePossibleMoveEmailToWorkBin_PageSource_2024_02_19_14_23_29_749.xml +(Logger.java:41) - Verifying Health Notification(s) +base_page.ts:128:16 | waitForElementToBeEnabledSoft +Filtering dataframe by {'Member_ID': 'TST02677080-01', 'Population_ID': 'service_date_4'} +2024-03-02 03:46:05,366 - //button[@data-test='create-btn'] should be visible, 10 s. - FAIL diff --git a/test_res/test_logs/log_line_no_thread_name.txt b/test_res/test_logs/log_line_no_thread_name.txt new file mode 100644 index 00000000..4a4b4d49 --- /dev/null +++ b/test_res/test_logs/log_line_no_thread_name.txt @@ -0,0 +1,13 @@ +o.s.b.web.embedded.tomcat.TomcatStarter : Error starting Tomcat context. Exception: org.springframework.beans.factory.BeanCreationException. 
Message: Error creating bean with name 'servletEndpointRegistrar' defined in class path resource [org/springframework/boot/actuate/autoconfigure/endpoint/web/ServletEndpointManagementContextConfiguration$WebMvcServletEndpointManagementContextConfiguration.class]: Bean instantiation via factory method failed; nested exception is org.springframework.beans.BeanInstantiationException: Failed to instantiate [org.springframework.boot.actuate.endpoint.web.ServletEndpointRegistrar]: Factory method 'servletEndpointRegistrar' threw exception; nested exception is org.springframework.beans.factory.UnsatisfiedDependencyException: Error creating bean with name 'healthEndpoint' defined in class path resource [org/springframework/boot/actuate/autoconfigure/health/HealthEndpointConfiguration.class]: Unsatisfied dependency expressed through method 'healthEndpoint' parameter 0; nested exception is org.springframework.beans.factory.BeanCreationException: Error creating bean with name 'healthContributorRegistry' defined in class path resource [org/springframework/boot/actuate/autoconfigure/health/HealthEndpointConfiguration.class]: Bean instantiation via factory method failed; nested exception is org.springframework.beans.BeanInstantiationException: Failed to instantiate [org.springframework.boot.actuate.health.HealthContributorRegistry]: Factory method 'healthContributorRegistry' threw exception; nested exception is org.springframework.beans.factory.UnsatisfiedDependencyException: Error creating bean with name 'rabbitHealthContributor' defined in class path resource [org/springframework/boot/actuate/autoconfigure/amqp/RabbitHealthContributorAutoConfiguration.class]: Unsatisfied dependency expressed through method 'rabbitHealthContributor' parameter 0; nested exception is org.springframework.beans.factory.UnsatisfiedDependencyException: Error creating bean with name 'rabbitTemplate' defined in class path resource [com/epam/ta/reportportal/core/configs/rabbit/RabbitMqConfiguration.class]: Unsatisfied dependency expressed through method 'rabbitTemplate' parameter 0; nested exception is org.springframework.beans.factory.BeanCreationException: Error creating bean with name 'connectionFactory' defined in class path resource [com/epam/ta/reportportal/core/configs/rabbit/RabbitMqConfiguration.class]: Bean instantiation via factory method failed; nested exception is org.springframework.beans.BeanInstantiationException: Failed to instantiate [org.springframework.amqp.rabbit.connection.ConnectionFactory]: Factory method 'connectionFactory' threw exception; nested exception is com.epam.ta.reportportal.exception.ReportPortalException: Unclassified Report Portal Error +analyzerApp - Starting waiting for AMQP connection +c.e.t.r.c.e.rest.RestExceptionHandler : Resolved [com.epam.ta.reportportal.exception.ReportPortalException: Launch '1054' not found. Did you use correct Launch ID?] 
+o.hibernate.annotations.common.Version : HCANN000001: Hibernate Commons Annotations {5.1.0.Final} +edFilterInvocationSecurityMetadataSource : Adding web access control expression [permitAll] for Ant [pattern='/api-docs/**'] +INFO com.epam.reportportal.example.testng.log4j.logging.JsonLoggingTest - com.epam.reportportal.message.ReportPortalMessage@3ba778b0 +Get notifications related entity by entityId [#22:1923] +INFO - -- Status: SUCCESS +Page text for USD_PageSource was saved to file: DesktopClient_OutboundEmailGiven_ShouldBePossibleMoveEmailToWorkBin_PageSource_2024_02_19_14_23_29_749.xml +(Logger.java:41) - Verifying Health Notification(s) +base_page.ts:128:16 | waitForElementToBeEnabledSoft +Filtering dataframe by {'Member_ID': 'TST02677080-01', 'Population_ID': 'service_date_4'} +2024-03-02 03:46:05,366 - //button[@data-test='create-btn'] should be visible, 10 s. - FAIL diff --git a/test_res/test_logs/log_line_no_timestamp_right.txt b/test_res/test_logs/log_line_no_timestamp.txt similarity index 83% rename from test_res/test_logs/log_line_no_timestamp_right.txt rename to test_res/test_logs/log_line_no_timestamp.txt index 21f304cb..82cc5895 100644 --- a/test_res/test_logs/log_line_no_timestamp_right.txt +++ b/test_res/test_logs/log_line_no_timestamp.txt @@ -1,6 +1,13 @@ ERROR 1 --- [ main] o.s.b.web.embedded.tomcat.TomcatStarter : Error starting Tomcat context. Exception: org.springframework.beans.factory.BeanCreationException. Message: Error creating bean with name 'servletEndpointRegistrar' defined in class path resource [org/springframework/boot/actuate/autoconfigure/endpoint/web/ServletEndpointManagementContextConfiguration$WebMvcServletEndpointManagementContextConfiguration.class]: Bean instantiation via factory method failed; nested exception is org.springframework.beans.BeanInstantiationException: Failed to instantiate [org.springframework.boot.actuate.endpoint.web.ServletEndpointRegistrar]: Factory method 'servletEndpointRegistrar' threw exception; nested exception is org.springframework.beans.factory.UnsatisfiedDependencyException: Error creating bean with name 'healthEndpoint' defined in class path resource [org/springframework/boot/actuate/autoconfigure/health/HealthEndpointConfiguration.class]: Unsatisfied dependency expressed through method 'healthEndpoint' parameter 0; nested exception is org.springframework.beans.factory.BeanCreationException: Error creating bean with name 'healthContributorRegistry' defined in class path resource [org/springframework/boot/actuate/autoconfigure/health/HealthEndpointConfiguration.class]: Bean instantiation via factory method failed; nested exception is org.springframework.beans.BeanInstantiationException: Failed to instantiate [org.springframework.boot.actuate.health.HealthContributorRegistry]: Factory method 'healthContributorRegistry' threw exception; nested exception is org.springframework.beans.factory.UnsatisfiedDependencyException: Error creating bean with name 'rabbitHealthContributor' defined in class path resource [org/springframework/boot/actuate/autoconfigure/amqp/RabbitHealthContributorAutoConfiguration.class]: Unsatisfied dependency expressed through method 'rabbitHealthContributor' parameter 0; nested exception is org.springframework.beans.factory.UnsatisfiedDependencyException: Error creating bean with name 'rabbitTemplate' defined in class path resource [com/epam/ta/reportportal/core/configs/rabbit/RabbitMqConfiguration.class]: Unsatisfied dependency expressed through method 'rabbitTemplate' parameter 0; nested exception 
is org.springframework.beans.factory.BeanCreationException: Error creating bean with name 'connectionFactory' defined in class path resource [com/epam/ta/reportportal/core/configs/rabbit/RabbitMqConfiguration.class]: Bean instantiation via factory method failed; nested exception is org.springframework.beans.BeanInstantiationException: Failed to instantiate [org.springframework.amqp.rabbit.connection.ConnectionFactory]: Factory method 'connectionFactory' threw exception; nested exception is com.epam.ta.reportportal.exception.ReportPortalException: Unclassified Report Portal Error -- INFO - analyzerApp - Starting waiting for AMQP connection +INFO - analyzerApp - Starting waiting for AMQP connection WARN 1 --- [nio-8585-exec-3] c.e.t.r.c.e.rest.RestExceptionHandler : Resolved [com.epam.ta.reportportal.exception.ReportPortalException: Launch '1054' not found. Did you use correct Launch ID?] INFO 1 --- [ main] o.hibernate.annotations.common.Version : HCANN000001: Hibernate Commons Annotations {5.1.0.Final} DEBUG 1 --- [ main] edFilterInvocationSecurityMetadataSource : Adding web access control expression [permitAll] for Ant [pattern='/api-docs/**'] [TestNG-test=Json logging-1] INFO com.epam.reportportal.example.testng.log4j.logging.JsonLoggingTest - com.epam.reportportal.message.ReportPortalMessage@3ba778b0 +INFO [ NotificationRestService] - Get notifications related entity by entityId [#22:1923] +[pool-1-thread-9 for channel id=171242] INFO - -- Status: SUCCESS +INFO| Page text for USD_PageSource was saved to file: DesktopClient_OutboundEmailGiven_ShouldBePossibleMoveEmailToWorkBin_PageSource_2024_02_19_14_23_29_749.xml +INFO (Logger.java:41) - Verifying Health Notification(s) +base_page.ts:128:16 | waitForElementToBeEnabledSoft +INFO - Filtering dataframe by {'Member_ID': 'TST02677080-01', 'Population_ID': 'service_date_4'} +[INFO] 2024-03-02 03:46:05,366 - //button[@data-test='create-btn'] should be visible, 10 s. - FAIL diff --git a/test_res/test_logs/log_line_prepared.txt b/test_res/test_logs/log_line_prepared.txt new file mode 100644 index 00000000..22f6a4e6 --- /dev/null +++ b/test_res/test_logs/log_line_prepared.txt @@ -0,0 +1,13 @@ +o.s.b.web.embedded.tomcat.TomcatStarter : Error starting Tomcat context. Exception: org.springframework.beans.factory.BeanCreationException. 
Message: Error creating bean with name 'servletEndpointRegistrar' defined in class path resource [org/springframework/boot/actuate/autoconfigure/endpoint/web/ServletEndpointManagementContextConfiguration.WebMvcServletEndpointManagementContextConfiguration.class]: Bean instantiation via factory method failed; nested exception is org.springframework.beans.BeanInstantiationException: Failed to instantiate [org.springframework.boot.actuate.endpoint.web.ServletEndpointRegistrar]: Factory method 'servletEndpointRegistrar' threw exception; nested exception is org.springframework.beans.factory.UnsatisfiedDependencyException: Error creating bean with name 'healthEndpoint' defined in class path resource [org/springframework/boot/actuate/autoconfigure/health/HealthEndpointConfiguration.class]: Unsatisfied dependency expressed through method 'healthEndpoint' parameter 0; nested exception is org.springframework.beans.factory.BeanCreationException: Error creating bean with name 'healthContributorRegistry' defined in class path resource [org/springframework/boot/actuate/autoconfigure/health/HealthEndpointConfiguration.class]: Bean instantiation via factory method failed; nested exception is org.springframework.beans.BeanInstantiationException: Failed to instantiate [org.springframework.boot.actuate.health.HealthContributorRegistry]: Factory method 'healthContributorRegistry' threw exception; nested exception is org.springframework.beans.factory.UnsatisfiedDependencyException: Error creating bean with name 'rabbitHealthContributor' defined in class path resource [org/springframework/boot/actuate/autoconfigure/amqp/RabbitHealthContributorAutoConfiguration.class]: Unsatisfied dependency expressed through method 'rabbitHealthContributor' parameter 0; nested exception is org.springframework.beans.factory.UnsatisfiedDependencyException: Error creating bean with name 'rabbitTemplate' defined in class path resource [com/epam/ta/reportportal/core/configs/rabbit/RabbitMqConfiguration.class]: Unsatisfied dependency expressed through method 'rabbitTemplate' parameter 0; nested exception is org.springframework.beans.factory.BeanCreationException: Error creating bean with name 'connectionFactory' defined in class path resource [com/epam/ta/reportportal/core/configs/rabbit/RabbitMqConfiguration.class]: Bean instantiation via factory method failed; nested exception is org.springframework.beans.BeanInstantiationException: Failed to instantiate [org.springframework.amqp.rabbit.connection.ConnectionFactory]: Factory method 'connectionFactory' threw exception; nested exception is com.epam.ta.reportportal.exception.ReportPortalException: Unclassified Report Portal Error +analyzerApp - Starting waiting for AMQP connection +c.e.t.r.c.e.rest.RestExceptionHandler : Resolved [com.epam.ta.reportportal.exception.ReportPortalException: Launch '1054' not found. Did you use correct Launch ID?] 
+o.hibernate.annotations.common.Version : HCANN000001: Hibernate Commons Annotations {5.1.0.Final} +edFilterInvocationSecurityMetadataSource : Adding web access control expression [permitAll] for Ant [pattern='/api-docs/**'] +com.epam.reportportal.example.testng.log4j.logging.JsonLoggingTest - com.epam.reportportal.message.ReportPortalMessage +Get notifications related entity by entityId [#22:1923] +-- Status: SUCCESS +Page text for USD_PageSource was saved to file: DesktopClient_OutboundEmailGiven_ShouldBePossibleMoveEmailToWorkBin_PageSource_2024_02_19_14_23_29_749.xml +(Logger.java:41) - Verifying Health Notification(s) +base_page.ts:128:16 | waitForElementToBeEnabledSoft +Filtering dataframe by {'Member_ID': 'TST02677080-01', 'Population_ID': 'service_date_4'} +//button[@data-test='create-btn'] should be visible, 10 s. - FAIL diff --git a/test_res/test_logs/log_line.txt b/test_res/test_logs/log_line_timestamps.txt similarity index 83% rename from test_res/test_logs/log_line.txt rename to test_res/test_logs/log_line_timestamps.txt index 4bd49fe3..4258844b 100644 --- a/test_res/test_logs/log_line.txt +++ b/test_res/test_logs/log_line_timestamps.txt @@ -4,3 +4,10 @@ 2022-12-14 10:46:58.852 INFO 1 --- [ main] o.hibernate.annotations.common.Version : HCANN000001: Hibernate Commons Annotations {5.1.0.Final} 2023-05-30 12:47:43.985 DEBUG 1 --- [ main] edFilterInvocationSecurityMetadataSource : Adding web access control expression [permitAll] for Ant [pattern='/api-docs/**'] 2023-07-26 14:49:37,738 [TestNG-test=Json logging-1] INFO com.epam.reportportal.example.testng.log4j.logging.JsonLoggingTest - com.epam.reportportal.message.ReportPortalMessage@3ba778b0 +11:51:27 INFO [ NotificationRestService] - Get notifications related entity by entityId [#22:1923] +08:58:16.754 [pool-1-thread-9 for channel id=171242] INFO - -- Status: SUCCESS +2024-02-19 14:23:29.7652|INFO| Page text for USD_PageSource was saved to file: DesktopClient_OutboundEmailGiven_ShouldBePossibleMoveEmailToWorkBin_PageSource_2024_02_19_14_23_29_749.xml +[02:07:19] INFO (Logger.java:41) - Verifying Health Notification(s) +[2024-03-27 07:02:03] | base_page.ts:128:16 | waitForElementToBeEnabledSoft +2024-03-07 01:28:26,669 - INFO - Filtering dataframe by {'Member_ID': 'TST02677080-01', 'Population_ID': 'service_date_4'} +[INFO] 2024-03-02 03:46:05,366 - //button[@data-test='create-btn'] should be visible, 10 s. - FAIL diff --git a/test_res/test_logs/log_locator_with_attribute.txt b/test_res/test_logs/log_locator_with_attribute.txt new file mode 100644 index 00000000..a82c6ee0 --- /dev/null +++ b/test_res/test_logs/log_locator_with_attribute.txt @@ -0,0 +1 @@ +//button[@data-test='create-btn'] should be visible, 10 s. - FAIL diff --git a/test_res/test_logs/log_locator_with_attribute_prepared.txt b/test_res/test_logs/log_locator_with_attribute_prepared.txt new file mode 100644 index 00000000..a82c6ee0 --- /dev/null +++ b/test_res/test_logs/log_locator_with_attribute_prepared.txt @@ -0,0 +1 @@ +//button[@data-test='create-btn'] should be visible, 10 s. - FAIL diff --git a/test_res/test_logs/markdown/markdown_at_log.txt b/test_res/test_logs/markdown/markdown_at_log.txt new file mode 100644 index 00000000..3ef11130 --- /dev/null +++ b/test_res/test_logs/markdown/markdown_at_log.txt @@ -0,0 +1,16 @@ +!!!MARKDOWN_MODE!!! 
+And Eventually Text of "Web Element" should contain "My Web Element" +``` +AssertionError: expected 'Unknown Element' to include 'My Web Element' + at [root]/node_modules/@wk/cucumber-steps/step_definitions/content.js:58 + at Proxy.include (Z:\xml-data\build-dir\SM-TST-PRJ\node_modules\chai\lib\chai\core\assertions.js:585:10) + at doAsserterAsyncAndAddThen (Z:\xml-data\build-dir\SM-TST-PRJ\node_modules\chai-as-promised\lib\chai-as-promised.js:289:22) + at Proxy. (Z:\xml-data\build-dir\SM-TST-PRJ\node_modules\chai-as-promised\lib\chai-as-promised.js:272:28) + at Proxy.overwritingChainableMethodWrapper (Z:\xml-data\build-dir\SM-TST-PRJ\node_modules\chai\lib\chai\utils\overwriteChainableMethod.js:60:34) + at Proxy.chainableMethodWrapper (Z:\xml-data\build-dir\SM-TST-PRJ\node_modules\chai\lib\chai\utils\addChainableMethod.js:113:49) + at elementCheck (Z:\xml-data\build-dir\SM-TST-PRJ\node_modules\@wk\cucumber-steps\checks\content.js:453:65) + at Z:\xml-data\build-dir\SM-TST-PRJ\node_modules\@wk\cucumber-steps\checks\content.js:388:9 + at Generator.next () + at fulfilled (Z:\xml-data\build-dir\SM-TST-PRJ\node_modules\@wk\cucumber-steps\node_modules\tslib\tslib.js:112:62) + at processTicksAndRejections (node:internal/process/task_queues:95:5) +``` diff --git a/test_res/test_logs/markdown/markdown_at_log_prepared.txt b/test_res/test_logs/markdown/markdown_at_log_prepared.txt new file mode 100644 index 00000000..e8a6fa0e --- /dev/null +++ b/test_res/test_logs/markdown/markdown_at_log_prepared.txt @@ -0,0 +1,15 @@ +And Eventually Text of "Web Element" should contain "My Web Element" +``` +AssertionError: expected 'Unknown Element' to include 'My Web Element' + at [root]/node_modules/@wk/cucumber-steps/step_definitions/content.js:58 + at Proxy.include (Z:\xml-data\build-dir\SM-TST-PRJ\node_modules\chai\lib\chai\core\assertions.js:585:10) + at doAsserterAsyncAndAddThen (Z:\xml-data\build-dir\SM-TST-PRJ\node_modules\chai-as-promised\lib\chai-as-promised.js:289:22) + at Proxy. 
(Z:\xml-data\build-dir\SM-TST-PRJ\node_modules\chai-as-promised\lib\chai-as-promised.js:272:28) + at Proxy.overwritingChainableMethodWrapper (Z:\xml-data\build-dir\SM-TST-PRJ\node_modules\chai\lib\chai\utils\overwriteChainableMethod.js:60:34) + at Proxy.chainableMethodWrapper (Z:\xml-data\build-dir\SM-TST-PRJ\node_modules\chai\lib\chai\utils\addChainableMethod.js:113:49) + at elementCheck (Z:\xml-data\build-dir\SM-TST-PRJ\node_modules\@wk\cucumber-steps\checks\content.js:453:65) + at Z:\xml-data\build-dir\SM-TST-PRJ\node_modules\@wk\cucumber-steps\checks\content.js:388:9 + at Generator.next () + at fulfilled (Z:\xml-data\build-dir\SM-TST-PRJ\node_modules\@wk\cucumber-steps\node_modules\tslib\tslib.js:112:62) + at processTicksAndRejections (node:internal/process/task_queues:95:5) +``` diff --git a/test_res/test_logs/separators/fancy_separator_log.txt b/test_res/test_logs/separators/fancy_separator_log.txt new file mode 100644 index 00000000..c765c004 --- /dev/null +++ b/test_res/test_logs/separators/fancy_separator_log.txt @@ -0,0 +1,4 @@ +---=== SOFT ASSERT ERROR: ===--- +URL : https : //example.com/event/ +Reason : Check select values +Expected : contains list in any order diff --git a/test_res/test_logs/separators/fancy_separator_log_prepared.txt b/test_res/test_logs/separators/fancy_separator_log_prepared.txt new file mode 100644 index 00000000..7f16f86b --- /dev/null +++ b/test_res/test_logs/separators/fancy_separator_log_prepared.txt @@ -0,0 +1,4 @@ +TEXTDELIMITER SOFT ASSERT ERROR: TEXTDELIMITER +URL : https : //example.com/event/ +Reason : Check select values +Expected : contains list in any order diff --git a/test_res/test_logs/separators/markdown_separator_log.txt b/test_res/test_logs/separators/markdown_separator_log.txt new file mode 100644 index 00000000..3ef11130 --- /dev/null +++ b/test_res/test_logs/separators/markdown_separator_log.txt @@ -0,0 +1,16 @@ +!!!MARKDOWN_MODE!!! +And Eventually Text of "Web Element" should contain "My Web Element" +``` +AssertionError: expected 'Unknown Element' to include 'My Web Element' + at [root]/node_modules/@wk/cucumber-steps/step_definitions/content.js:58 + at Proxy.include (Z:\xml-data\build-dir\SM-TST-PRJ\node_modules\chai\lib\chai\core\assertions.js:585:10) + at doAsserterAsyncAndAddThen (Z:\xml-data\build-dir\SM-TST-PRJ\node_modules\chai-as-promised\lib\chai-as-promised.js:289:22) + at Proxy. (Z:\xml-data\build-dir\SM-TST-PRJ\node_modules\chai-as-promised\lib\chai-as-promised.js:272:28) + at Proxy.overwritingChainableMethodWrapper (Z:\xml-data\build-dir\SM-TST-PRJ\node_modules\chai\lib\chai\utils\overwriteChainableMethod.js:60:34) + at Proxy.chainableMethodWrapper (Z:\xml-data\build-dir\SM-TST-PRJ\node_modules\chai\lib\chai\utils\addChainableMethod.js:113:49) + at elementCheck (Z:\xml-data\build-dir\SM-TST-PRJ\node_modules\@wk\cucumber-steps\checks\content.js:453:65) + at Z:\xml-data\build-dir\SM-TST-PRJ\node_modules\@wk\cucumber-steps\checks\content.js:388:9 + at Generator.next () + at fulfilled (Z:\xml-data\build-dir\SM-TST-PRJ\node_modules\@wk\cucumber-steps\node_modules\tslib\tslib.js:112:62) + at processTicksAndRejections (node:internal/process/task_queues:95:5) +``` diff --git a/test_res/test_logs/separators/markdown_separator_log_prepared.txt b/test_res/test_logs/separators/markdown_separator_log_prepared.txt new file mode 100644 index 00000000..9507ad91 --- /dev/null +++ b/test_res/test_logs/separators/markdown_separator_log_prepared.txt @@ -0,0 +1,14 @@ +!!!MARKDOWN_MODE!!! 
+And Eventually Text of "Web Element" should contain "My Web Element" TEXTDELIMITER +AssertionError: expected 'Unknown Element' to include 'My Web Element' + at [root]/node_modules/@wk/cucumber-steps/step_definitions/content.js:58 + at Proxy.include (Z:\xml-data\build-dir\SM-TST-PRJ\node_modules\chai\lib\chai\core\assertions.js:585:10) + at doAsserterAsyncAndAddThen (Z:\xml-data\build-dir\SM-TST-PRJ\node_modules\chai-as-promised\lib\chai-as-promised.js:289:22) + at Proxy. (Z:\xml-data\build-dir\SM-TST-PRJ\node_modules\chai-as-promised\lib\chai-as-promised.js:272:28) + at Proxy.overwritingChainableMethodWrapper (Z:\xml-data\build-dir\SM-TST-PRJ\node_modules\chai\lib\chai\utils\overwriteChainableMethod.js:60:34) + at Proxy.chainableMethodWrapper (Z:\xml-data\build-dir\SM-TST-PRJ\node_modules\chai\lib\chai\utils\addChainableMethod.js:113:49) + at elementCheck (Z:\xml-data\build-dir\SM-TST-PRJ\node_modules\@wk\cucumber-steps\checks\content.js:453:65) + at Z:\xml-data\build-dir\SM-TST-PRJ\node_modules\@wk\cucumber-steps\checks\content.js:388:9 + at Generator.next () + at fulfilled (Z:\xml-data\build-dir\SM-TST-PRJ\node_modules\@wk\cucumber-steps\node_modules\tslib\tslib.js:112:62) + at processTicksAndRejections (node:internal/process/task_queues:95:5) TEXTDELIMITER diff --git a/test_res/test_logs/separators/mixed_markdown_separators.txt b/test_res/test_logs/separators/mixed_markdown_separators.txt new file mode 100644 index 00000000..9ca7929e --- /dev/null +++ b/test_res/test_logs/separators/mixed_markdown_separators.txt @@ -0,0 +1,20 @@ +!!!MARKDOWN_MODE!!! +-------------------------Then Following project hours are displayed for "Qwerty123 Employee" employee position:------------------------- + + +| Parent section | Section | Value | +|-| +| Billable | Reported hours | 10.5 | +| Billable | Overtime approved | 3 | +| Billable | Overtime submitted | 2 | +| Billable | On-duty hours | 10 | +| Billable | Overtime approved | 1 | +| Billable | Overtime submitted | 3 | +| Non-billable | Reported hours | 4.5 | +| Non-billable | Overtime approved | 1 | +| Non-billable | Overtime submitted | 3 | +| Non-billable | On-duty hours | 8 | +| Non-billable | Overtime approved | 3 | +| Non-billable | Overtime submitted | 1 | +| Leave | Taken | 8 | +| Leave | Requested | 4 | diff --git a/test_res/test_logs/separators/mixed_markdown_separators_prepared.txt b/test_res/test_logs/separators/mixed_markdown_separators_prepared.txt new file mode 100644 index 00000000..a906cae4 --- /dev/null +++ b/test_res/test_logs/separators/mixed_markdown_separators_prepared.txt @@ -0,0 +1,17 @@ +TEXTDELIMITER Then Following project hours are displayed for "Qwerty123 Employee" employee position: TEXTDELIMITER +| Parent section | Section | Value | +|-| +| Billable | Reported hours | 10.5 | +| Billable | Overtime approved | 3 | +| Billable | Overtime submitted | 2 | +| Billable | On-duty hours | 10 | +| Billable | Overtime approved | 1 | +| Billable | Overtime submitted | 3 | +| Non-billable | Reported hours | 4.5 | +| Non-billable | Overtime approved | 1 | +| Non-billable | Overtime submitted | 3 | +| Non-billable | On-duty hours | 8 | +| Non-billable | Overtime approved | 3 | +| Non-billable | Overtime submitted | 1 | +| Leave | Taken | 8 | +| Leave | Requested | 4 | diff --git a/test_res/test_logs/separators/step_separator_equality_log.txt b/test_res/test_logs/separators/step_separator_equality_log.txt new file mode 100644 index 00000000..29f05081 --- /dev/null +++ b/test_res/test_logs/separators/step_separator_equality_log.txt @@ 
-0,0 +1 @@ +=========================STEP FAILED========================= diff --git a/test_res/test_logs/separators/step_separator_log.txt b/test_res/test_logs/separators/step_separator_log.txt new file mode 100644 index 00000000..90f0b108 --- /dev/null +++ b/test_res/test_logs/separators/step_separator_log.txt @@ -0,0 +1 @@ +-------------------------STEP FAILED------------------------- diff --git a/test_res/test_logs/separators/step_separator_log_prepared.txt b/test_res/test_logs/separators/step_separator_log_prepared.txt new file mode 100644 index 00000000..1956aeba --- /dev/null +++ b/test_res/test_logs/separators/step_separator_log_prepared.txt @@ -0,0 +1 @@ +TEXTDELIMITER STEP FAILED TEXTDELIMITER diff --git a/test_res/test_logs/separators/step_separator_underscore_log.txt b/test_res/test_logs/separators/step_separator_underscore_log.txt new file mode 100644 index 00000000..0ce21eb9 --- /dev/null +++ b/test_res/test_logs/separators/step_separator_underscore_log.txt @@ -0,0 +1 @@ +_________________________STEP FAILED_________________________ diff --git a/test_res/test_logs/stacktraces/log_lines_class_referecences.txt b/test_res/test_logs/stacktraces/log_lines_class_referecences.txt new file mode 100644 index 00000000..2f411041 --- /dev/null +++ b/test_res/test_logs/stacktraces/log_lines_class_referecences.txt @@ -0,0 +1,2 @@ +com.epam.reportportal.message.ReportPortalMessage@3ba778b0 +org/springframework/boot/actuate/autoconfigure/endpoint/web/ServletEndpointManagementContextConfiguration$WebMvcServletEndpointManagementContextConfiguration.class diff --git a/test_res/test_logs/stacktraces/log_lines_class_referecences_prepared.txt b/test_res/test_logs/stacktraces/log_lines_class_referecences_prepared.txt new file mode 100644 index 00000000..37187e84 --- /dev/null +++ b/test_res/test_logs/stacktraces/log_lines_class_referecences_prepared.txt @@ -0,0 +1,2 @@ +com.epam.reportportal.message.ReportPortalMessage +org/springframework/boot/actuate/autoconfigure/endpoint/web/ServletEndpointManagementContextConfiguration.WebMvcServletEndpointManagementContextConfiguration.class diff --git a/test_res/test_logs/stacktraces/log_stacktrace_dotnet.txt b/test_res/test_logs/stacktraces/log_stacktrace_dotnet.txt new file mode 100644 index 00000000..eb91edff --- /dev/null +++ b/test_res/test_logs/stacktraces/log_stacktrace_dotnet.txt @@ -0,0 +1,9 @@ +Verify that for https://test.example.com/api/v1/User/reminders Response Codes are equal - Expected: 'BadRequest'; Actual: 'InternalServerError' +Original response: +{ "statusCode": 500, "message": "Internal server error", "activityId": "923c5632-dec4-4991-9e1f-71e6193f3172" } + Expected: BadRequest + But was: InternalServerError + + at Core.Common.VerifyThat.AreEqual(Object expected, Object actual, String message, Boolean logInfo, Boolean logWarn, StringBuilder warnMessage) in D:\path\TO\test\3\s\src\TestFramework\Core\Common\VerifyThat.cs:line 168 + at Core.Utils.VerifyBuilderResponseGeneric2`2.ResponseCodeIs(HttpStatusCode expectedHttpStatusCode, String cinemaId) in D:\path\TO\test\3\s\src\TestFramework\Core\Utils\VerifyBuilder.cs:line 316 + at Tests.User.GetUserRemindersTests.TestGetRemindersNotLoggedInUser() in D:\path\TO\test\3\s\src\TestFramework\Tests\User\GetUserRemindersTests.cs:line 115 diff --git a/test_res/test_logs/stacktraces/log_stacktrace_generated.txt b/test_res/test_logs/stacktraces/log_stacktrace_generated.txt new file mode 100644 index 00000000..a7812e01 --- /dev/null +++ b/test_res/test_logs/stacktraces/log_stacktrace_generated.txt @@ 
-0,0 +1,37 @@ +com.epam.ta.reportportal.exception.ReportPortalException: Impossible interact with integration. There are no analyzer services are deployed. + at com.epam.ta.reportportal.commons.validation.ErrorTypeBasedRuleValidator.verify(ErrorTypeBasedRuleValidator.java:32) + at com.epam.ta.reportportal.core.launch.cluster.pipeline.data.AnalyzerClusterDataProvider.provide(AnalyzerClusterDataProvider.java:47) + at com.epam.ta.reportportal.core.launch.cluster.pipeline.SaveClusterDataPartProvider.lambda$provide$0(SaveClusterDataPartProvider.java:44) + at java.base/java.util.Optional.flatMap(Optional.java:289) + at com.epam.ta.reportportal.core.launch.cluster.pipeline.SaveClusterDataPartProvider.provide(SaveClusterDataPartProvider.java:44) + at com.epam.ta.reportportal.core.launch.cluster.pipeline.SaveClusterDataPartProvider.provide(SaveClusterDataPartProvider.java:30) + at com.epam.ta.reportportal.pipeline.PipelineConstructor.lambda$construct$0(PipelineConstructor.java:34) + at java.base/java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:197) + at java.base/java.util.AbstractList$RandomAccessSpliterator.forEachRemaining(AbstractList.java:722) + at java.base/java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:509) + at java.base/java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:499) + at java.base/java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:921) + at java.base/java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234) + at java.base/java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:682) + at com.epam.ta.reportportal.pipeline.PipelineConstructor.construct(PipelineConstructor.java:34) + at com.epam.ta.reportportal.core.launch.cluster.UniqueErrorGenerator.generateClusters(UniqueErrorGenerator.java:78) + at com.epam.ta.reportportal.core.launch.cluster.UniqueErrorGenerator.generate(UniqueErrorGenerator.java:60) + at com.epam.ta.reportportal.core.events.handler.item.TestItemUniqueErrorAnalysisRunner.handle(TestItemUniqueErrorAnalysisRunner.java:66) + at com.epam.ta.reportportal.core.events.handler.item.TestItemUniqueErrorAnalysisRunner.handle(TestItemUniqueErrorAnalysisRunner.java:37) + at com.epam.ta.reportportal.core.events.subscriber.impl.delegate.ProjectConfigDelegatingSubscriber.lambda$handleEvent$0(ProjectConfigDelegatingSubscriber.java:44) + at java.base/java.lang.Iterable.forEach(Iterable.java:75) + at com.epam.ta.reportportal.core.events.subscriber.impl.delegate.ProjectConfigDelegatingSubscriber.handleEvent(ProjectConfigDelegatingSubscriber.java:44) + at com.epam.ta.reportportal.core.events.subscriber.impl.delegate.ProjectConfigDelegatingSubscriber.handleEvent(ProjectConfigDelegatingSubscriber.java:29) + at com.epam.ta.reportportal.core.events.listener.TestItemIssueResolvedEventListener.lambda$onApplicationEvent$0(TestItemIssueResolvedEventListener.java:39) + at java.base/java.lang.Iterable.forEach(Iterable.java:75) + at com.epam.ta.reportportal.core.events.listener.TestItemIssueResolvedEventListener.onApplicationEvent(TestItemIssueResolvedEventListener.java:39) + at com.epam.ta.reportportal.core.events.listener.TestItemIssueResolvedEventListener$$FastClassBySpringCGLIB$$78c5649f.invoke() + at org.springframework.cglib.proxy.MethodProxy.invoke(MethodProxy.java:218) + at org.springframework.aop.framework.CglibAopProxy$CglibMethodInvocation.invokeJoinpoint(CglibAopProxy.java:793) + at 
org.springframework.aop.framework.ReflectiveMethodInvocation.proceed(ReflectiveMethodInvocation.java:163) + at org.springframework.aop.framework.CglibAopProxy$CglibMethodInvocation.proceed(CglibAopProxy.java:763) + at org.springframework.aop.interceptor.AsyncExecutionInterceptor.lambda$invoke$0(AsyncExecutionInterceptor.java:115) + at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:317) + at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144) + at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642) + at java.base/java.lang.Thread.run(Thread.java:1583) diff --git a/test_res/test_logs/stacktraces/log_stacktrace_generated_2.txt b/test_res/test_logs/stacktraces/log_stacktrace_generated_2.txt new file mode 100644 index 00000000..a510c474 --- /dev/null +++ b/test_res/test_logs/stacktraces/log_stacktrace_generated_2.txt @@ -0,0 +1,6 @@ +Message 1 + Message 2 + Message 3 'prod_en' /src/prod/results.html + java.lang.NoClassDefFoundError + de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator$1@31ca8ab4 + ca.canadiantire.steps.hybris.ws.WebserviceHybrisCustomerAndCartSteps$$EnhancerByCGLIB$$84837ae7.CGLIB$add_products_to_cart$16() diff --git a/test_res/test_logs/stacktraces/log_stacktrace_generated_3.txt b/test_res/test_logs/stacktraces/log_stacktrace_generated_3.txt new file mode 100644 index 00000000..ccc089bc --- /dev/null +++ b/test_res/test_logs/stacktraces/log_stacktrace_generated_3.txt @@ -0,0 +1,4 @@ +Message 1 + java.lang.reflect.Method.invoke(Method.java:498) + message error caused by exception + ... 34 more diff --git a/test_res/test_logs/stacktraces/log_stacktrace_js.txt b/test_res/test_logs/stacktraces/log_stacktrace_js.txt new file mode 100644 index 00000000..ce835d52 --- /dev/null +++ b/test_res/test_logs/stacktraces/log_stacktrace_js.txt @@ -0,0 +1,11 @@ +!!!MARKDOWN_MODE!!! 
+And eventually "Result title in #1 of Search result list items" should be visible +``` +AssertionError: '(//*//*[contains(concat(' ', normalize-space(./@class), ' '), ' srl-results-container ')]//*[./@data-testid = 'srl-item'])[position()=1]//*[./@data-testid = 'srl-item-title']' is not visible: expected false to deeply equal true + Error: AssertionError: '(//*//*[contains(concat(' ', normalize-space(./@class), ' '), ' srl-results-container ')]//*[./@data-testid = 'srl-item'])[position()=1]//*[./@data-testid = 'srl-item-title']' is not visible: expected false to deeply equal true + at Object.handleEventually (Z:\xml-data\build-dir\PROJECT-01\prj-e2e-acc\node_modules\@pr\aura-accelerate\lib\step_definitions\common.js:42:11) + at async ElementsHelper.seeVisibleElement (Z:\xml-data\build-dir\PROJECT-01\prj-e2e-acc\node_modules\@pr\aura-accelerate\lib\helpers\elementsHelper.js:38:13) + at async Z:\xml-data\build-dir\PROJECT-01\prj-e2e-acc\node_modules\@pr\aura-accelerate\lib\step_definitions\then.js:42:9 + at async runSteps (Z:\xml-data\build-dir\PROJECT-01\prj-e2e-acc\node_modules\codeceptjs\lib\interfaces\gherkin.js:77:9) +i Current URL: https://alb-1-2-3-ab-12345-treex.pint.prj-abcd-v2-dev.example.cloud/results/20209492515162 +``` diff --git a/test_res/test_logs/stacktraces/log_stacktrace_js_exception_message_no_params_and_brackets.txt b/test_res/test_logs/stacktraces/log_stacktrace_js_exception_message_no_params_and_brackets.txt new file mode 100644 index 00000000..56f7ac10 --- /dev/null +++ b/test_res/test_logs/stacktraces/log_stacktrace_js_exception_message_no_params_and_brackets.txt @@ -0,0 +1,4 @@ +And eventually Result title in SPECIALNUMBER of Search result list items should be visible TEXTDELIMITER +AssertionError contains concat normalize-space class srl-results-container data-testid srl-item position SPECIALNUMBER data-testid srl-item-title is not visible expected false to deeply equal true +Error AssertionError contains concat normalize-space class srl-results-container data-testid srl-item position SPECIALNUMBER data-testid srl-item-title is not visible expected false to deeply equal true +i Current URL https alb-SPECIALNUMBER-SPECIALNUMBER-SPECIALNUMBER-ab-SPECIALNUMBER-treex pint prj-abcd-v-dev example cloud results SPECIALNUMBER TEXTDELIMITER diff --git a/test_res/test_logs/stacktraces/log_stacktrace_js_prepared.txt b/test_res/test_logs/stacktraces/log_stacktrace_js_prepared.txt new file mode 100644 index 00000000..85039641 --- /dev/null +++ b/test_res/test_logs/stacktraces/log_stacktrace_js_prepared.txt @@ -0,0 +1,8 @@ +And eventually "Result title in #1 of Search result list items" should be visible TEXTDELIMITER +AssertionError: '(//*//*[contains(concat(' ', normalize-space(./@class), ' '), ' srl-results-container ')]//*[./@data-testid = 'srl-item'])[position()=1]//*[./@data-testid = 'srl-item-title']' is not visible: expected false to deeply equal true + Error: AssertionError: '(//*//*[contains(concat(' ', normalize-space(./@class), ' '), ' srl-results-container ')]//*[./@data-testid = 'srl-item'])[position()=1]//*[./@data-testid = 'srl-item-title']' is not visible: expected false to deeply equal true + at Object.handleEventually (Z:\xml-data\build-dir\PROJECT-01\prj-e2e-acc\node_modules\@pr\aura-accelerate\lib\step_definitions\common.js:42:11) + at async ElementsHelper.seeVisibleElement (Z:\xml-data\build-dir\PROJECT-01\prj-e2e-acc\node_modules\@pr\aura-accelerate\lib\helpers\elementsHelper.js:38:13) + at async 
Z:\xml-data\build-dir\PROJECT-01\prj-e2e-acc\node_modules\@pr\aura-accelerate\lib\step_definitions\then.js:42:9 + at async runSteps (Z:\xml-data\build-dir\PROJECT-01\prj-e2e-acc\node_modules\codeceptjs\lib\interfaces\gherkin.js:77:9) +i Current URL: https://alb-1-2-3-ab-12345-treex.pint.prj-abcd-v2-dev.example.cloud/results/20209492515162 TEXTDELIMITER diff --git a/test_res/test_logs/stacktraces/log_stacktrace_prepared.txt b/test_res/test_logs/stacktraces/log_stacktrace_prepared.txt new file mode 100644 index 00000000..e7938b2d --- /dev/null +++ b/test_res/test_logs/stacktraces/log_stacktrace_prepared.txt @@ -0,0 +1,36 @@ +com.epam.ta.reportportal.exception.ReportPortalException: Impossible interact with integration. There are no analyzer services are deployed. + at com.epam.ta.reportportal.commons.validation.ErrorTypeBasedRuleValidator.verify(ErrorTypeBasedRuleValidator.java:32) + at com.epam.ta.reportportal.core.launch.cluster.pipeline.data.AnalyzerClusterDataProvider.provide(AnalyzerClusterDataProvider.java:47) + at com.epam.ta.reportportal.core.launch.cluster.pipeline.SaveClusterDataPartProvider.lambda.provide.0(SaveClusterDataPartProvider.java:44) + at java.base/java.util.Optional.flatMap(Optional.java:289) + at com.epam.ta.reportportal.core.launch.cluster.pipeline.SaveClusterDataPartProvider.provide(SaveClusterDataPartProvider.java:44) + at com.epam.ta.reportportal.core.launch.cluster.pipeline.SaveClusterDataPartProvider.provide(SaveClusterDataPartProvider.java:30) + at com.epam.ta.reportportal.pipeline.PipelineConstructor.lambda.construct.0(PipelineConstructor.java:34) + at java.base/java.util.stream.ReferencePipeline.3.1.accept(ReferencePipeline.java:197) + at java.base/java.util.AbstractList.RandomAccessSpliterator.forEachRemaining(AbstractList.java:722) + at java.base/java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:509) + at java.base/java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:499) + at java.base/java.util.stream.ReduceOps.ReduceOp.evaluateSequential(ReduceOps.java:921) + at java.base/java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234) + at java.base/java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:682) + at com.epam.ta.reportportal.pipeline.PipelineConstructor.construct(PipelineConstructor.java:34) + at com.epam.ta.reportportal.core.launch.cluster.UniqueErrorGenerator.generateClusters(UniqueErrorGenerator.java:78) + at com.epam.ta.reportportal.core.launch.cluster.UniqueErrorGenerator.generate(UniqueErrorGenerator.java:60) + at com.epam.ta.reportportal.core.events.handler.item.TestItemUniqueErrorAnalysisRunner.handle(TestItemUniqueErrorAnalysisRunner.java:66) + at com.epam.ta.reportportal.core.events.handler.item.TestItemUniqueErrorAnalysisRunner.handle(TestItemUniqueErrorAnalysisRunner.java:37) + at com.epam.ta.reportportal.core.events.subscriber.impl.delegate.ProjectConfigDelegatingSubscriber.lambda.handleEvent.0(ProjectConfigDelegatingSubscriber.java:44) + at java.base/java.lang.Iterable.forEach(Iterable.java:75) + at com.epam.ta.reportportal.core.events.subscriber.impl.delegate.ProjectConfigDelegatingSubscriber.handleEvent(ProjectConfigDelegatingSubscriber.java:44) + at com.epam.ta.reportportal.core.events.subscriber.impl.delegate.ProjectConfigDelegatingSubscriber.handleEvent(ProjectConfigDelegatingSubscriber.java:29) + at com.epam.ta.reportportal.core.events.listener.TestItemIssueResolvedEventListener.lambda.onApplicationEvent.0(TestItemIssueResolvedEventListener.java:39) + at 
java.base/java.lang.Iterable.forEach(Iterable.java:75) + at com.epam.ta.reportportal.core.events.listener.TestItemIssueResolvedEventListener.onApplicationEvent(TestItemIssueResolvedEventListener.java:39) + at org.springframework.cglib.proxy.MethodProxy.invoke(MethodProxy.java:218) + at org.springframework.aop.framework.CglibAopProxy.CglibMethodInvocation.invokeJoinpoint(CglibAopProxy.java:793) + at org.springframework.aop.framework.ReflectiveMethodInvocation.proceed(ReflectiveMethodInvocation.java:163) + at org.springframework.aop.framework.CglibAopProxy.CglibMethodInvocation.proceed(CglibAopProxy.java:763) + at org.springframework.aop.interceptor.AsyncExecutionInterceptor.lambda.invoke.0(AsyncExecutionInterceptor.java:115) + at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:317) + at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144) + at java.base/java.util.concurrent.ThreadPoolExecutor.Worker.run(ThreadPoolExecutor.java:642) + at java.base/java.lang.Thread.run(Thread.java:1583) diff --git a/test_res/test_logs/stacktraces/log_stacktrace_prepared_2.txt b/test_res/test_logs/stacktraces/log_stacktrace_prepared_2.txt new file mode 100644 index 00000000..b3e6d6f8 --- /dev/null +++ b/test_res/test_logs/stacktraces/log_stacktrace_prepared_2.txt @@ -0,0 +1,5 @@ +Message 1 + Message 2 + Message 3 'prod_en' /src/prod/results.html + java.lang.NoClassDefFoundError + de.hybris.platform.servicelayer.interceptor.impl.MandatoryAttributesValidator.1 diff --git a/test_res/test_logs/stacktraces/log_stacktrace_prepared_3.txt b/test_res/test_logs/stacktraces/log_stacktrace_prepared_3.txt new file mode 100644 index 00000000..c7a4b07d --- /dev/null +++ b/test_res/test_logs/stacktraces/log_stacktrace_prepared_3.txt @@ -0,0 +1,3 @@ +Message 1 + java.lang.reflect.Method.invoke(Method.java:498) + message error caused by exception diff --git a/test_res/test_logs/stacktraces/webdriver_selenide_stacktrace.txt b/test_res/test_logs/stacktraces/webdriver_selenide_stacktrace.txt new file mode 100644 index 00000000..7271fa36 --- /dev/null +++ b/test_res/test_logs/stacktraces/webdriver_selenide_stacktrace.txt @@ -0,0 +1,51 @@ +Element should be clickable: interactable and enabled {button.btn-create} +Element: '' +Actual value: disabled +Screenshot: file:/home/jenkins/workspace/Project/Test-Framework/build/reports/tests/1711509358022.38.png +Page source: file:/home/jenkins/workspace/Project/Test-Framework/build/reports/tests/1711509358022.38.html +Timeout: 4 s. 
+ at com.codeborne.selenide.impl.WebElementSource.handleError(WebElementSource.java:149) + at com.codeborne.selenide.impl.WebElementSource.checkConditionAndReturnElement(WebElementSource.java:127) + at com.codeborne.selenide.impl.WebElementSource.findAndAssertElementIsClickable(WebElementSource.java:173) + at com.codeborne.selenide.commands.Click.findElement(Click.java:46) + at com.codeborne.selenide.commands.Click.execute(Click.java:28) + at com.codeborne.selenide.commands.Click.execute(Click.java:21) + at com.codeborne.selenide.commands.Commands.execute(Commands.java:164) + at com.codeborne.selenide.impl.SelenideElementProxy.dispatchAndRetry(SelenideElementProxy.java:132) + at com.codeborne.selenide.impl.SelenideElementProxy.invoke(SelenideElementProxy.java:84) + at com.sun.proxy.$Proxy69.click(Unknown Source) + at example.hit.pages.NewInterviewPage.scheduleInterview(NewInterviewPage.java:519) + at com.example.automation.ui.regression.interview.AdditionalSkillsTest.skillToCheckTest(AdditionalSkillsTest.java:75) + at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method) + at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) + at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) + at java.base/java.lang.reflect.Method.invoke(Method.java:566) + at org.testng.internal.MethodInvocationHelper.invokeMethod(MethodInvocationHelper.java:133) + at org.testng.internal.TestInvoker.invokeMethod(TestInvoker.java:598) + at org.testng.internal.TestInvoker.invokeTestMethod(TestInvoker.java:173) + at org.testng.internal.MethodRunner.runInSequence(MethodRunner.java:46) + at org.testng.internal.TestInvoker$MethodInvocationAgent.invoke(TestInvoker.java:824) + at org.testng.internal.TestInvoker.invokeTestMethods(TestInvoker.java:146) + at org.testng.internal.TestMethodWorker.invokeTestMethods(TestMethodWorker.java:146) + at org.testng.internal.TestMethodWorker.run(TestMethodWorker.java:128) + at java.base/java.util.ArrayList.forEach(ArrayList.java:1541) + at org.testng.TestRunner.privateRun(TestRunner.java:794) + at org.testng.TestRunner.run(TestRunner.java:596) + at org.testng.SuiteRunner.runTest(SuiteRunner.java:377) + at org.testng.SuiteRunner.runSequentially(SuiteRunner.java:371) + at org.testng.SuiteRunner.privateRun(SuiteRunner.java:332) + at org.testng.SuiteRunner.run(SuiteRunner.java:276) + at org.testng.SuiteRunnerWorker.runSuite(SuiteRunnerWorker.java:53) + at org.testng.SuiteRunnerWorker.run(SuiteRunnerWorker.java:96) + at org.testng.TestNG.runSuitesSequentially(TestNG.java:1212) + at org.testng.TestNG.runSuitesSequentially(TestNG.java:1207) + at org.testng.TestNG.runSuitesLocally(TestNG.java:1134) + at org.testng.TestNG.runSuites(TestNG.java:1063) + at org.testng.TestNG.run(TestNG.java:1031) + at org.apache.maven.surefire.testng.TestNGExecutor.run(TestNGExecutor.java:284) + at org.apache.maven.surefire.testng.TestNGXmlTestSuite.execute(TestNGXmlTestSuite.java:75) + at org.apache.maven.surefire.testng.TestNGProvider.invoke(TestNGProvider.java:119) + at org.apache.maven.surefire.booter.ForkedBooter.runSuitesInProcess(ForkedBooter.java:428) + at org.apache.maven.surefire.booter.ForkedBooter.execute(ForkedBooter.java:162) + at org.apache.maven.surefire.booter.ForkedBooter.run(ForkedBooter.java:562) + at org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:548) diff --git a/test_res/test_logs/stacktraces/webdriver_selenide_stacktrace_no_webdriver.txt 
b/test_res/test_logs/stacktraces/webdriver_selenide_stacktrace_no_webdriver.txt new file mode 100644 index 00000000..8d650ba4 --- /dev/null +++ b/test_res/test_logs/stacktraces/webdriver_selenide_stacktrace_no_webdriver.txt @@ -0,0 +1,49 @@ +Element should be clickable: interactable and enabled {button.btn-create} +Element: '' +Actual value: disabled +Timeout: 4 s. + at com.codeborne.selenide.impl.WebElementSource.handleError(WebElementSource.java:149) + at com.codeborne.selenide.impl.WebElementSource.checkConditionAndReturnElement(WebElementSource.java:127) + at com.codeborne.selenide.impl.WebElementSource.findAndAssertElementIsClickable(WebElementSource.java:173) + at com.codeborne.selenide.commands.Click.findElement(Click.java:46) + at com.codeborne.selenide.commands.Click.execute(Click.java:28) + at com.codeborne.selenide.commands.Click.execute(Click.java:21) + at com.codeborne.selenide.commands.Commands.execute(Commands.java:164) + at com.codeborne.selenide.impl.SelenideElementProxy.dispatchAndRetry(SelenideElementProxy.java:132) + at com.codeborne.selenide.impl.SelenideElementProxy.invoke(SelenideElementProxy.java:84) + at com.sun.proxy.$Proxy69.click(Unknown Source) + at example.hit.pages.NewInterviewPage.scheduleInterview(NewInterviewPage.java:519) + at com.example.automation.ui.regression.interview.AdditionalSkillsTest.skillToCheckTest(AdditionalSkillsTest.java:75) + at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method) + at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) + at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) + at java.base/java.lang.reflect.Method.invoke(Method.java:566) + at org.testng.internal.MethodInvocationHelper.invokeMethod(MethodInvocationHelper.java:133) + at org.testng.internal.TestInvoker.invokeMethod(TestInvoker.java:598) + at org.testng.internal.TestInvoker.invokeTestMethod(TestInvoker.java:173) + at org.testng.internal.MethodRunner.runInSequence(MethodRunner.java:46) + at org.testng.internal.TestInvoker$MethodInvocationAgent.invoke(TestInvoker.java:824) + at org.testng.internal.TestInvoker.invokeTestMethods(TestInvoker.java:146) + at org.testng.internal.TestMethodWorker.invokeTestMethods(TestMethodWorker.java:146) + at org.testng.internal.TestMethodWorker.run(TestMethodWorker.java:128) + at java.base/java.util.ArrayList.forEach(ArrayList.java:1541) + at org.testng.TestRunner.privateRun(TestRunner.java:794) + at org.testng.TestRunner.run(TestRunner.java:596) + at org.testng.SuiteRunner.runTest(SuiteRunner.java:377) + at org.testng.SuiteRunner.runSequentially(SuiteRunner.java:371) + at org.testng.SuiteRunner.privateRun(SuiteRunner.java:332) + at org.testng.SuiteRunner.run(SuiteRunner.java:276) + at org.testng.SuiteRunnerWorker.runSuite(SuiteRunnerWorker.java:53) + at org.testng.SuiteRunnerWorker.run(SuiteRunnerWorker.java:96) + at org.testng.TestNG.runSuitesSequentially(TestNG.java:1212) + at org.testng.TestNG.runSuitesSequentially(TestNG.java:1207) + at org.testng.TestNG.runSuitesLocally(TestNG.java:1134) + at org.testng.TestNG.runSuites(TestNG.java:1063) + at org.testng.TestNG.run(TestNG.java:1031) + at org.apache.maven.surefire.testng.TestNGExecutor.run(TestNGExecutor.java:284) + at org.apache.maven.surefire.testng.TestNGXmlTestSuite.execute(TestNGXmlTestSuite.java:75) + at org.apache.maven.surefire.testng.TestNGProvider.invoke(TestNGProvider.java:119) + at 
org.apache.maven.surefire.booter.ForkedBooter.runSuitesInProcess(ForkedBooter.java:428) + at org.apache.maven.surefire.booter.ForkedBooter.execute(ForkedBooter.java:162) + at org.apache.maven.surefire.booter.ForkedBooter.run(ForkedBooter.java:562) + at org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:548) diff --git a/test_res/test_logs/stacktraces/webdriver_selenide_stacktrace_prepared.txt b/test_res/test_logs/stacktraces/webdriver_selenide_stacktrace_prepared.txt new file mode 100644 index 00000000..462a8653 --- /dev/null +++ b/test_res/test_logs/stacktraces/webdriver_selenide_stacktrace_prepared.txt @@ -0,0 +1,49 @@ +Element should be clickable: interactable and enabled {button.btn-create} +Element: '' +Actual value: disabled +Timeout: 4 s. +at com.codeborne.selenide.impl.WebElementSource.handleError(WebElementSource.java:149) +at com.codeborne.selenide.impl.WebElementSource.checkConditionAndReturnElement(WebElementSource.java:127) +at com.codeborne.selenide.impl.WebElementSource.findAndAssertElementIsClickable(WebElementSource.java:173) +at com.codeborne.selenide.commands.Click.findElement(Click.java:46) +at com.codeborne.selenide.commands.Click.execute(Click.java:28) +at com.codeborne.selenide.commands.Click.execute(Click.java:21) +at com.codeborne.selenide.commands.Commands.execute(Commands.java:164) +at com.codeborne.selenide.impl.SelenideElementProxy.dispatchAndRetry(SelenideElementProxy.java:132) +at com.codeborne.selenide.impl.SelenideElementProxy.invoke(SelenideElementProxy.java:84) +at com.sun.proxy.$Proxy69.click(Unknown Source) +at example.hit.pages.NewInterviewPage.scheduleInterview(NewInterviewPage.java:519) +at com.example.automation.ui.regression.interview.AdditionalSkillsTest.skillToCheckTest(AdditionalSkillsTest.java:75) +at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method) +at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) +at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) +at java.base/java.lang.reflect.Method.invoke(Method.java:566) +at org.testng.internal.MethodInvocationHelper.invokeMethod(MethodInvocationHelper.java:133) +at org.testng.internal.TestInvoker.invokeMethod(TestInvoker.java:598) +at org.testng.internal.TestInvoker.invokeTestMethod(TestInvoker.java:173) +at org.testng.internal.MethodRunner.runInSequence(MethodRunner.java:46) +at org.testng.internal.TestInvoker.MethodInvocationAgent.invoke(TestInvoker.java:824) +at org.testng.internal.TestInvoker.invokeTestMethods(TestInvoker.java:146) +at org.testng.internal.TestMethodWorker.invokeTestMethods(TestMethodWorker.java:146) +at org.testng.internal.TestMethodWorker.run(TestMethodWorker.java:128) +at java.base/java.util.ArrayList.forEach(ArrayList.java:1541) +at org.testng.TestRunner.privateRun(TestRunner.java:794) +at org.testng.TestRunner.run(TestRunner.java:596) +at org.testng.SuiteRunner.runTest(SuiteRunner.java:377) +at org.testng.SuiteRunner.runSequentially(SuiteRunner.java:371) +at org.testng.SuiteRunner.privateRun(SuiteRunner.java:332) +at org.testng.SuiteRunner.run(SuiteRunner.java:276) +at org.testng.SuiteRunnerWorker.runSuite(SuiteRunnerWorker.java:53) +at org.testng.SuiteRunnerWorker.run(SuiteRunnerWorker.java:96) +at org.testng.TestNG.runSuitesSequentially(TestNG.java:1212) +at org.testng.TestNG.runSuitesSequentially(TestNG.java:1207) +at org.testng.TestNG.runSuitesLocally(TestNG.java:1134) +at org.testng.TestNG.runSuites(TestNG.java:1063) +at 
org.testng.TestNG.run(TestNG.java:1031) +at org.apache.maven.surefire.testng.TestNGExecutor.run(TestNGExecutor.java:284) +at org.apache.maven.surefire.testng.TestNGXmlTestSuite.execute(TestNGXmlTestSuite.java:75) +at org.apache.maven.surefire.testng.TestNGProvider.invoke(TestNGProvider.java:119) +at org.apache.maven.surefire.booter.ForkedBooter.runSuitesInProcess(ForkedBooter.java:428) +at org.apache.maven.surefire.booter.ForkedBooter.execute(ForkedBooter.java:162) +at org.apache.maven.surefire.booter.ForkedBooter.run(ForkedBooter.java:562) +at org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:548) diff --git a/test_res/test_logs/webdriver/webdriver_exception_info.txt b/test_res/test_logs/webdriver/webdriver_exception_info.txt new file mode 100644 index 00000000..8414f188 --- /dev/null +++ b/test_res/test_logs/webdriver/webdriver_exception_info.txt @@ -0,0 +1,38 @@ +org.openqa.selenium.TimeoutException: Expected condition failed: waiting for visibility of element located by By.xpath: //*[contains(@class,'nav-bar_menu-items') and contains(text(),'Blog')] (tried for 20 second(s) with 500 milliseconds interval) +Build info: version: '4.2.1', revision: 'ac4d0fdd4a' +System info: host: 'AATESTMACHINE1', ip: '192.168.0.110', os.name: 'Windows 10', os.arch: 'amd64', os.version: '10.0', java.version: '17.0.9' +Driver info: org.openqa.selenium.remote.RemoteWebDriver +Capabilities {acceptInsecureCerts: false, browserName: chrome, browserVersion: 123.0.6312.86, chrome: {chromedriverVersion: 122.0.6261.94 (880dbf29479c..., userDataDir: C:\Users\DZMITR~1\AppData\L...}, fedcm:accounts: true, goog:chromeOptions: {debuggerAddress: localhost:59148}, networkConnectionEnabled: false, pageLoadStrategy: normal, platformName: Windows 10, proxy: Proxy(), se:bidiEnabled: false, se:cdp: ws://192.168.0.110:4445/ses..., se:cdpVersion: 123.0.6312.86, setWindowRect: true, strictFileInteractability: false, timeouts: {implicit: 0, pageLoad: 300000, script: 30000}, unhandledPromptBehavior: dismiss and notify, webauthn:extension:credBlob: true, webauthn:extension:largeBlob: true, webauthn:extension:minPinLength: true, webauthn:extension:prf: true, webauthn:virtualAuthenticators: true} +Session ID: 14d0e78a93616c2f1711f75213960e93 + at org.openqa.selenium.support.ui.WebDriverWait.timeoutException(WebDriverWait.java:87) + at org.openqa.selenium.support.ui.FluentWait.until(FluentWait.java:231) + at com.example.framework.ui.element.Element.waitForVisibility(Element.java:585) + at com.example.framework.ui.element.Element.waitForVisibility(Element.java:613) + at com.example.screens.HeaderScreen.waitLinksToRedirectOnOtherSectionsDisplayed(HeaderScreen.java:218) + at com.example.services.LoginService.loginByUserCredentials(LoginService.java:66) + at com.example.services.LoginService.loginAndSetLanguage(LoginService.java:26) + at com.example.services.LoginService.loginAndGoToProgramsManagementScreen(LoginService.java:40) + at com.example.smoke.traininglist.VerifyThatTimeLimitedTrainingsDisplayInTrainingListOnlyIfTheyHaveStartDateInFuture.setStartDateOfExistingTrainingsAsTrainingManager(VerifyThatTimeLimitedTrainingsDisplayInTrainingListOnlyIfTheyHaveStartDateInFuture.java:86) + at com.example.smoke.traininglist.VerifyThatTimeLimitedTrainingsDisplayInTrainingListOnlyIfTheyHaveStartDateInFuture.checkPresenceOfTrainingsDependingOnStartDates(VerifyThatTimeLimitedTrainingsDisplayInTrainingListOnlyIfTheyHaveStartDateInFuture.java:49) + at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method) + 
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77) + at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) + at java.base/java.lang.reflect.Method.invoke(Method.java:568) + at org.testng.internal.invokers.MethodInvocationHelper.invokeMethod(MethodInvocationHelper.java:139) + at org.testng.internal.invokers.TestInvoker.invokeMethod(TestInvoker.java:677) + at org.testng.internal.invokers.TestInvoker.invokeTestMethod(TestInvoker.java:221) + at org.testng.internal.invokers.MethodRunner.runInSequence(MethodRunner.java:50) + at org.testng.internal.invokers.TestInvoker$MethodInvocationAgent.invoke(TestInvoker.java:962) + at org.testng.internal.invokers.TestInvoker.invokeTestMethods(TestInvoker.java:194) + at org.testng.internal.invokers.TestMethodWorker.invokeTestMethods(TestMethodWorker.java:148) + at org.testng.internal.invokers.TestMethodWorker.run(TestMethodWorker.java:128) + at java.base/java.util.ArrayList.forEach(ArrayList.java:1511) + at org.testng.TestRunner.privateRun(TestRunner.java:806) + at org.testng.TestRunner.run(TestRunner.java:601) + at org.testng.SuiteRunner.runTest(SuiteRunner.java:433) + at org.testng.SuiteRunner$SuiteWorker.run(SuiteRunner.java:471) + at org.testng.internal.thread.ThreadUtil.lambda$execute$0(ThreadUtil.java:58) + at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264) + at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136) + at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635) + at java.base/java.lang.Thread.run(Thread.java:842) diff --git a/test_res/test_logs/webdriver/webdriver_exception_info_prepared.txt b/test_res/test_logs/webdriver/webdriver_exception_info_prepared.txt new file mode 100644 index 00000000..49106490 --- /dev/null +++ b/test_res/test_logs/webdriver/webdriver_exception_info_prepared.txt @@ -0,0 +1,34 @@ +org.openqa.selenium.TimeoutException: Expected condition failed: waiting for visibility of element located by By.xpath: //*[contains(@class,'nav-bar_menu-items') and contains(text(),'Blog')] (tried for 20 second(s) with 500 milliseconds interval) +Session ID: SPECIALUUID +at org.openqa.selenium.support.ui.WebDriverWait.timeoutException(WebDriverWait.java:87) +at org.openqa.selenium.support.ui.FluentWait.until(FluentWait.java:231) +at com.example.framework.ui.element.Element.waitForVisibility(Element.java:585) +at com.example.framework.ui.element.Element.waitForVisibility(Element.java:613) +at com.example.screens.HeaderScreen.waitLinksToRedirectOnOtherSectionsDisplayed(HeaderScreen.java:218) +at com.example.services.LoginService.loginByUserCredentials(LoginService.java:66) +at com.example.services.LoginService.loginAndSetLanguage(LoginService.java:26) +at com.example.services.LoginService.loginAndGoToProgramsManagementScreen(LoginService.java:40) +at com.example.smoke.traininglist.VerifyThatTimeLimitedTrainingsDisplayInTrainingListOnlyIfTheyHaveStartDateInFuture.setStartDateOfExistingTrainingsAsTrainingManager(VerifyThatTimeLimitedTrainingsDisplayInTrainingListOnlyIfTheyHaveStartDateInFuture.java:86) +at com.example.smoke.traininglist.VerifyThatTimeLimitedTrainingsDisplayInTrainingListOnlyIfTheyHaveStartDateInFuture.checkPresenceOfTrainingsDependingOnStartDates(VerifyThatTimeLimitedTrainingsDisplayInTrainingListOnlyIfTheyHaveStartDateInFuture.java:49) +at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method) +at 
java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77) +at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) +at java.base/java.lang.reflect.Method.invoke(Method.java:568) +at org.testng.internal.invokers.MethodInvocationHelper.invokeMethod(MethodInvocationHelper.java:139) +at org.testng.internal.invokers.TestInvoker.invokeMethod(TestInvoker.java:677) +at org.testng.internal.invokers.TestInvoker.invokeTestMethod(TestInvoker.java:221) +at org.testng.internal.invokers.MethodRunner.runInSequence(MethodRunner.java:50) +at org.testng.internal.invokers.TestInvoker.MethodInvocationAgent.invoke(TestInvoker.java:962) +at org.testng.internal.invokers.TestInvoker.invokeTestMethods(TestInvoker.java:194) +at org.testng.internal.invokers.TestMethodWorker.invokeTestMethods(TestMethodWorker.java:148) +at org.testng.internal.invokers.TestMethodWorker.run(TestMethodWorker.java:128) +at java.base/java.util.ArrayList.forEach(ArrayList.java:1511) +at org.testng.TestRunner.privateRun(TestRunner.java:806) +at org.testng.TestRunner.run(TestRunner.java:601) +at org.testng.SuiteRunner.runTest(SuiteRunner.java:433) +at org.testng.SuiteRunner.SuiteWorker.run(SuiteRunner.java:471) +at org.testng.internal.thread.ThreadUtil.lambda.execute.0(ThreadUtil.java:58) +at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264) +at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136) +at java.base/java.util.concurrent.ThreadPoolExecutor.Worker.run(ThreadPoolExecutor.java:635) +at java.base/java.lang.Thread.run(Thread.java:842) diff --git a/test_res/test_logs/webdriver/webdriver_oneliners.txt b/test_res/test_logs/webdriver/webdriver_oneliners.txt new file mode 100644 index 00000000..1ce79ac7 --- /dev/null +++ b/test_res/test_logs/webdriver/webdriver_oneliners.txt @@ -0,0 +1 @@ + -> Webdriver screenshot captured: screenshot14136914040349157708.png diff --git a/test_res/test_logs/webdriver/webdriver_oneliners_prepared.txt b/test_res/test_logs/webdriver/webdriver_oneliners_prepared.txt new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/test_res/test_logs/webdriver/webdriver_oneliners_prepared.txt @@ -0,0 +1 @@ +