diff --git a/dogesec_commons/stixifier/models.py b/dogesec_commons/stixifier/models.py index 8aa7b4c..e60e58f 100644 --- a/dogesec_commons/stixifier/models.py +++ b/dogesec_commons/stixifier/models.py @@ -28,8 +28,6 @@ class Profile(models.Model): created = models.DateTimeField(auto_now_add=True) name = models.CharField(max_length=250, unique=True) extractions = ArrayField(base_field=models.CharField(max_length=256)) - whitelists = ArrayField(base_field=models.CharField(max_length=256), default=list) - aliases = ArrayField(base_field=models.CharField(max_length=256), default=list) relationship_mode = models.CharField(choices=RelationshipMode.choices, max_length=20, default=RelationshipMode.STANDARD) extract_text_from_image = models.BooleanField(default=False) defang = models.BooleanField(help_text='If the text should be defanged before processing') diff --git a/dogesec_commons/stixifier/serializers.py b/dogesec_commons/stixifier/serializers.py index 618b854..f717ed9 100644 --- a/dogesec_commons/stixifier/serializers.py +++ b/dogesec_commons/stixifier/serializers.py @@ -71,16 +71,6 @@ class ProfileSerializer(serializers.ModelSerializer): child=serializers.CharField(max_length=256, validators=[partial(validate_extractor, 'extractor', ["ai", "pattern", "lookup"])]), help_text="extraction id(s)", ) - aliases = serializers.ListField( - child=serializers.CharField(max_length=256, validators=[partial(validate_extractor, 'alias', ["alias"])]), - help_text="alias id(s)", - required=False, - ) - whitelists = serializers.ListField( - child=serializers.CharField(max_length=256, validators=[partial(validate_extractor, 'whitelist', ["whitelist"])]), - help_text="whitelist id(s)", - required=False, - ) class Meta: model = Profile diff --git a/dogesec_commons/stixifier/stixifier.py b/dogesec_commons/stixifier/stixifier.py index b1e1760..f0657a4 100644 --- a/dogesec_commons/stixifier/stixifier.py +++ b/dogesec_commons/stixifier/stixifier.py @@ -88,8 +88,7 @@ def txt2stix(self): extractors_map[extractor.type][extractor.slug] = extractor else: extractors_map[extractor.type] = {extractor.slug: extractor} - aliases = all_extractors(self.profile.aliases) - whitelists = all_extractors(self.profile.whitelists) + bundler = txt2stixBundler( self.report_prop.name, @@ -105,19 +104,17 @@ def txt2stix(self): ) self.extra_data['_stixify_report_id'] = str(bundler.report.id) input_text = txt2stix.remove_data_images(self.output_md) - aliased_input = txt2stix.aliases.transform_all(aliases.values(), input_text) - bundler.whitelisted_values = txt2stix.lookups.merge_whitelists(whitelists.values()) ai_extractors = [txt2stix.parse_model(model_str) for model_str in self.profile.ai_settings_extractions] - txt2stix.validate_token_count(settings.INPUT_TOKEN_LIMIT, aliased_input, ai_extractors) + txt2stix.validate_token_count(settings.INPUT_TOKEN_LIMIT, input_text, ai_extractors) - all_extracts = txt2stix.extract_all(bundler, extractors_map, aliased_input, ai_extractors=ai_extractors) + all_extracts = txt2stix.extract_all(bundler, extractors_map, input_text, ai_extractors=ai_extractors) if self.profile.relationship_mode == models.RelationshipMode.AI and sum(map(lambda x: len(x), all_extracts.values())): ai_ref_extractor = txt2stix.parse_model(self.profile.ai_settings_relationships) - txt2stix.validate_token_count(settings.INPUT_TOKEN_LIMIT, aliased_input, [ai_ref_extractor]) - txt2stix.extract_relationships_with_ai(bundler, aliased_input, all_extracts, ai_ref_extractor) + txt2stix.validate_token_count(settings.INPUT_TOKEN_LIMIT, input_text, [ai_ref_extractor]) + txt2stix.extract_relationships_with_ai(bundler, input_text, all_extracts, ai_ref_extractor) return bundler diff --git a/dogesec_commons/stixifier/views.py b/dogesec_commons/stixifier/views.py index bf3518c..ed66693 100644 --- a/dogesec_commons/stixifier/views.py +++ b/dogesec_commons/stixifier/views.py @@ -18,7 +18,7 @@ @extend_schema_view( list=extend_schema( summary="Search profiles", - description="Profiles determine how txt2stix processes the text in each File. A profile consists of an extractors, aliases, and/or whitelists. You can search for existing profiles here.", + description="Profiles determine how txt2stix processes the text in each File. A profile consists of extractors. You can search for existing profiles here.", responses={400: DEFAULT_400_ERROR, 200: ProfileSerializer}, ), retrieve=extend_schema( @@ -30,12 +30,10 @@ summary="Create a new profile", description=textwrap.dedent( """ - Add a new Profile that can be applied to new Files. A profile consists of extractors, aliases, and/or whitelists. You can find available extractors, aliases, and whitelists via their respective endpoints.\n\n + Add a new Profile that can be applied to new Files. A profile consists of extractors. You can find available extractors via their respective endpoints.\n\n The following key/values are accepted in the body of the request:\n\n * `name` (required - must be unique) * `extractions` (required - at least one extraction ID): can be obtained from the GET Extractors endpoint. This is a [txt2stix](https://github.com/muchdogesec/txt2stix/) setting. - * `whitelists` (optional): can be obtained from the GET Whitelists endpoint. This is a [txt2stix](https://github.com/muchdogesec/txt2stix/) setting. - * `aliases` (optional): can be obtained from the GET Whitelists endpoint. This is a [txt2stix](https://github.com/muchdogesec/txt2stix/) setting. * `relationship_mode` (required): either `ai` or `standard`. Required AI provider to be configured if using `ai` mode. This is a [txt2stix](https://github.com/muchdogesec/txt2stix/) setting. * `ai_settings_extractions` (required if AI extraction used): A list of AI providers and models to be used for extraction in format `["provider:model","provider:model"]` e.g. `["openai:gpt-4o"]`. * `ai_settings_relationships` (required if AI relationship used): An AI provider and models to be used for relationship generation in format `"provider:model"` e.g. `"openai:gpt-4o"`. @@ -143,66 +141,4 @@ class ExtractorsView(txt2stixView): pagination_class = Pagination("extractors") def get_all(self): - return self.all_extractors(["lookup", "pattern", "ai"]) - -@extend_schema_view( - list=extend_schema( - summary="Search for Whitelists", - description=textwrap.dedent( - """ - In many cases files will have IoC extractions that are not malicious. e.g. `google.com` (and thus they don't want them to be extracted). Whitelists provide a list of values to be compared to extractions. If a whitelist value matches an extraction, that extraction is removed. To see the values used in this Whitelist, visit the URL shown as the value for the `file` key.\n\n - For more information see [txt2stix](https://github.com/muchdogesec/txt2stix/). - """ - ), - responses={400: DEFAULT_400_ERROR, 200: Txt2stixExtractorSerializer}, - ), - retrieve=extend_schema( - summary="Get a whitelist", - description="Get a specific Whitelist. To see the values used in this Whitelist, visit the URL shown as the value for the `file` key", - responses={400: DEFAULT_400_ERROR, 404: DEFAULT_404_ERROR, 200: Txt2stixExtractorSerializer}, - ), -) -class WhitelistsView(txt2stixView): - lookup_url_kwarg = "whitelist_id" - openapi_path_params = [ - OpenApiParameter( - lookup_url_kwarg, location=OpenApiParameter.PATH, type=OpenApiTypes.UUID, description="The `id` of the Whitelist." - ) - ] - openapi_tags = ["Whitelists"] - pagination_class = Pagination("whitelists") - - def get_all(self): - return self.all_extractors(["whitelist"]) - -@extend_schema_view( - list=extend_schema( - summary="Search for aliases", - description=textwrap.dedent( - """ - Aliases replace strings in the blog post with values defined in the Alias. Aliases are applied before extractions. For example, an alias of `USA` with a value `United States` will change all records of `USA` in the blog post with `United States`. To see the values used in this Alias, visit the URL shown as the value for the `file` key\n\n - For more information see [txt2stix](https://github.com/muchdogesec/txt2stix/). - """ - ), - responses={400: DEFAULT_400_ERROR, 200: Txt2stixExtractorSerializer}, - ), - retrieve=extend_schema( - summary="Get an Alias", - description="Get a specific Alias. To see the values used in this Alias, visit the URL shown as the value for the `file` key", - responses={400: DEFAULT_400_ERROR, 404: DEFAULT_404_ERROR, 200: Txt2stixExtractorSerializer}, - ), -) -class AliasesView(txt2stixView): - openapi_tags = ["Aliases"] - pagination_class = Pagination("aliases") - - lookup_url_kwarg = "alias_id" - - openapi_path_params = [ - OpenApiParameter( - lookup_url_kwarg, location=OpenApiParameter.PATH, type=OpenApiTypes.UUID, description="The `id` of the Alias." - ) - ] - - def get_all(self): - return self.all_extractors(["alias"]) + return self.all_extractors(["lookup", "pattern", "ai"]) \ No newline at end of file