From b79af8aeac672ae7780e87ddbdca9011685969be Mon Sep 17 00:00:00 2001 From: David G Date: Mon, 3 Jun 2024 08:21:43 +0100 Subject: [PATCH] initial commit --- .gitignore | 180 ++++++++++++++++++++++++ LICENSE | 202 +++++++++++++++++++++++++++ README.md | 334 ++++++++++++++++++++++++++++++++++++++++++++ requirements.txt | 17 +++ sigma2stix.py | 21 +++ src/__init__.py | 0 src/config.py | 21 +++ src/parser.py | 130 +++++++++++++++++ src/sigma2stix.py | 65 +++++++++ src/utils.py | 124 ++++++++++++++++ tests/demo_rule.yml | 32 +++++ 11 files changed, 1126 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 README.md create mode 100644 requirements.txt create mode 100644 sigma2stix.py create mode 100644 src/__init__.py create mode 100644 src/config.py create mode 100644 src/parser.py create mode 100644 src/sigma2stix.py create mode 100644 src/utils.py create mode 100644 tests/demo_rule.yml diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..69b8e6e --- /dev/null +++ b/.gitignore @@ -0,0 +1,180 @@ +# Adapted from https://github.com/github/gitignore/blob/main/Python.gitignore + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +.idea/ + +# VSCode settings (e.g. .vscode/settings.json containing personal preferred path to venv) +.vscode/ + +# macOS auto-generated file +.DS_Store +data/raw_json/ +data/raw_xml/ + +# venv in readme + +sigma2stix-venv/ + +# never post objects to the repo + +stix2_objects/ +data/ +bundle_store/ \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..f4a61da --- /dev/null +++ b/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright 2020 DOGESEC (https://www.dogesec.com/) + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..5b1dcca --- /dev/null +++ b/README.md @@ -0,0 +1,334 @@ +# sigma2stix + +A command line tool that converts Sigma Rules into STIX 2.1 Objects + +## Before you get started + +If you do not want to backfill, maintain, or support your own Sigma STIX objects check out CTI Butler which provides a fully managed database of these objects and more! + +https://www.ctibutler.com/ + +## Overview + +> Sigma is a generic and open signature format that allows you to describe relevant log events in a straightforward manner. The rule format is very flexible, easy to write and applicable to any type of log file. + +[SigmaHQ/sigma](https://github.com/SigmaHQ/sigma) + +Sigma Rules are written in a YAML format, and distributed as YAML files. + +The public rules (approved by the Sigma team) are stored in the main Sigma repository, nested in the `rules*` directories, e.g. + +`rules-emerging-threats/2023/Exploits/CVE-2023-20198/cisco_syslog_cve_2023_20198_ios_xe_web_ui.yml` + +https://github.com/SigmaHQ/sigma/blob/master/rules-emerging-threats/2023/Exploits/CVE-2023-20198/cisco_syslog_cve_2023_20198_ios_xe_web_ui.yml + +Here at Signals Corps, most of the data we deal with is in STIX 2.1 format. This is because downstream threat intelligence tools understand STIX. 
+ +Therefore sigma2stix works by converting Sigma Rules to STIX 2.1 objects. + +sigma2stix provides two modes: + +1. downloads the latest rules from the [SigmaHQ/sigma repository](https://github.com/SigmaHQ/sigma) and converts each rule into a range of STIX objects +2. accepts a Sigma rule in a YAML file and converts to a STIX indicator object + +## Installing the script + +To install sigma2stix; + +```shell +# clone the latest code +git clone https://github.com/muchdogesec/sigma2stix +# create a venv +cd sigma2stix +python3 -m venv sigma2stix-venv +source sigma2stix-venv/bin/activate +# install requirements +pip3 install -r requirements.txt +``` + +## Running the script + +### Mode 1: SigmaHQ/sigma repository -> STIX + +```shell +python3 sigma2stix.py \ + --mode sigmahq \ + --sigma_version_tag XXXX +``` + +Where; + +* `mode` (required): should always be `sigmahq` if you want to download the latest rules from the [SigmaHQ/sigma repository](https://github.com/SigmaHQ/sigma) +* `sigma_version_tag` (optional): is the name of the tag in the SigmaHQ/sigma repository ([tags listed here](https://github.com/SigmaHQ/sigma/releases)), e.g. `r2023-08-24`. If no value passed, the master branch will be cloned. + +Note this script only supports Sigma Rule version tags in the format `rYYYY-MM-DD`. 
+ +On each run all objects will be regenerated in the `stix2_objects` directory + +#### Example 1.1: Download latest (master) + +```shell +python3 sigma2stix.py \ + --mode sigmahq +``` + +#### Example 1.2: Download specific version + +```shell +python3 sigma2stix.py \ + --mode sigmahq \ + --sigma_version_tag r2023-08-24 +``` + +### Mode 2: SigmaHQ YAML file -> STIX + +```shell +python3 sigma2stix.py \ + --mode sigmayaml \ + --file PATH/TO/FILE.yaml +``` + +Where; + +* `mode` (required): should always be `sigmayaml` if you want to convert a local YAML file +* `file` (required): is the path to the YAML file containing only the Sigma Rule + +On each run all objects will be regenerated in the `stix2_objects` directory + +#### Example 2.1: Convert a local rule + +```shell +python3 sigma2stix.py \ + --mode sigmayaml \ + --file tests/demo_rule.yml +``` + +## Mapping information + +Here is how sigma2stix maps data to STIX objects from each Sigma Rules YAML. + +Note, the Sigma specification defines the attributes that can be found in the YAML files, and some of the taxonomies used for the properties to populate them. [View the specification here](https://sigmahq.io/sigma-specification/). + +### Marking Definition / Identity + +These are hardcoded and imported from our [stix4doge repository](https://github.com/muchdogesec/stix4doge). Specifically these objects; + +* Marking Definition: https://raw.githubusercontent.com/muchdogesec/stix4doge/main/objects/marking-definition/sigma2stix.json +* Identity: https://raw.githubusercontent.com/muchdogesec/stix4doge/main/objects/identity/sigma2stix.json + +### Indicators + +The core object created to represent a rule. + +```json +{ + "type": "indicator", + "spec_version": "2.1", + "id": "indicator--", + "created_by_ref": "", + "created": "", + "modified": "", + "indicator_types": [ + "malicious-activity", + "anomalous-activity" + ], + "name": "", + "description": ". 
The following false positives can result from this detection; ", + "pattern": "", + "pattern_type": "sigma", + "valid_from": "", + "external_references": [ + { + "source_name": "sigma-rule", + "url": " / " + }, + { + "source_name": "sigma-rule", + "external_id": "id", + "description": "" + }, + { + "source_name": "sigma-rule", + "external_id": "reference", + "description": "" + }, + { + "source_name": "sigma-rule", + "external_id": "level", + "description": "" + }, + { + "source_name": "sigma-rule", + "external_id": "status", + "description": "" + }, + { + "source_name": "sigma-rule", + "external_id": "author", + "description": "" + }, + { + "source_name": "ATTACK", + "external_id": "tactic", + "description": "" + }, + { + "source_name": "ATTACK", + "external_id": "technique", + "description": "" + }, + { + "source_name": "ATTACK", + "external_id": "technique", + "description": "" + }, + { + "source_name": "sigma-rule", + "external_id": "detection", + "description": "" + }, + { + "source_name": "cve", + "external_id": "", + "description": "https://nvd.nist.gov/vuln/detail/" + } + ], + "object_marking_refs": [ + "marking-definition--94868c89-83c2-464b-929b-a1a8aa3c8487", + "" + ] +} +``` + +The UUID part of the rule is generated using the namespace `860f4c0f-8c26-5889-b39d-ce94368bc416` and the Sigma Rule `id+pattern_type` (from STIX object). + +e.g. namespace = `860f4c0f-8c26-5889-b39d-ce94368bc416` and value = `d9365e39-febd-4a4b-8441-3ca91bb9d333+sigma` = `04f2c0c4-8cc8-5471-a71c-f8b7c0da0c06` = `indicator--04f2c0c4-8cc8-5471-a71c-f8b7c0da0c06` + +### Relationships + +Note, a Sigma Rule can also contain a `related` property that links it to another rule. 
For example, + +```yaml +related: + - id: f305fd62-beca-47da-ad95-7690a0620084 + type: similar +``` + +Or an example with two relationships; + +```yaml +related: + - id: 455b9d50-15a1-4b99-853f-8d37655a4c1b + type: similar + - id: 75df3b17-8bcc-4565-b89b-c9898acef911 + type: obsoletes +``` + +Where the `related` property with an `id` and `type` is found in a rule (as noted above, could be many), a STIX relationship object is also created to link the two rules as follows; + +```json +{ + "type": "relationship", + "spec_version": "2.1", + "id": "relationship--", + "created_by_ref": "", + "created": "", + "modified": "", + "relationship_type": "", + "source_ref": "indicator--", + "target_ref": "indicator--", + "object_marking_refs": [ + "marking-definition--94868c89-83c2-464b-929b-a1a8aa3c8487", + "" + ] +} +``` + +To generate the id of the SRO, a UUIDv5 is generated using the namespace `860f4c0f-8c26-5889-b39d-ce94368bc416` and `source_ref+target_ref` (e.g., `indicator--0ec4f75f-74c1-4f66-a6d0-f488b20072f1+indicator--4c188857-cbcd-4c76-98e4-90fc2f5a6ddf`) = `13bb8925-d991-551b-bd00-05c45a85df2c` = `relationship--13bb8925-d991-551b-bd00-05c45a85df2c` + +Note, in `--mode sigmayaml`, no relationship objects are created. + +### Grouping + +The path to each rule (directory structure) also holds some significance, and should be represented as STIX 2.1 Grouping objects. + +```json +{ + "type": "grouping", + "spec_version": "2.1", + "id": "grouping--", + "created_by_ref": "", + "created": "", + "modified": "", + "name": "", + "context": "suspicious-activity", + "object_refs": [ + "indicator--", + "indicator--" + ], + "object_marking_refs": [ + "marking-definition--94868c89-83c2-464b-929b-a1a8aa3c8487", + "" + ] +} +``` + +To generate the id of the Grouping object (an SDO), a UUIDv5 is generated using the namespace `860f4c0f-8c26-5889-b39d-ce94368bc416` and the `name` property. e.g. 
`rules-threat-hunting/windows/file/file_event` = `c973b2fe-8bc9-5c4a-9172-7ec6542600d7` = `grouping--c973b2fe-8bc9-5c4a-9172-7ec6542600d7` + +For example, this directory path holds 3 rules: https://github.com/SigmaHQ/sigma/tree/master/rules-threat-hunting/windows/file/file_event, and thus 3 `object_refs` would exist in the grouping object representing it. + +Note, in `--mode sigmayaml`, no grouping objects are created. + +### Bundle + +sigma2stix also creates a STIX 2.1 Bundle JSON object containing all the other STIX 2.1 Objects created at each run. The Bundle takes the format; + +```json +{ + "type": "bundle", + "id": "bundle--", + "objects": [ + "" + ] +} +``` + +To generate the id of the Bundle, a UUIDv5 is generated using the namespace `860f4c0f-8c26-5889-b39d-ce94368bc416` and ``. + +## Backfill old versions + +Here is a quick example of how to create bundles representing different versions of the Sigma rules for comparison; + +```shell +python3 sigma2stix.py --mode sigmahq --sigma_version_tag r2023-08-24 && mv stix2_objects/sigma-rule-bundle.json bundle_store/sigma-rule-bundle-r2023-08-24.json && \ +python3 sigma2stix.py --mode sigmahq --sigma_version_tag r2023-10-09 && mv stix2_objects/sigma-rule-bundle.json bundle_store/sigma-rule-bundle-r2023-10-09.json && \ +python3 sigma2stix.py --mode sigmahq --sigma_version_tag r2023-10-23 && mv stix2_objects/sigma-rule-bundle.json bundle_store/sigma-rule-bundle-r2023-10-23.json && \ +python3 sigma2stix.py --mode sigmahq --sigma_version_tag r2023-11-06 && mv stix2_objects/sigma-rule-bundle.json bundle_store/sigma-rule-bundle-r2023-11-06.json && \ +python3 sigma2stix.py --mode sigmahq --sigma_version_tag r2023-11-20 && mv stix2_objects/sigma-rule-bundle.json bundle_store/sigma-rule-bundle-r2023-11-20.json && \ +python3 sigma2stix.py --mode sigmahq --sigma_version_tag r2023-12-04 && mv stix2_objects/sigma-rule-bundle.json bundle_store/sigma-rule-bundle-r2023-12-04.json && \ +python3 sigma2stix.py --mode sigmahq --sigma_version_tag 
r2023-12-21 && mv stix2_objects/sigma-rule-bundle.json bundle_store/sigma-rule-bundle-r2023-12-21.json && \ +python3 sigma2stix.py --mode sigmahq --sigma_version_tag r2024-01-15 && mv stix2_objects/sigma-rule-bundle.json bundle_store/sigma-rule-bundle-r2024-01-15.json && \ +python3 sigma2stix.py --mode sigmahq --sigma_version_tag r2024-01-29 && mv stix2_objects/sigma-rule-bundle.json bundle_store/sigma-rule-bundle-r2024-01-29.json && \ +python3 sigma2stix.py --mode sigmahq --sigma_version_tag r2024-02-12 && mv stix2_objects/sigma-rule-bundle.json bundle_store/sigma-rule-bundle-r2024-02-12.json && \ +python3 sigma2stix.py --mode sigmahq --sigma_version_tag r2024-02-26 && mv stix2_objects/sigma-rule-bundle.json bundle_store/sigma-rule-bundle-r2024-02-26.json && \ +python3 sigma2stix.py --mode sigmahq --sigma_version_tag r2024-03-11 && mv stix2_objects/sigma-rule-bundle.json bundle_store/sigma-rule-bundle-r2024-03-11.json && \ +python3 sigma2stix.py --mode sigmahq --sigma_version_tag r2024-03-26 && mv stix2_objects/sigma-rule-bundle.json bundle_store/sigma-rule-bundle-r2024-03-26.json && \ +python3 sigma2stix.py --mode sigmahq --sigma_version_tag r2024-04-29 && mv stix2_objects/sigma-rule-bundle.json bundle_store/sigma-rule-bundle-r2024-04-29.json && \ +python3 sigma2stix.py --mode sigmahq --sigma_version_tag r2024-05-13 && mv stix2_objects/sigma-rule-bundle.json bundle_store/sigma-rule-bundle-r2024-05-13.json +``` + +Note, [you can easily download historic Sigma data from our cti_knowledge_base repository so you don't have to run this script](https://github.com/muchdogesec/cti_knowledge_base_store). 
+ +## Useful supporting tools + +* To generate STIX 2.1 Objects: [stix2 Python Lib](https://stix2.readthedocs.io/en/latest/) +* The STIX 2.1 specification: [STIX 2.1 docs](https://docs.oasis-open.org/cti/stix/v2.1/stix-v2.1.html) +* [SigmaHQ on GitHub](https://github.com/SigmaHQ) + +## Support + +[Minimal support provided via the DOGESEC community](https://community.dogesec.com/). + +## License + +[Apache 2.0](/LICENSE). \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..398b09e --- /dev/null +++ b/requirements.txt @@ -0,0 +1,17 @@ +-i https://pypi.org/simple +antlr4-python3-runtime==4.9.3 +certifi==2024.2.2; python_version >= '3.6' +charset-normalizer==3.3.2; python_full_version >= '3.7.0' +gitdb==4.0.11; python_version >= '3.7' +gitpython==3.1.40; python_version >= '3.7' +idna==3.7; python_version >= '3.5' +pytz==2024.1 +pyyaml==6.0.1; python_version >= '3.6' +requests==2.31.0; python_version >= '3.7' +simplejson==3.19.2; python_version >= '2.5' and python_version not in '3.0, 3.1, 3.2, 3.3' +six==1.16.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' +smmap==5.0.1; python_version >= '3.7' +stix2==3.0.1; python_version >= '3.6' +stix2-patterns==2.0.0; python_version >= '3.6' +tqdm==4.66.1; python_version >= '3.7' +urllib3==2.2.1; python_version >= '3.8' diff --git a/sigma2stix.py b/sigma2stix.py new file mode 100644 index 0000000..da23cfe --- /dev/null +++ b/sigma2stix.py @@ -0,0 +1,21 @@ +import argparse +from src.sigma2stix import Sigma2Stix +from pathlib import Path + + +def filetype(file): + path = Path(file) + if not path.is_file(): + raise argparse.ArgumentTypeError(f"{path.absolute()} is not a file") + return path + +parser = argparse.ArgumentParser(description='Run Sigma2Stix with specific Sigma version tag.') +parser.add_argument('--sigma_version_tag', type=str, help='Sigma version tag to use', default='r2024-02-26') +parser.add_argument('--mode', choices=["sigmahq", 
"sigmayaml"], required=True) +fileaction = parser.add_argument('--file', type=filetype, nargs='+') +args = parser.parse_args() + +if args.mode == "sigmayaml" and not args.file: + parser.error(f"{'/'.join(fileaction.option_strings)} is required in mode {args.mode}") + +Sigma2Stix(branch=args.sigma_version_tag).run(args.mode, args.file) \ No newline at end of file diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/config.py b/src/config.py new file mode 100644 index 0000000..2f507e1 --- /dev/null +++ b/src/config.py @@ -0,0 +1,21 @@ +import logging +from uuid import UUID +from stix2 import FileSystemStore +from .utils import check_dir + +logging.basicConfig( + level=logging.INFO, + format="[%(asctime)s] {%(pathname)s:%(lineno)d} %(levelname)s - %(message)s", # noqa D100 E501 + datefmt="%Y-%m-%d - %H:%M:%S", +) + +namespace = UUID("860f4c0f-8c26-5889-b39d-ce94368bc416") +source_repo = "https://github.com/SigmaHQ/sigma.git" +temporary_path = "data" +file_system_path = "stix2_objects" +check_dir(file_system_path) +fs = FileSystemStore(file_system_path) +SIGMA2STIX_MARKING_DEFINITION_URL = "https://raw.githubusercontent.com/muchdogesec/stix4doge/main/objects/marking-definition/sigma2stix.json" +SIGMA2STIX_IDENTITY_URL = "https://raw.githubusercontent.com/muchdogesec/stix4doge/main/objects/identity/sigma2stix.json" +MITRE_TECHNIQUE_PATH = "https://attack.mitre.org/techniques/{}" +CVE_PATH = "https://nvd.nist.gov/vuln/detail/{}" \ No newline at end of file diff --git a/src/parser.py b/src/parser.py new file mode 100644 index 0000000..ebb5a7a --- /dev/null +++ b/src/parser.py @@ -0,0 +1,130 @@ +import json + +from stix2 import Indicator, Grouping, Relationship, parse, Identity +from datetime import datetime +from src import config +from src import utils +import uuid +import re + + +class SigmaParser: + + @classmethod + def parse_indicator(cls, data:dict, path:str, url: str) -> list: + data_list = [] + id = 
data.get('id') + if not config.fs.get(f"indicator--{id}"): + try: + id = str(uuid.uuid5(config.namespace, f"{id}+sigma")) + indicator = Indicator( + id=f"indicator--{id}", + created_by_ref=utils.get_data_from_fs("identity")[0], + created=datetime.strptime(data.get('date'), "%Y/%m/%d"), + modified=datetime.strptime(data.get('modified') if data.get('modified') else data.get('date'), "%Y/%m/%d"), + indicator_types=["malicious-activity","anomalous-activity"], + name=data.get("title"), + description=f"{data.get('description')}. The following false positives can result from this detection; {', '.join(data.get('falsepositives',[]))}", + pattern=data, + pattern_type="sigma", + valid_from=datetime.strptime(data.get('date'), "%Y/%m/%d"), + external_references=[ + { + "source_name": "sigma-rule", + "external_id": "rule", + "url": url + } + ] + cls.process_tags_and_labels(data) + utils.generate_all_references(data), + object_marking_refs=[ + "marking-definition--94868c89-83c2-464b-929b-a1a8aa3c8487" + ]+[utils.get_data_from_fs("marking-definition")[0]] + ) + data_list.append(indicator) + config.fs.add(indicator) + except Exception as e: + raise + pass + return data_list + + @staticmethod + def parse_relationship(data:dict): + data_list = [] + for relation in data.get("related", None): # type dict + #id = f'indicator--{data.get("id")}+'+f'indicator--{relation.get("id")}' + #id = str(uuid.uuid5(config.namespace, f"{id}")) + source_object_id = uuid.uuid5(config.namespace, f"{data.get('id')}+sigma") + target_object_id = uuid.uuid5(config.namespace, f"{relation.get('id')}+sigma") + id = f'indicator--{source_object_id}+' + f'indicator--{target_object_id}' + id = str(uuid.uuid5(config.namespace, f"{id}")) + if not config.fs.get(f"relationship--{id}"): + relation = Relationship( + id=f"relationship--{id}", + created_by_ref=utils.get_data_from_fs("identity")[0], + created=datetime.strptime(data.get('date'), "%Y/%m/%d"), + modified=datetime.strptime(data.get('modified') if 
data.get('modified') else data.get('date'), "%Y/%m/%d"), + relationship_type=relation.get('type'), + source_ref=f"indicator--{source_object_id}", + target_ref=f"indicator--{target_object_id}", + object_marking_refs=[ + "marking-definition--94868c89-83c2-464b-929b-a1a8aa3c8487" + ]+[utils.get_data_from_fs("marking-definition")[0]] + ) + config.fs.add(relation) + data_list.append(relation.serialize()) + return data_list + + @classmethod + def process_tags_and_labels(cls, data: dict): + references = [] + for key in ['id', 'level', 'status', 'author', 'license']: + if value := data.get(key): + references.append(dict(source_name='sigma-rule', external_id=key, description=value)) + for tag in data.get('tags', []): + if match := re.match(r'detection\.(.*)', tag): + references.append(dict(source_name='sigma-rule', external_id='detection', description=match.group(1))) + elif match := re.match(r'(cve\..*)', tag): + cve_id = match.group(1).replace(".", '-').upper() + references.append(dict(source_name='cve', external_id=cve_id, url=config.CVE_PATH.format(cve_id))) + elif match := re.match(r'attack\.(t.*)', tag): + attack_id = match.group(1).upper() + references.append(dict(source_name="mitre-attack", external_id=attack_id, url=config.MITRE_TECHNIQUE_PATH.format(attack_id))) + elif match := re.match(r'attack\.(.*)', tag): + attack_id = match.group(1) + references.append(dict(source_name='ATTACK', external_id=attack_id, description='tactic')) #, url=config.TECHNIQUE_PATH.format(attack_id))) + return references + + @staticmethod + def parse_grouping(data:dict)-> list: + id = str(uuid.uuid5(config.namespace, f"{data.get('path')}")) + grouping = Grouping( + id=f"grouping--{id}", + context="suspicious-activity", + created_by_ref=utils.get_data_from_fs("identity")[0], + created=config.fs.get(data.get("indicators")[0]).get("created"), + modified=config.fs.get(data.get("indicators")[0]).get("modified"), + name=f"{data.get('path')}", + object_refs=data.get("indicators"), + 
object_marking_refs=[ + "marking-definition--94868c89-83c2-464b-929b-a1a8aa3c8487" + ]+[utils.get_data_from_fs("marking-definition")[0]] + ) + config.fs.add(grouping) + return [grouping.serialize()] + + @staticmethod + def parse_marking_definition(): + marking_definition = parse( + json.loads(utils.load_file_from_url(config.SIGMA2STIX_MARKING_DEFINITION_URL)) + ) + if not config.fs.get(marking_definition.get("id")): + config.fs.add(marking_definition) + return marking_definition + + @staticmethod + def parse_identity(): + identity = parse( + json.loads(utils.load_file_from_url(config.SIGMA2STIX_IDENTITY_URL)) + ) + if not config.fs.get(identity.get("id")): + config.fs.add(identity) + return identity diff --git a/src/sigma2stix.py b/src/sigma2stix.py new file mode 100644 index 0000000..40fa2d1 --- /dev/null +++ b/src/sigma2stix.py @@ -0,0 +1,65 @@ +import logging +from pathlib import Path +from tqdm import tqdm +from src import config +from src import utils +from src.parser import SigmaParser + + +class Sigma2Stix: + + def __init__(self, branch): + self.parser = SigmaParser() + self.tag = branch + print(self.tag) + + # + @staticmethod + def prepare_bundle(): + utils.store_in_bundle( + utils.append_data() + ) + + def run(self, mode, yamlfiles:Path): + files = [] + + utils.clean_filesystem(config.temporary_path) + utils.clean_filesystem(config.file_system_path) + if mode == 'sigmahq': + logging.info("Cloning start") + utils.clone_github_repository(config.source_repo, config.temporary_path, tag_name=self.tag) + logging.info("Cloning end") + utils.delete_files_and_folders_except_rules() + files = utils.get_all_yaml_files() + elif mode == 'sigmayaml': + files = [{mode: yamlfiles}] + + print(files) + self.parser.parse_marking_definition() + self.parser.parse_identity() + + data_list = [] + for d in tqdm(files): + temp_data = [] + for file in d.get(list(d.keys())[0]): + data = utils.read_yaml_file(file) + if mode == 'sigmahq': + url = 
f"https://github.com/SigmaHQ/sigma/blob/master/{file[5:]}" + elif mode == 'sigmayaml': + url = Path(file).absolute().as_uri() + temp_data += self.parser.parse_indicator(data, file, url) + data_list += temp_data + if data.get("related", None): + data_list += self.parser.parse_relationship(data) + + if len(temp_data)>0 and mode == 'sigmahq': + temp_data_ = [] + temp_data_ += [d.get("id") for d in temp_data] + data_list += self.parser.parse_grouping({ + "path": list(d.keys())[0][5:], + "indicators": temp_data_, + }) + + self.prepare_bundle() + utils.clean_filesystem(config.temporary_path) + diff --git a/src/utils.py b/src/utils.py new file mode 100644 index 0000000..0133508 --- /dev/null +++ b/src/utils.py @@ -0,0 +1,124 @@ +import os +import shutil +import uuid +import json +import yaml +import hashlib +import requests +from git import Repo +from typing import List +from src import config +from stix2 import Bundle +from stix2 import Filter + +def clone_github_repository(repo_url, destination_path, tag_name): + try: + repo = Repo.clone_from(repo_url, destination_path, branch=tag_name) + print(f"Repository cloned successfully to {destination_path}") + except Exception as e: + print(f"Failed to clone repository: {e}") + + +def check_dir(dir:str): + if not os.path.exists(dir): + os.makedirs(dir) + + +def delete_files_and_folders_except_rules(prefix='rules', keep_count=2): + directory_path = 'data' + all_items = os.listdir(directory_path) + rules_folders = [item for item in all_items if item.startswith(prefix)] + rules_folders.sort() + folders_to_keep = rules_folders[:keep_count] + for item in all_items: + item_path = os.path.join(directory_path, item) + if os.path.isdir(item_path) and not item.startswith(prefix): + if item not in folders_to_keep: + shutil.rmtree(item_path) + print(f"Deleted: {item_path}") + elif os.path.isfile(item_path): + if not item.startswith(prefix): + os.remove(item_path) + + +def get_all_yaml_files(folder="data"): + file_with_path = [] + for root, 
dirs, files in list(os.walk(folder)): + yaml_files = [] + for file in files: + if file.endswith('.yml') or file.endswith('.yaml'): + file_path = os.path.join(root, file) + yaml_files.append(file_path) + if len(yaml_files)>1: + file_with_path.append({root:yaml_files}) + return file_with_path + + +def read_yaml_file(file_path): + try: + with open(file_path, 'r') as file: + yaml_data = yaml.safe_load(file) + return yaml_data + except Exception as e: + print(f"Error reading YAML file: {e}") + return None + + +def generate_all_references(data:dict) -> List[dict]: + return [ + {"source_name": "sigma-rule", "external_id": "reference", "description": reference} + for reference in data.get("references", []) + ] + + +def clean_filesystem(path): + try: + if os.path.isfile(path) or os.path.islink(path): + os.unlink(path) + elif os.path.isdir(path): + shutil.rmtree(path) + except Exception as e: + print(e) + pass + + +def append_data(): + results = [] + for root, _, files in os.walk(config.file_system_path): + for filename in files: + if filename.endswith(".json"): + file_path = os.path.join(root, filename) + with open(file_path, "r") as file: + stix_object = json.load(file) + results.append(stix_object) + return results + + +def generate_md5_from_list(stix_objects: list) -> str: + json_str = json.dumps(stix_objects, sort_keys=True).encode('utf-8') + return hashlib.md5(json_str).hexdigest() + + +def store_in_bundle(stix_objects): + bundle_id = "bundle--" + str(uuid.uuid5( + config.namespace, generate_md5_from_list(stix_objects)) + ) + bundle_of_all_objects = Bundle(id=bundle_id, objects=stix_objects) + stix_bundle_file = f"{config.file_system_path}/sigma-rule-bundle.json" + with open(stix_bundle_file, "w") as f: + f.write(json.dumps(json.loads(bundle_of_all_objects.serialize()), indent=4)) + + +def load_file_from_url(url): + try: + response = requests.get(url) + response.raise_for_status() # Raise an HTTPError for bad responses + return response.text + except 
requests.exceptions.RequestException as e: + print(f"Error loading JSON from {url}: {e}") + return None + + +def get_data_from_fs(query:str): + query = [Filter("type", "=", query)] + return config.fs.query(query) diff --git a/tests/demo_rule.yml b/tests/demo_rule.yml new file mode 100644 index 0000000..e3c2196 --- /dev/null +++ b/tests/demo_rule.yml @@ -0,0 +1,32 @@ +title: Exploitation Indicators Of CVE-2023-20198 +id: 2ece8816-b7a0-4d9b-b0e8-ae7ad18bc02b +status: experimental +description: Detecting exploitation indicators of CVE-2023-20198 a privilege escalation vulnerability in Cisco IOS XE Software Web UI. +references: + - https://sec.cloudapps.cisco.com/security/center/content/CiscoSecurityAdvisory/cisco-sa-iosxe-webui-privesc-j22SaA4z + - https://www.thestack.technology/security-experts-call-for-incident-response-exercises-after-mass-cisco-device-exploitation/ +author: Lars B. P. Frydenskov (Trifork Security) +date: 2023/10/20 +tags: + - attack.privilege_escalation + - attack.initial_access + - attack.t1114 + - detection.emerging_threats + - cve.2023.20198 +logsource: + product: cisco + service: syslog + definition: 'Requirements: Cisco IOS XE system logs needs to be configured and ingested' +detection: + keyword_event: + - '%WEBUI-6-INSTALL_OPERATION_INFO:' + - '%SYS-5-CONFIG_P:' + - '%SEC_LOGIN-5-WEBLOGIN_SUCCESS:' + keyword_user: + - 'cisco_tac_admin' + - 'cisco_support' + - 'cisco_sys_manager' + condition: keyword_event and keyword_user +falsepositives: + - Rare false positives might occur if there are valid users named "cisco_tac_admin" or "cisco_support", which are not created by default or CISCO representatives +level: high \ No newline at end of file