diff --git a/ci_tests/test_pygoat_findings.py b/ci_tests/test_pygoat_findings.py index 7602758f..ea3c6dfb 100644 --- a/ci_tests/test_pygoat_findings.py +++ b/ci_tests/test_pygoat_findings.py @@ -14,6 +14,7 @@ "pixee:python/url-sandbox", "pixee:python/use-defusedxml", "pixee:python/use-walrus-if", + "pixee:python/timezone-aware-datetime", ] diff --git a/src/codemodder/scripts/generate_docs.py b/src/codemodder/scripts/generate_docs.py index 10c574a9..42e03503 100644 --- a/src/codemodder/scripts/generate_docs.py +++ b/src/codemodder/scripts/generate_docs.py @@ -279,6 +279,10 @@ class DocMetadata: importance="Low", guidance_explained="We believe this replacement is safe and should not result in any issues.", ), + "timezone-aware-datetime": DocMetadata( + importance="Medium", + guidance_explained="This change makes your code more accurate with regards to timezones. However, it's possible you wish to specify a different timezone for your application needs.", + ), } DEFECTDOJO_CODEMODS = { "django-secure-set-cookie": DocMetadata( @@ -315,6 +319,7 @@ class DocMetadata: "break-or-continue-out-of-loop", "disable-graphql-introspection", "invert-boolean-check", + "timezone-aware-datetime", ] SONAR_CODEMODS = { name: DocMetadata( diff --git a/src/core_codemods/__init__.py b/src/core_codemods/__init__.py index ac717cb6..1abf97b7 100644 --- a/src/core_codemods/__init__.py +++ b/src/core_codemods/__init__.py @@ -90,11 +90,13 @@ from .sonar.sonar_secure_random import SonarSecureRandom from .sonar.sonar_sql_parameterization import SonarSQLParameterization from .sonar.sonar_tempfile_mktemp import SonarTempfileMktemp +from .sonar.sonar_timezone_aware_datetime import SonarTimezoneAwareDatetime from .sonar.sonar_url_sandbox import SonarUrlSandbox from .sql_parameterization import SQLQueryParameterization from .str_concat_in_seq_literal import StrConcatInSeqLiteral from .subprocess_shell_false import SubprocessShellFalse from .tempfile_mktemp import TempfileMktemp +from 
.timezone_aware_datetime import TimezoneAwareDatetime from .upgrade_sslcontext_minimum_version import UpgradeSSLContextMinimumVersion from .upgrade_sslcontext_tls import UpgradeSSLContextTLS from .url_sandbox import UrlSandbox @@ -136,6 +138,7 @@ UseDefusedXml, UseGenerator, UseSetLiteral, + TimezoneAwareDatetime, UseWalrusIf, WithThreadingLock, SQLQueryParameterization, @@ -197,6 +200,7 @@ SonarBreakOrContinueOutOfLoop, SonarDisableGraphQLIntrospection, SonarInvertedBooleanCheck, + SonarTimezoneAwareDatetime, ], ) diff --git a/src/core_codemods/docs/pixee_python_timezone-aware-datetime.md b/src/core_codemods/docs/pixee_python_timezone-aware-datetime.md new file mode 100644 index 00000000..9e20b3bc --- /dev/null +++ b/src/core_codemods/docs/pixee_python_timezone-aware-datetime.md @@ -0,0 +1,14 @@ +Some `datetime` object calls use the machine's local timezone instead of a reasonable default like UTC. This may be okay in some cases, but it can lead to bugs. Misinterpretation of dates has been the culprit for serious issues in banking, satellite communications, and other industries. + +The `datetime` [documentation](https://docs.python.org/3/library/datetime.html#datetime.datetime.utcnow) explicitly encourages using timezone-aware objects to prevent bugs. 
+ +Our changes look like the following: +```diff + from datetime import datetime, timezone + import time + +- datetime.utcnow() +- datetime.utcfromtimestamp(time.time()) ++ datetime.now(tz=timezone.utc) ++ datetime.fromtimestamp(time.time(), tz=timezone.utc) +``` diff --git a/src/core_codemods/sonar/sonar_timezone_aware_datetime.py b/src/core_codemods/sonar/sonar_timezone_aware_datetime.py new file mode 100644 index 00000000..f900e252 --- /dev/null +++ b/src/core_codemods/sonar/sonar_timezone_aware_datetime.py @@ -0,0 +1,9 @@ +from core_codemods.sonar.api import SonarCodemod +from core_codemods.timezone_aware_datetime import TimezoneAwareDatetime + +SonarTimezoneAwareDatetime = SonarCodemod.from_core_codemod( + name="timezone-aware-datetime", + other=TimezoneAwareDatetime, + rule_id="python:S6903", + rule_name='Using timezone-aware "datetime" objects should be preferred over using "datetime.datetime.utcnow" and "datetime.datetime.utcfromtimestamp"', +) diff --git a/src/core_codemods/timezone_aware_datetime.py b/src/core_codemods/timezone_aware_datetime.py new file mode 100644 index 00000000..bca4103a --- /dev/null +++ b/src/core_codemods/timezone_aware_datetime.py @@ -0,0 +1,115 @@ +import libcst as cst +from libcst import matchers + +from codemodder.codemods.libcst_transformer import ( + LibcstResultTransformer, + LibcstTransformerPipeline, + NewArg, +) +from codemodder.codemods.utils_mixin import NameResolutionMixin +from core_codemods.api import CoreCodemod, Metadata, Reference, ReviewGuidance + + +class TransformDatetimeWithTimezone(LibcstResultTransformer, NameResolutionMixin): + + change_description = "Add `tz=datetime.timezone.utc` to datetime call" + need_kwarg = ( + "datetime.datetime", + "datetime.datetime.now", + "datetime.datetime.fromtimestamp", + ) + _module_name = "datetime" + + def leave_Call(self, original_node: cst.Call, updated_node: cst.Call): + if not self.node_is_selected(original_node): + return updated_node + + match 
self.find_base_name(original_node): + case "datetime.datetime.utcnow": + self.report_change(original_node) + maybe_name, kwarg_val, module = self._determine_module_and_kwarg( + original_node + ) + new_args = self.replace_args( + original_node, + [ + NewArg( + name="tz", + value=kwarg_val, + add_if_missing=True, + ) + ], + ) + return self.update_call_target( + updated_node, module, "now", replacement_args=new_args + ) + case "datetime.datetime.utcfromtimestamp": + self.report_change(original_node) + maybe_name, kwarg_val, module = self._determine_module_and_kwarg( + original_node + ) + if len(original_node.args) != 2 and not self._has_timezone_arg( + original_node, "tz" + ): + new_args = self.replace_args( + original_node, + [ + NewArg( + name="tz", + value=kwarg_val, + add_if_missing=True, + ) + ], + ) + else: + new_args = original_node.args + + return self.update_call_target( + updated_node, + module, + "fromtimestamp", + replacement_args=new_args, + ) + + return updated_node + + def _determine_module_and_kwarg(self, original_node: cst.Call): + + if maybe_name := self.get_aliased_prefix_name(original_node, self._module_name): + # it's a regular import OR alias import + if maybe_name == self._module_name: + module = "datetime.datetime" + else: + module = f"{maybe_name}.datetime" + kwarg_val = f"{maybe_name}.timezone.utc" + else: + # it's from import so timezone should also be from import + self.add_needed_import("datetime", "timezone") + kwarg_val = "timezone.utc" + module = ( + "datetime" + if (curr_module := original_node.func.value.value) + in (self._module_name, "date") + else curr_module + ) + + return maybe_name, kwarg_val, module + + def _has_timezone_arg(self, original_node: cst.Call, name: str) -> bool: + return any( + matchers.matches(arg, matchers.Arg(keyword=matchers.Name(name))) + for arg in original_node.args + ) + + +TimezoneAwareDatetime = CoreCodemod( + metadata=Metadata( + name="timezone-aware-datetime", + summary="Make `datetime` Calls 
Timezone-Aware", + review_guidance=ReviewGuidance.MERGE_AFTER_REVIEW, + references=[ + Reference(url="https://docs.python.org/3/library/datetime.html"), + ], + ), + transformer=LibcstTransformerPipeline(TransformDatetimeWithTimezone), +) diff --git a/tests/codemods/sonar/test_sonar_timezone_aware_datetime.py b/tests/codemods/sonar/test_sonar_timezone_aware_datetime.py new file mode 100644 index 00000000..f93e4ed7 --- /dev/null +++ b/tests/codemods/sonar/test_sonar_timezone_aware_datetime.py @@ -0,0 +1,64 @@ +import json + +from codemodder.codemods.test import BaseSASTCodemodTest +from core_codemods.sonar.sonar_timezone_aware_datetime import SonarTimezoneAwareDatetime + + +class TestSonarSQLParameterization(BaseSASTCodemodTest): + codemod = SonarTimezoneAwareDatetime + tool = "sonar" + + def test_name(self): + assert self.codemod.name == "timezone-aware-datetime" + + def test_simple(self, tmpdir): + input_code = """\ + import datetime + + datetime.datetime.utcnow() + timestamp = 1571595618.0 + datetime.datetime.utcfromtimestamp(timestamp) + """ + expected = """\ + import datetime + + datetime.datetime.now(tz=datetime.timezone.utc) + timestamp = 1571595618.0 + datetime.datetime.fromtimestamp(timestamp, tz=datetime.timezone.utc) + """ + issues = { + "issues": [ + { + "key": "AZFcGzHT5VEY3NanjlD7", + "rule": "python:S6903", + "severity": "MAJOR", + "component": "code.py", + "hash": "92aca3da1e08f944a3c408df27c54b28", + "textRange": { + "startLine": 3, + "endLine": 3, + "startOffset": 0, + "endOffset": 26, + }, + "status": "OPEN", + "message": "Don't use `datetime.datetime.utcnow` to create this datetime object.", + }, + { + "key": "AZFcGzHT5VEY3NanjlD8", + "rule": "python:S6903", + "severity": "MAJOR", + "component": "code.py", + "textRange": { + "startLine": 5, + "endLine": 5, + "startOffset": 0, + "endOffset": 45, + }, + "status": "OPEN", + "message": "Don't use `datetime.datetime.utcfromtimestamp` to create this datetime object.", + }, + ] + } + self.run_and_assert( 
+ tmpdir, input_code, expected, results=json.dumps(issues), num_changes=2 + ) diff --git a/tests/codemods/test_timezone_aware_datetime.py b/tests/codemods/test_timezone_aware_datetime.py new file mode 100644 index 00000000..7e4d48df --- /dev/null +++ b/tests/codemods/test_timezone_aware_datetime.py @@ -0,0 +1,77 @@ +from codemodder.codemods.test import BaseCodemodTest +from core_codemods.timezone_aware_datetime import TimezoneAwareDatetime + + +class TestTimezoneAwareDatetimeNeedKwarg(BaseCodemodTest): + codemod = TimezoneAwareDatetime + + def test_name(self): + assert self.codemod.name == "timezone-aware-datetime" + + def test_import(self, tmpdir): + input_code = """ + import datetime + import time + + datetime.datetime.utcnow() + datetime.datetime.utcfromtimestamp(time.time()) + """ + expected = """ + import datetime + import time + + datetime.datetime.now(tz=datetime.timezone.utc) + datetime.datetime.fromtimestamp(time.time(), tz=datetime.timezone.utc) + """ + self.run_and_assert(tmpdir, input_code, expected, num_changes=2) + + def test_import_alias(self, tmpdir): + input_code = """ + import datetime as mydate + import time + + mydate.datetime.utcnow() + mydate.datetime.utcfromtimestamp(time.time()) + """ + expected = """ + import datetime as mydate + import time + + mydate.datetime.now(tz=mydate.timezone.utc) + mydate.datetime.fromtimestamp(time.time(), tz=mydate.timezone.utc) + """ + self.run_and_assert(tmpdir, input_code, expected, num_changes=2) + + def test_import_from(self, tmpdir): + input_code = """ + from datetime import datetime + import time + + datetime.utcnow() + datetime.utcfromtimestamp(time.time()) + """ + expected = """ + from datetime import timezone, datetime + import time + + datetime.now(tz=timezone.utc) + datetime.fromtimestamp(time.time(), tz=timezone.utc) + """ + self.run_and_assert(tmpdir, input_code, expected, num_changes=2) + + def test_import_from_alias(self, tmpdir): + input_code = """ + from datetime import datetime as mydate + 
import time + + mydate.utcnow() + mydate.utcfromtimestamp(time.time()) + """ + expected = """ + from datetime import timezone, datetime as mydate + import time + + mydate.now(tz=timezone.utc) + mydate.fromtimestamp(time.time(), tz=timezone.utc) + """ + self.run_and_assert(tmpdir, input_code, expected, num_changes=2)