From 482c5ecf60567d1c853ab463e3fdd7ff043fedba Mon Sep 17 00:00:00 2001 From: clavedeluna Date: Tue, 17 Oct 2023 13:09:43 -0300 Subject: [PATCH 1/8] context gets all args --- src/codemodder/codemodder.py | 8 +------- src/codemodder/context.py | 13 +++++++------ tests/codemods/base_codemod_test.py | 26 +++++++++++++------------- 3 files changed, 21 insertions(+), 26 deletions(-) diff --git a/src/codemodder/codemodder.py b/src/codemodder/codemodder.py index 69603d3a..690a68ac 100644 --- a/src/codemodder/codemodder.py +++ b/src/codemodder/codemodder.py @@ -129,13 +129,7 @@ def run(original_args) -> int: log_section("startup") logger.info("codemodder: python/%s", __VERSION__) - - context = CodemodExecutionContext( - Path(argv.directory), - argv.dry_run, - argv.verbose, - codemod_registry, - ) + context = CodemodExecutionContext(argv, codemod_registry) # TODO: this should be a method of CodemodExecutionContext codemods_to_run = codemod_registry.match_codemods( diff --git a/src/codemodder/context.py b/src/codemodder/context.py index 9b88e8a7..5ed69740 100644 --- a/src/codemodder/context.py +++ b/src/codemodder/context.py @@ -1,3 +1,4 @@ +import argparse import logging from pathlib import Path import itertools @@ -21,14 +22,14 @@ class CodemodExecutionContext: # pylint: disable=too-many-instance-attributes def __init__( self, - directory: Path, - dry_run: bool, - verbose: bool, + cli_args: argparse.Namespace, registry: CodemodRegistry, ): - self.directory = directory - self.dry_run = dry_run - self.verbose = verbose + self.directory = Path(cli_args.directory) + self.dry_run = cli_args.dry_run + self.verbose = cli_args.verbose + self.path_include = cli_args.path_include + self.path_exclude = cli_args.path_exclude self._results_by_codemod = {} self._failures_by_codemod = {} self.dependencies = {} diff --git a/tests/codemods/base_codemod_test.py b/tests/codemods/base_codemod_test.py index c35cd4cc..4695422c 100644 --- a/tests/codemods/base_codemod_test.py +++ b/tests/codemods/base_codemod_test.py @@ -1,10 +1,10 @@ # pylint: disable=no-member,not-callable,attribute-defined-outside-init +import argparse from collections import defaultdict import os from pathlib import Path from textwrap import dedent from typing import ClassVar - import libcst as cst from libcst.codemod import CodemodContext import mock @@ -21,18 +21,23 @@ class BaseCodemodTest: def setup_method(self): self.file_context = None + def _make_context(self, root): + cli_args = argparse.Namespace( + directory=root, + dry_run=True, + verbose=False, + path_include=[], + path_exclude=[], + ) + return CodemodExecutionContext(cli_args, registry=mock.MagicMock()) + def run_and_assert(self, tmpdir, input_code, expected): tmp_file_path = tmpdir / "code.py" self.run_and_assert_filepath(tmpdir, tmp_file_path, input_code, expected) def run_and_assert_filepath(self, root, file_path, input_code, expected): input_tree = cst.parse_module(dedent(input_code)) - self.execution_context = CodemodExecutionContext( - directory=root, - dry_run=True, - verbose=False, - registry=mock.MagicMock(), - ) + self.execution_context = self._make_context(root) self.file_context = FileContext( file_path, [], @@ -74,12 +79,7 @@ def results_by_id_filepath(self, input_code, file_path): return semgrep_run(self.execution_context, results[0].yaml_files) def run_and_assert_filepath(self, root, file_path, input_code, expected): - self.execution_context = CodemodExecutionContext( - directory=root, - dry_run=True, - verbose=False, - registry=mock.MagicMock(), - ) + self.execution_context = self._make_context(root) input_tree = cst.parse_module(input_code) all_results = self.results_by_id_filepath(input_code, file_path) results = all_results[str(file_path)] From 6279bc0f113c7b3f4ad645ea7d2d6615eb8a3afc Mon Sep 17 00:00:00 2001 From: clavedeluna Date: Tue, 17 Oct 2023 16:29:20 -0300 Subject: [PATCH 2/8] add flags to semgrep --- src/codemodder/semgrep.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/codemodder/semgrep.py b/src/codemodder/semgrep.py index 59493d04..1a2cad9a 100644 --- a/src/codemodder/semgrep.py +++ b/src/codemodder/semgrep.py @@ -26,6 +26,20 @@ def run(execution_context: CodemodExecutionContext, yaml_files: List[Path]) -> d "-o", temp_sarif_file.name, ] + + if execution_context.path_exclude: + command.extend( + itertools.chain.from_iterable( + map(lambda f: ["--exclude", str(f)], execution_context.path_exclude) + ) + ) + if execution_context.path_include: + command.extend( + itertools.chain.from_iterable( + map(lambda f: ["--include", str(f)], execution_context.path_include) + ) + ) + command.extend( itertools.chain.from_iterable( map(lambda f: ["--config", str(f)], yaml_files) From 2b8c7c59aad69b33a89bf596373c7eddfdbf2683 Mon Sep 17 00:00:00 2001 From: clavedeluna Date: Tue, 17 Oct 2023 16:32:25 -0300 Subject: [PATCH 3/8] remove import --- src/codemodder/codemodder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/codemodder/codemodder.py b/src/codemodder/codemodder.py index 690a68ac..e697848c 100644 --- a/src/codemodder/codemodder.py +++ b/src/codemodder/codemodder.py @@ -3,7 +3,7 @@ import logging import os import sys -from pathlib import Path +from textwrap import indent import libcst as cst from libcst.codemod import CodemodContext From 71d9d69b3429648ea97d70d3d9a306e444b635ee Mon Sep 17 00:00:00 2001 From: clavedeluna Date: Thu, 19 Oct 2023 08:32:34 -0300 Subject: [PATCH 4/8] typehint for lists --- src/codemodder/context.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/codemodder/context.py b/src/codemodder/context.py index 5ed69740..06524b1d 100644 --- a/src/codemodder/context.py +++ b/src/codemodder/context.py @@ -28,8 +28,8 @@ def __init__( self.directory = Path(cli_args.directory) self.dry_run = cli_args.dry_run self.verbose = cli_args.verbose - self.path_include = cli_args.path_include - self.path_exclude = cli_args.path_exclude + self.path_include: list = cli_args.path_include + self.path_exclude: list = cli_args.path_exclude self._results_by_codemod = {} self._failures_by_codemod = {} self.dependencies = {} From 396ee70bd01ef14789b4b4fdc165acf75cd9a407 Mon Sep 17 00:00:00 2001 From: clavedeluna Date: Thu, 19 Oct 2023 08:45:17 -0300 Subject: [PATCH 5/8] unit test with defaults --- tests/codemods/base_codemod_test.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/codemods/base_codemod_test.py b/tests/codemods/base_codemod_test.py index 4695422c..ca090c99 100644 --- a/tests/codemods/base_codemod_test.py +++ b/tests/codemods/base_codemod_test.py @@ -10,6 +10,7 @@ import mock from codemodder.context import CodemodExecutionContext +from codemodder.code_directory import DEFAULT_INCLUDED_PATHS, DEFAULT_EXCLUDED_PATHS from codemodder.file_context import FileContext from codemodder.registry import CodemodRegistry, CodemodCollection from codemodder.semgrep import run as semgrep_run @@ -26,8 +27,8 @@ def _make_context(self, root): directory=root, dry_run=True, verbose=False, - path_include=[], - path_exclude=[], + path_include=DEFAULT_INCLUDED_PATHS, + path_exclude=DEFAULT_EXCLUDED_PATHS, ) return CodemodExecutionContext(cli_args, registry=mock.MagicMock()) From 3ce7a3b6242a0a3fe8fb64e81ee76e8e643c6836 Mon Sep 17 00:00:00 2001 From: clavedeluna Date: Fri, 20 Oct 2023 09:00:30 -0300 Subject: [PATCH 6/8] filter files with parent path --- src/codemodder/code_directory.py | 15 ++++++++- tests/test_code_directory.py | 55 ++++++++++++++++---------------- 2 files changed, 41 insertions(+), 29 deletions(-) diff --git a/src/codemodder/code_directory.py b/src/codemodder/code_directory.py index 7e132340..b69ff655 100644 --- a/src/codemodder/code_directory.py +++ b/src/codemodder/code_directory.py @@ -36,13 +36,23 @@ def file_line_patterns(file_path: str | Path, patterns: Sequence[str]): ] -def filter_files(names: Sequence[str], patterns: Sequence[str], exclude: bool = False): +def filter_files( + names: Sequence[str], + parent_path: str, + patterns: Sequence[str], + exclude: bool = False, +): patterns = ( [x.split(":")[0] for x in (patterns or [])] if not exclude # An excluded line should not cause the entire file to be excluded else [x for x in (patterns or []) if ":" not in x] ) + + # Filter patterns should be relative to a parent_path, not absolute. + # TODO: handle case when parent path is "." + patterns = [f"{parent_path}{x}" for x in patterns] + return itertools.chain(*[fnmatch.filter(names, pattern) for pattern in patterns]) @@ -65,15 +75,18 @@ def match_files( that match the criteria of both exclude and include patterns. """ all_files = [str(path) for path in Path(parent_path).rglob("*")] + included_files = set( filter_files( all_files, + str(parent_path), include_paths if include_paths is not None else DEFAULT_INCLUDED_PATHS, ) ) excluded_files = set( filter_files( all_files, + str(parent_path), exclude_paths if exclude_paths is not None else DEFAULT_EXCLUDED_PATHS, exclude=True, ) diff --git a/tests/test_code_directory.py b/tests/test_code_directory.py index 17c34b2d..7a980804 100644 --- a/tests/test_code_directory.py +++ b/tests/test_code_directory.py @@ -23,7 +23,11 @@ def dir_structure(tmp_path_factory): (tests_dir / "test_make_request.py").touch() (tests_dir / "test_insecure_random.py").touch() - assert len(list(base_dir.rglob("*"))) == 9 + sub_tests_dir = tests_dir / "tests" + sub_tests_dir.mkdir() + (sub_tests_dir / "something.py").touch() + + assert len(list(base_dir.rglob("*"))) == 11 return base_dir @@ -35,11 +39,18 @@ def _assert_expected(self, result_files, expected_files): file_names.sort() assert file_names == expected_files - def test_all_py_files_match(self, dir_structure): + def test_all_py_files_match_except_tests_dir(self, dir_structure): expected = ["empty_for_testing.py", "insecure_random.py", "make_request.py"] files = match_files(dir_structure) self._assert_expected(files, expected) + def test_tests_not_excluded(self, dir_structure): + expected = ["test_insecure_random.py", "test_make_request.py"] + # anything in foo/tests will be analyzed but anything in + # foo/tests/tests will not be analyzed by default + files = match_files(dir_structure / "tests") + self._assert_expected(files, expected) + def test_match_excluded(self, dir_structure): expected = ["empty_for_testing.py", "insecure_random.py"] files = match_files(dir_structure, ["**/tests/**", "*request.py"]) @@ -102,39 +113,27 @@ def test_match_excluded_precedence_over_included(self, dir_structure): self._assert_expected(files, expected) def test_test_directory_not_excluded(self, dir_structure): - expected = ["test_insecure_random.py", "test_make_request.py"] + expected = ["something.py", "test_insecure_random.py", "test_make_request.py"] files = match_files( dir_structure, exclude_paths=["**/samples/**", "**/more_samples/**"] ) self._assert_expected(files, expected) - def test_include_test_overridden_by_default_excludes(self, mocker): - mocker.patch( - "codemodder.code_directory.Path.rglob", - return_value=[ - "foo/tests/test_insecure_random.py", - "foo/tests/test_make_request.py", - ], - ) - mocker.patch( - "codemodder.code_directory.Path.is_file", - return_value=True, - ) - files = match_files(Path("."), include_paths=["**/tests/**"]) + def test_include_test_overridden_by_default_excludes(self, dir_structure): + files = match_files(dir_structure, include_paths=["**/tests/**"]) self._assert_expected(files, []) - def test_include_test_without_default_includes(self, mocker): - files = ["foo/tests/test_insecure_random.py", "foo/tests/test_make_request.py"] - mocker.patch( - "codemodder.code_directory.Path.rglob", - return_value=files, - ) - mocker.patch( - "codemodder.code_directory.Path.is_file", - return_value=True, - ) - result = match_files(Path("."), exclude_paths=[]) - assert result == [Path(x) for x in files] + def test_include_test_without_default_excludes(self, dir_structure): + expected = [ + "empty_for_testing.py", + "insecure_random.py", + "make_request.py", + "something.py", + "test_insecure_random.py", + "test_make_request.py", + ] + files = match_files(dir_structure, exclude_paths=[]) + self._assert_expected(files, expected) def test_extract_line_from_pattern(self): lines = file_line_patterns(Path("insecure_random.py"), ["insecure_*.py:3"]) From 1817467a6f0cf9f10ac9b0bc01b6c091e4fdad1d Mon Sep 17 00:00:00 2001 From: clavedeluna Date: Fri, 20 Oct 2023 09:34:10 -0300 Subject: [PATCH 7/8] semgrep exclude pats get parent dir --- src/codemodder/semgrep.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/codemodder/semgrep.py b/src/codemodder/semgrep.py index 1a2cad9a..ad3e93ee 100644 --- a/src/codemodder/semgrep.py +++ b/src/codemodder/semgrep.py @@ -30,10 +30,14 @@ def run(execution_context: CodemodExecutionContext, yaml_files: List[Path]) -> d if execution_context.path_exclude: command.extend( itertools.chain.from_iterable( - map(lambda f: ["--exclude", str(f)], execution_context.path_exclude) + map( + lambda f: ["--exclude", f"{execution_context.directory}{f}"], + execution_context.path_exclude, + ) ) ) if execution_context.path_include: + # I don't know why but passing the parent path to include patterns has unexpected behavior command.extend( itertools.chain.from_iterable( map(lambda f: ["--include", str(f)], execution_context.path_include) From 791a81b1663c7b677e45361404e38fe83e8920cb Mon Sep 17 00:00:00 2001 From: clavedeluna Date: Tue, 24 Oct 2023 08:13:49 -0300 Subject: [PATCH 8/8] handle * v just path --- integration_tests/base_test.py | 21 ++++++++++++--------- src/codemodder/code_directory.py | 9 ++++++--- src/codemodder/codemodder.py | 1 - src/codemodder/semgrep.py | 2 +- tests/test_code_directory.py | 8 ++++++++ 5 files changed, 27 insertions(+), 14 deletions(-) diff --git a/integration_tests/base_test.py b/integration_tests/base_test.py index 21ed2e0f..b41326d3 100644 --- a/integration_tests/base_test.py +++ b/integration_tests/base_test.py @@ -67,6 +67,10 @@ def setup_method(self): "You must register the codemod to a CodemodCollection." ) from exc + @property + def relative_code_path(self): + return os.path.relpath(self.code_path, SAMPLES_DIR) + def _assert_run_fields(self, run, output_path): assert run["vendor"] == "pixee" assert run["tool"] == "codemodder-python" @@ -74,12 +78,12 @@ def _assert_run_fields(self, run, output_path): assert run["elapsed"] != "" assert ( run["commandLine"] - == f"codemodder {SAMPLES_DIR} --output {output_path} --codemod-include={self.codemod_wrapper.name} --path-include={self.code_path}" + == f"codemodder {SAMPLES_DIR} --output {output_path} --codemod-include={self.codemod_wrapper.name} --path-include={self.relative_code_path}" ) assert run["directory"] == os.path.abspath(SAMPLES_DIR) assert run["sarifs"] == [] - def _assert_results_fields(self, results, output_path): + def _assert_results_fields(self, results): assert len(results) == 1 result = results[0] assert result["codemod"] == self.codemod_wrapper.id @@ -94,9 +98,11 @@ def _assert_results_fields(self, results, output_path): # A codemod may change multiple files. For now we will # assert the resulting data for one file only. change = [ - result for result in result["changeset"] if result["path"] == output_path + result + for result in result["changeset"] + if result["path"] == self.relative_code_path ][0] - assert change["path"] == output_path + assert change["path"] == self.relative_code_path assert change["diff"] == self.expected_diff assert len(change["changes"]) == self.num_changes @@ -114,10 +120,7 @@ def _assert_codetf_output(self): run = codetf["run"] self._assert_run_fields(run, self.output_path) results = codetf["results"] - # CodeTf2 spec requires relative paths - self._assert_results_fields( - results, os.path.relpath(self.code_path, SAMPLES_DIR) - ) + self._assert_results_fields(results) def check_code_before(self): with open(self.code_path, "r", encoding="utf-8") as f: @@ -145,7 +148,7 @@ def test_file_rewritten(self): "--output", self.output_path, f"--codemod-include={self.codemod_wrapper.name}", - f"--path-include={self.code_path}", + f"--path-include={self.relative_code_path}", ] self.check_code_before() diff --git a/src/codemodder/code_directory.py b/src/codemodder/code_directory.py index b69ff655..d0a06cc5 100644 --- a/src/codemodder/code_directory.py +++ b/src/codemodder/code_directory.py @@ -49,10 +49,13 @@ def filter_files( else [x for x in (patterns or []) if ":" not in x] ) - # Filter patterns should be relative to a parent_path, not absolute. # TODO: handle case when parent path is "." - patterns = [f"{parent_path}{x}" for x in patterns] - + patterns = [ + str(Path(parent_path) / Path(pat)) + if not pat.startswith("*") + else parent_path + pat + for pat in patterns + ] return itertools.chain(*[fnmatch.filter(names, pattern) for pattern in patterns]) diff --git a/src/codemodder/codemodder.py b/src/codemodder/codemodder.py index e697848c..5eed3680 100644 --- a/src/codemodder/codemodder.py +++ b/src/codemodder/codemodder.py @@ -3,7 +3,6 @@ import logging import os import sys -from textwrap import indent import libcst as cst from libcst.codemod import CodemodContext diff --git a/src/codemodder/semgrep.py b/src/codemodder/semgrep.py index ad3e93ee..78f6297e 100644 --- a/src/codemodder/semgrep.py +++ b/src/codemodder/semgrep.py @@ -37,7 +37,7 @@ def run(execution_context: CodemodExecutionContext, yaml_files: List[Path]) -> d ) ) if execution_context.path_include: - # I don't know why but passing the parent path to include patterns has unexpected behavior + # Note: parent path is not passed with --include command.extend( itertools.chain.from_iterable( map(lambda f: ["--include", str(f)], execution_context.path_include) diff --git a/tests/test_code_directory.py b/tests/test_code_directory.py index 7a980804..debc55b1 100644 --- a/tests/test_code_directory.py +++ b/tests/test_code_directory.py @@ -138,3 +138,11 @@ def test_include_test_without_default_excludes(self, dir_structure): def test_extract_line_from_pattern(self): lines = file_line_patterns(Path("insecure_random.py"), ["insecure_*.py:3"]) assert lines == [3] + + def test_include_specific_file(self, dir_structure): + expected = ["empty_for_testing.py"] + files = match_files( + dir_structure / "samples" / "more_samples", + include_paths=["empty_for_testing.py"], + ) + self._assert_expected(files, expected)