Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix FileNotFoundError caused by SubprocessShellFalse, Improve CombineStartswithEndswith, Add CombineIsinstanceIssubclass #489

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion src/codemodder/codemods/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from enum import Enum
from pathlib import Path
from typing import Any, Optional, TypeAlias
from typing import Any, Iterator, Optional, Type, TypeAlias, TypeVar

import libcst as cst
from libcst import MetadataDependent, matchers
Expand Down Expand Up @@ -216,3 +216,23 @@ def is_zero(node: cst.CSTNode) -> bool:
case cst.Call(func=cst.Name("int")) | cst.Call(func=cst.Name("float")):
return is_zero(node.args[0].value)
return False


_CSTNode = TypeVar("_CSTNode", bound=cst.CSTNode)


def extract_boolean_operands(
node: cst.BooleanOperation, ensure_type: Type[_CSTNode] = cst.CSTNode
) -> Iterator[_CSTNode]:
"""
Recursively extract operands from a cst.BooleanOperation node from left to right as an iterator of nodes.
"""
if isinstance(node.left, cst.BooleanOperation):
# Recursively yield operands from the boolean operation on the left
yield from extract_boolean_operands(node.left, ensure_type)
else:
# Yield left operand
yield cst.ensure_type(node.left, ensure_type)

# Yield right operand
yield cst.ensure_type(node.right, ensure_type)
96 changes: 96 additions & 0 deletions src/codemodder/codemods/utils_decorators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
from functools import wraps
from typing import Callable, TypeVar

import libcst as cst

from codemodder.codemods.base_visitor import BaseTransformer, BaseVisitor

# TypeVars for decorators below
_BaseVisitorOrTransformer = TypeVar(
"_BaseVisitorOrTransformer", BaseVisitor, BaseTransformer
)
_CSTNode = TypeVar("_CSTNode", bound=cst.CSTNode)


def check_filter_by_path_includes_or_excludes(
func: Callable[[_BaseVisitorOrTransformer, _CSTNode, _CSTNode], cst.CSTNode]
) -> Callable[[_BaseVisitorOrTransformer, _CSTNode, _CSTNode], cst.CSTNode]:
"""
Decorator to filter nodes based on path-includes or path-excludes flags.

Calls the decorated func only if the original node is not filtered.
otherwise, returns the updated node as is.

```
@check_filter_by_path_includes_or_excludes
def leave_Call(self, original_node: cst.Call, updated_node: cst.Call) -> cst.Call
# rest of the method
```

Is equivalent to:

```
def leave_Call(self, original_node: cst.Call, updated_node: cst.Call) -> cst.Call
# Added by the decorator
if not self.filter_by_path_includes_or_excludes(
self.node_position(original_node)
):
return updated_node

# rest of the method
```
"""

@wraps(func)
def wrapper(
instance: _BaseVisitorOrTransformer,
original_node: _CSTNode,
updated_node: _CSTNode,
) -> cst.CSTNode:
if not instance.filter_by_path_includes_or_excludes(
instance.node_position(original_node)
):
return updated_node
return func(instance, original_node, updated_node)

return wrapper


def check_node_is_not_selected(
func: Callable[[_BaseVisitorOrTransformer, _CSTNode, _CSTNode], cst.CSTNode]
) -> Callable[[_BaseVisitorOrTransformer, _CSTNode, _CSTNode], cst.CSTNode]:
"""
Decorator to filter nodes based on whether the node is selected or not.

Calls the decorated func only if the original node is not selected.
otherwise, returns the updated node as is.

```
@check_node_is_not_selected
def leave_Call(self, original_node: cst.Call, updated_node: cst.Call) -> cst.Call
# rest of the method
```

Is equivalent to:

```
def leave_Call(self, original_node: cst.Call, updated_node: cst.Call) -> cst.Call
# Added by the decorator
if not self.node_is_selected(original_node):
return updated_node

# rest of the method
```
"""

@wraps(func)
def wrapper(
instance: _BaseVisitorOrTransformer,
original_node: _CSTNode,
updated_node: _CSTNode,
) -> cst.CSTNode:
if not instance.node_is_selected(original_node):
return updated_node
return func(instance, original_node, updated_node)

return wrapper
98 changes: 98 additions & 0 deletions src/core_codemods/combine_isinstance_issubclass.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
import libcst as cst
from libcst import matchers as m

from codemodder.codemods.utils import extract_boolean_operands
from codemodder.codemods.utils_decorators import (
check_filter_by_path_includes_or_excludes,
)
from codemodder.codemods.utils_mixin import NameResolutionMixin
from core_codemods.api import Metadata, ReviewGuidance, SimpleCodemod


class CombineIsinstanceIssubclass(SimpleCodemod, NameResolutionMixin):
metadata = Metadata(
name="combine-isinstance-issubclass",
summary="Simplify Boolean Expressions Using `isinstance` and `issubclass`",
review_guidance=ReviewGuidance.MERGE_WITHOUT_REVIEW,
references=[],
)
change_description = "Use tuple of matches instead of boolean expression with `isinstance` or `issubclass`"

@check_filter_by_path_includes_or_excludes
def leave_BooleanOperation(
self, original_node: cst.BooleanOperation, updated_node: cst.BooleanOperation
) -> cst.CSTNode:
if self.matches_isinstance_issubclass_or_pattern(original_node):
self.report_change(original_node)

elements = []
seen_values = set()
for call in extract_boolean_operands(updated_node, ensure_type=cst.Call):
class_tuple_arg_value = call.args[1].value
if isinstance(class_tuple_arg_value, cst.Tuple):
arg_elements = class_tuple_arg_value.elements
else:
arg_elements = (cst.Element(value=class_tuple_arg_value),)

for element in arg_elements:
if (value := getattr(element.value, "value", None)) in seen_values:
# If an element has a non-None evaluated value that has already been seen, continue to avoid duplicates
continue
if value is not None:
seen_values.add(value)
elements.append(element)

instance_arg = updated_node.left.args[0]
new_class_tuple_arg = cst.Arg(value=cst.Tuple(elements=elements))
return cst.Call(func=call.func, args=[instance_arg, new_class_tuple_arg])

return updated_node

def matches_isinstance_issubclass_or_pattern(
self, node: cst.BooleanOperation
) -> bool:
# Match the pattern: isinstance(x, cls1) or isinstance(x, cls2) or isinstance(x, cls3) or ...
# and the same but with issubclass
args = [m.Arg(value=m.Name()), m.Arg(value=m.Name() | m.Tuple())]
isinstance_call = m.Call(
func=m.Name("isinstance"),
args=args,
)
issubclass_call = m.Call(
func=m.Name("issubclass"),
args=args,
)
isinstance_or = m.BooleanOperation(
left=isinstance_call, operator=m.Or(), right=isinstance_call
)
issubclass_or = m.BooleanOperation(
left=issubclass_call, operator=m.Or(), right=issubclass_call
)

# Check for simple case: isinstance(x, cls) or issubclass(x, cls)
if (
m.matches(node, isinstance_or | issubclass_or)
and node.left.func.value == node.right.func.value # Same function
and node.left.args[0].value.value
== node.right.args[0].value.value # Same first argument (instance)
):
return True

# Check for chained case: isinstance(x, cls1) or isinstance(x, cls2) or isinstance(x, cls3) or ...
chained_or = m.BooleanOperation(
left=m.BooleanOperation(operator=m.Or()),
operator=m.Or(),
right=isinstance_call | issubclass_call,
)
if m.matches(node, chained_or):
return all(
(
call.func.value == node.right.func.value # Same function
and call.args[0].value.value
== node.right.args[0].value.value # Same first argument (instance)
)
for call in extract_boolean_operands(node, ensure_type=cst.Call)
)

# No match
return False
75 changes: 55 additions & 20 deletions src/core_codemods/combine_startswith_endswith.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
import libcst as cst
from libcst import matchers as m

from codemodder.codemods.utils import extract_boolean_operands
from codemodder.codemods.utils_decorators import (
check_filter_by_path_includes_or_excludes,
)
from codemodder.codemods.utils_mixin import NameResolutionMixin
from core_codemods.api import Metadata, ReviewGuidance, SimpleCodemod

Expand All @@ -14,39 +18,53 @@ class CombineStartswithEndswith(SimpleCodemod, NameResolutionMixin):
)
change_description = "Use tuple of matches instead of boolean expression"

@check_filter_by_path_includes_or_excludes
def leave_BooleanOperation(
self, original_node: cst.BooleanOperation, updated_node: cst.BooleanOperation
) -> cst.CSTNode:
if not self.filter_by_path_includes_or_excludes(
self.node_position(original_node)
):
return updated_node

if self.matches_startswith_endswith_or_pattern(original_node):
left_call = cst.ensure_type(updated_node.left, cst.Call)
right_call = cst.ensure_type(updated_node.right, cst.Call)

self.report_change(original_node)

new_arg = cst.Arg(
value=cst.Tuple(
elements=[
cst.Element(value=left_call.args[0].value),
cst.Element(value=right_call.args[0].value),
]
)
)
elements = []
seen_evaluated_values = set()
for call in extract_boolean_operands(updated_node, ensure_type=cst.Call):
arg_value = call.args[0].value
if isinstance(arg_value, cst.Tuple):
arg_elements = arg_value.elements
else:
arg_elements = (cst.Element(value=arg_value),)

for element in arg_elements:
if (
evaluated_value := getattr(
element.value, "evaluated_value", None
)
) in seen_evaluated_values:
# If an element has a non-None evaluated value that has already been seen, continue to avoid duplicates
continue
if evaluated_value is not None:
seen_evaluated_values.add(evaluated_value)
elements.append(element)

return cst.Call(func=left_call.func, args=[new_arg])
new_arg = cst.Arg(value=cst.Tuple(elements=elements))
return cst.Call(func=call.func, args=[new_arg])

return updated_node

def matches_startswith_endswith_or_pattern(
self, node: cst.BooleanOperation
) -> bool:
# Match the pattern: x.startswith("...") or x.startswith("...")
# Match the pattern: x.startswith("...") or x.startswith("...") or x.startswith("...") or ...
# and the same but with endswith
args = [m.Arg(value=m.SimpleString())]
args = [
m.Arg(
value=m.Tuple()
| m.SimpleString()
| m.ConcatenatedString()
| m.FormattedString()
| m.Name()
)
]
startswith = m.Call(
func=m.Attribute(value=m.Name(), attr=m.Name("startswith")),
args=args,
Expand All @@ -60,7 +78,24 @@ def matches_startswith_endswith_or_pattern(
)
endswith_or = m.BooleanOperation(left=endswith, operator=m.Or(), right=endswith)

return (
# Check for simple case: x.startswith("...") or x.startswith("...")
if (
m.matches(node, startswith_or | endswith_or)
and node.left.func.value.value == node.right.func.value.value
):
return True

# Check for chained case: x.startswith("...") or x.startswith("...") or x.startswith("...") or ...
chained_or = m.BooleanOperation(
left=m.BooleanOperation(operator=m.Or()),
operator=m.Or(),
right=startswith | endswith,
)
if m.matches(node, chained_or):
return all(
call.func.value.value == node.right.func.value.value # Same function
for call in extract_boolean_operands(node, ensure_type=cst.Call)
)

# No match
return False
25 changes: 15 additions & 10 deletions src/core_codemods/subprocess_shell_false.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import libcst as cst
from libcst import matchers
from libcst import matchers as m
from libcst.metadata import ParentNodeProvider

from codemodder.codemods.check_annotations import is_disabled_by_annotations
from codemodder.codemods.libcst_transformer import NewArg
from codemodder.codemods.utils_decorators import check_node_is_not_selected
from codemodder.codemods.utils_mixin import NameResolutionMixin
from core_codemods.api import Metadata, Reference, ReviewGuidance, SimpleCodemod

Expand Down Expand Up @@ -35,17 +36,20 @@ class SubprocessShellFalse(SimpleCodemod, NameResolutionMixin):
)
IGNORE_ANNOTATIONS = ["S603"]

def leave_Call(self, original_node: cst.Call, updated_node: cst.Call):
if not self.node_is_selected(original_node):
return updated_node

if self.find_base_name(original_node.func) in self.SUBPROCESS_FUNCS:
@check_node_is_not_selected
def leave_Call(self, original_node: cst.Call, updated_node: cst.Call) -> cst.Call:
if (
self.find_base_name(original_node.func) in self.SUBPROCESS_FUNCS
) and not m.matches(
original_node.args[0],
m.Arg(
value=m.SimpleString() | m.ConcatenatedString() | m.FormattedString()
), # First argument to subprocess.<func> cannot be a string or setting shell=False will cause a FileNotFoundError
):
for arg in original_node.args:
if matchers.matches(
if m.matches(
arg,
matchers.Arg(
keyword=matchers.Name("shell"), value=matchers.Name("True")
),
m.Arg(keyword=m.Name("shell"), value=m.Name("True")),
) and not is_disabled_by_annotations(
original_node,
self.metadata, # type: ignore
Expand All @@ -57,4 +61,5 @@ def leave_Call(self, original_node: cst.Call, updated_node: cst.Call):
[NewArg(name="shell", value="False", add_if_missing=False)],
)
return self.update_arg_target(updated_node, new_args)

return updated_node
Loading