Skip to content

Commit

Permalink
Also fuzz None values
Browse files (browse the repository at this point in the history)
  • Loading branch information
pietermarsman committed Jun 27, 2024
1 parent cd4d715 commit 9b55c8d
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 30 deletions.
14 changes: 6 additions & 8 deletions fuzzing/extract_text_fuzzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,12 @@ def fuzz_one_input(data: bytes) -> None:
fdp = EnhancedFuzzedDataProvider(data)

try:
with fdp.ConsumeMemoryFile() as f:
max_pages = fdp.ConsumeIntInRange(0, 1000)
extract_text(
f,
maxpages=max_pages,
page_numbers=fdp.ConsumeIntList(fdp.ConsumeIntInRange(0, max_pages), 2),
laparams=PDFValidator.generate_layout_parameters(fdp),
)
extract_text(
fdp.ConsumeMemoryFile(),
maxpages=fdp.ConsumeIntInRange(0, 10),
page_numbers=fdp.ConsumeOptionalIntList(10, 0, 10),
laparams=PDFValidator.generate_layout_parameters(fdp),
)
except (AssertionError, PSException):
return
except Exception as e:
Expand Down
5 changes: 2 additions & 3 deletions fuzzing/extract_text_to_fp_fuzzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,13 @@ def fuzz_one_input(data: bytes) -> None:

try:
with fdp.ConsumeMemoryFile(all_data=False) as f_in, io.BytesIO() as f_out:
max_pages = fdp.ConsumeIntInRange(0, 1000)
extract_text_to_fp(
f_in,
f_out,
output_type=fdp.PickValueInList(available_output_formats),
laparams=PDFValidator.generate_layout_parameters(fdp),
maxpages=max_pages,
page_numbers=fdp.ConsumeIntList(fdp.ConsumeIntInRange(0, max_pages), 2),
maxpages=fdp.ConsumeIntInRange(0, 10),
page_numbers=fdp.ConsumeOptionalIntList(10, 0, 10),
scale=fdp.ConsumeFloatInRange(0.0, 2.0),
rotation=fdp.ConsumeIntInRange(0, 360),
layoutmode=fdp.PickValueInList(available_layout_modes),
Expand Down
9 changes: 9 additions & 0 deletions fuzzing/fuzz_helpers.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import io
from typing import List, Optional

from atheris import FuzzedDataProvider

Expand All @@ -23,3 +24,11 @@ def ConsumeMemoryFile(self, all_data: bool = False) -> io.BytesIO:
return io.BytesIO(self.ConsumeRemainingBytes())
else:
return io.BytesIO(self.ConsumeRandomBytes())

def ConsumeOptionalIntList(
    self, max_count: int, min: int, max: int
) -> Optional[List[int]]:
    """Consume either ``None`` or a list of ints from the fuzz input.

    A single consumed bool decides whether a list is produced at all, so
    callers also get exercised with ``None``.

    Args:
        max_count: Inclusive upper bound passed to ``ConsumeIntInRange``
            when choosing the list length (the lower bound is 0).
        min: Lower bound forwarded to ``ConsumeIntListInRange``.
        max: Upper bound forwarded to ``ConsumeIntListInRange``.

    Returns:
        ``None`` when the consumed bool is false; otherwise a list holding
        the ``int()`` of every value yielded by ``ConsumeIntListInRange``.
    """
    # NOTE: ``min``/``max`` shadow the builtins inside this method; the
    # names are kept because they are part of the existing signature.
    if not self.ConsumeBool():
        return None

    # Consumption order matters for reproducing fuzz inputs:
    # bool first, then the length, then the elements.
    length = self.ConsumeIntInRange(0, max_count)
    values = self.ConsumeIntListInRange(length, min, max)
    return [int(value) for value in values]
7 changes: 2 additions & 5 deletions fuzzing/page_extraction_fuzzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,11 @@ def fuzz_one_input(data: bytes) -> None:

try:
with fdp.ConsumeMemoryFile() as f:
max_pages = fdp.ConsumeIntInRange(0, 1000)
list(
extract_pages(
f,
maxpages=max_pages,
page_numbers=fdp.ConsumeIntList(
fdp.ConsumeIntInRange(0, max_pages), 2
),
maxpages=fdp.ConsumeIntInRange(0, 10),
page_numbers=fdp.ConsumeOptionalIntList(10, 0, 10),
laparams=PDFValidator.generate_layout_parameters(fdp),
)
)
Expand Down
29 changes: 15 additions & 14 deletions fuzzing/pdf_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,20 +52,21 @@ def is_valid_byte_stream(data: bytes) -> bool:
# Build an optional LAParams from fuzzer-provided data; may return None.
# NOTE(review): this view shows a diff hunk without +/- markers or
# indentation. Going by the "15 additions & 14 deletions" header, the first
# body below appears to be the removed version and the second the added one.
def generate_layout_parameters(
fdp: atheris.FuzzedDataProvider,
) -> Optional[LAParams]:
# --- apparently the removed version: one conditional expression ---
# The trailing `if fdp.ConsumeBool()` is evaluated first; LAParams is only
# built when that bool is true, otherwise the result is None.
return (
LAParams(
line_overlap=fdp.ConsumeFloat(),
char_margin=fdp.ConsumeFloat(),
line_margin=fdp.ConsumeFloat(),
word_margin=fdp.ConsumeFloat(),
# boxes_flow is either a float in [-1.0, 1.0] or None, chosen by a bool
# consumed after the four margin floats.
boxes_flow=fdp.ConsumeFloatInRange(-1.0, 1.0)
if fdp.ConsumeBool()
else None,
detect_vertical=fdp.ConsumeBool(),
all_texts=fdp.ConsumeBool(),
)
if fdp.ConsumeBool()
else None
# --- apparently the added version: guard clause plus explicit local ---
# Here a true first bool yields None (the opposite polarity of the
# conditional expression above).
if fdp.ConsumeBool():
return None

# boxes_flow is now decided before any of the margin floats are consumed,
# so the order in which fuzz bytes are consumed differs from the version above.
boxes_flow: Optional[float] = None
if fdp.ConsumeBool():
boxes_flow = fdp.ConsumeFloatInRange(-1.0, 1.0)

return LAParams(
line_overlap=fdp.ConsumeFloat(),
char_margin=fdp.ConsumeFloat(),
line_margin=fdp.ConsumeFloat(),
word_margin=fdp.ConsumeFloat(),
boxes_flow=boxes_flow,
detect_vertical=fdp.ConsumeBool(),
all_texts=fdp.ConsumeBool(),
)

@staticmethod
Expand Down

0 comments on commit 9b55c8d

Please sign in to comment.