Skip to content

Commit

Permalink
Fix #495: resolve params in PDFStream.get_filters (#906)
Browse files (browse the repository at this point in the history)
Co-authored-by: Pieter Marsman <[email protected]>
  • Loading branch information
EvaSDK and pietermarsman authored Jan 16, 2024
1 parent 48774a1 commit f428846
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 8 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
- Resolving mediabox and pdffont ([#834](https://github.com/pdfminer/pdfminer.six/pull/834))
- Keywords that aren't terminated by the pattern `END_KEYWORD` before end-of-stream are parsed ([#885](https://github.com/pdfminer/pdfminer.six/pull/885))
- `ValueError` wrong error message when specifying codec for text output ([#902](https://github.com/pdfminer/pdfminer.six/pull/902))
- Resolve stream filter parameters ([#906](https://github.com/pdfminer/pdfminer.six/pull/906))
- Reading cmaps with whitespace in the name ([#935](https://github.com/pdfminer/pdfminer.six/pull/935))

## [20231228]
Expand Down
12 changes: 4 additions & 8 deletions pdfminer/pdftypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,14 +305,10 @@ def get_filters(self) -> List[Tuple[Any, Any]]:
params = [params] * len(filters)
if settings.STRICT and len(params) != len(filters):
raise PDFException("Parameters len filter mismatch")
# resolve filter if possible
_filters = []
for fltr in filters:
if hasattr(fltr, "resolve"):
fltr = fltr.resolve()[0]
_filters.append(fltr)
# return list solves https://github.com/pdfminer/pdfminer.six/issues/15
return list(zip(_filters, params))

resolved_filters = [resolve1(f) for f in filters]
resolved_params = [resolve1(param) for param in params]
return list(zip(resolved_filters, resolved_params))

def decode(self) -> None:
assert self.data is None and self.rawdata is not None, str(
Expand Down
Binary file added samples/contrib/issue_495_pdfobjref.pdf
Binary file not shown.
20 changes: 20 additions & 0 deletions tests/test_highlevel_extracttext.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,21 @@ def run_with_file(sample_path):
"Link to heading “that is” not working with vim-pandoc.\n\n"
"Subheading\n\nSome “more text”\n\n1\n\n\f",
"zen_of_python_corrupted.pdf": "Mai 30, 18 13:27\n\nzen_of_python.txt",
"contrib/issue_495_pdfobjref.pdf": "8\n\n7\n\n6\n\n5\n\n4\n\n3\n\n2\n\n1"
"\n\n150,00\n\n30,00\n\n(cid:72) 0,05 A\n\n0\n0\n,\n0\n2\n\n0\n0\n,\n8\n\n"
"(cid:69) 0,05\n\n0\n0\n,\n0\n5\n\nA\n\nF\n\nE\n\nD\n\n20,00\n\n16,00\n\n"
"+\n0,05\n15,00 - 0,00\n\nC\n\n0\n0\n,\n0\n4\n\n0\n0\n,\n0\n2\n\n"
"R 1 8 , 0 0\n\nM12x1.75 - 6H\n\n0\n0\n,\n5\n4\n\nB\n\nA\n\n"
"0\n0\n,\n6\n1\n(cid:142)\n\n0\n0\n,\n6\n1\n\n+\n0,50\n15,00 - 0,00\n\n"
"60,00 (cid:66)0,02\n\n100,00 (cid:66)0,05\n\n132,00\n\n"
"9\nH\n0\n1\n(cid:142)\n\n9\nH\n0\n1\n(cid:142)\n\n(cid:68) 0,1 A\n\n"
"+\n0,00\n70,00 - 0,02\n\n50,00\n\n(cid:76) 0,1\n\n(cid:76) 0,1\n\n"
"0\n0\n,\n5\n3\n\nF\n\nE\n\nD\n\nC\n\nB\n\nAllgemeintoleranzen\n\n"
"MATERIAL\n\nDIN ISO 2768 - mK\n\nPET BLACK\n\nFINISH\n\n"
"Eloxieren (natur)\n\nRa 1,6\n\nDate\n29.03.2021\n\n"
"Name\nLucas Giering\n\nDrawn\n\nChecked\n\nStandard\n\n"
"Arretierungshilfe\n\nA\n\n1 \n\nA2\n\n8\n\n7\n\n6\n\n5\n\n4\n\nState\n\n"
"Changes\n\nDate\n\nName\n\n3\n\n2\n\n1",
"contrib/issue_566_test_1.pdf": "ISSUE Date:2019-4-25 Buyer:黎荣",
"contrib/issue_566_test_2.pdf": "甲方:中国饮料有限公司(盖章)",
"contrib/issue-625-identity-cmap.pdf": "Termin płatności: 2021-05-03",
Expand Down Expand Up @@ -105,6 +120,11 @@ def test_zlib_corrupted(self):
expected = test_strings[test_file]
self.assertEqual(s[: len(expected)], expected)

def test_issue_495_pdfobjref_iterable(self):
    """Extracted text of the issue 495 sample equals the expected string.

    The extracted text is stripped of leading and trailing whitespace
    before it is compared with the ``test_strings`` entry for the sample.
    """
    sample = "contrib/issue_495_pdfobjref.pdf"
    extracted = run_with_file(sample).strip()
    self.assertEqual(extracted, test_strings[sample])

def test_issue_566_cmap_bytes(self):
test_file = "contrib/issue_566_test_1.pdf"
s = run_with_file(test_file)
Expand Down

0 comments on commit f428846

Please sign in to comment.