def apply_tiff_predictor(
    colors: int, columns: int, bitspercomponent: int, data: bytes
) -> bytes:
    """Reverse the effect of the TIFF predictor 2 (horizontal differencing).

    Every byte after the first pixel of a row is treated as a delta and is
    added, modulo 256, to the already-decoded byte one pixel to its left.
    The first pixel of each row is copied through unchanged.

    Documentation: https://www.itu.int/itudoc/itu-t/com16/tiff-fx/docs/tiff6.pdf
    (Section 14, page 64)

    Args:
        colors: Number of components per pixel.
        columns: Number of pixels per row.
        bitspercomponent: Bits per component; only 8 is supported.
        data: Predictor-encoded bytes, laid out row after row.

    Returns:
        The decoded bytes, the same length as `data`.  A final row shorter
        than `colors * columns` bytes is decoded as far as it goes instead
        of failing with an IndexError.

    Raises:
        ValueError: If `bitspercomponent` is not 8, or if `colors * columns`
            is not a positive row size.
    """
    if bitspercomponent != 8:
        error_msg = f"Unsupported `bitspercomponent': {bitspercomponent}"
        raise ValueError(error_msg)
    bpp = colors * (bitspercomponent // 8)
    nbytes = columns * bpp
    if nbytes <= 0:
        # A zero step would make range() fail with an unrelated message and
        # a negative one would silently skip every row.
        error_msg = f"Invalid row size: colors={colors}, columns={columns}"
        raise ValueError(error_msg)
    # Decode in place: walking left to right guarantees that buf[pos - bpp]
    # already holds its final value when buf[pos] is computed.
    buf = bytearray(data)
    total = len(buf)
    for row_start in range(0, total, nbytes):
        # Clamp the end so that a truncated last row is still decoded.
        row_end = min(row_start + nbytes, total)
        for pos in range(row_start + bpp, row_end):
            buf[pos] = (buf[pos] + buf[pos - bpp]) & 0xFF
    return bytes(buf)