Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support TIFF predictor on image streams #18

Merged
merged 1 commit into from
Nov 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion playa/pdftypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from playa.ccitt import ccittfaxdecode
from playa.lzw import lzwdecode
from playa.runlength import rldecode
from playa.utils import apply_png_predictor
from playa.utils import apply_png_predictor, apply_tiff_predictor

if TYPE_CHECKING:
from playa.document import Document
Expand Down Expand Up @@ -450,6 +450,18 @@ def decode(self, strict: bool = False) -> None:
if pred == 1:
# no predictor
pass
elif pred == 2:
# TIFF predictor 2
colors = int_value(params.get("Colors", 1))
columns = int_value(params.get("Columns", 1))
raw_bits_per_component = params.get("BitsPerComponent", 8)
bitspercomponent = int_value(raw_bits_per_component)
data = apply_tiff_predictor(
colors,
columns,
bitspercomponent,
data,
)
elif pred >= 10:
# PNG predictor
colors = int_value(params.get("Colors", 1))
Expand Down
31 changes: 27 additions & 4 deletions playa/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import string
from typing import (
TYPE_CHECKING,
Any,
Iterable,
Iterator,
Expand All @@ -14,9 +13,6 @@

from playa.exceptions import PDFSyntaxError

if TYPE_CHECKING:
pass


def make_compat_bytes(in_str: str) -> bytes:
"""Converts to bytes, encoding to unicode."""
Expand Down Expand Up @@ -47,6 +43,33 @@ def paeth_predictor(left: int, above: int, upper_left: int) -> int:
return upper_left


def apply_tiff_predictor(
    colors: int, columns: int, bitspercomponent: int, data: bytes
) -> bytes:
    """Reverse the effect of the TIFF predictor 2 (horizontal differencing).

    Documentation: https://www.itu.int/itudoc/itu-t/com16/tiff-fx/docs/tiff6.pdf
    (Section 14, page 64)

    Every byte after the first pixel of a row was stored as the difference
    from the byte one pixel to its left; decoding adds that (already
    decoded) left neighbour back, modulo 256.  The first pixel of each row
    is stored verbatim.

    Args:
        colors: Number of components per pixel.
        columns: Number of pixels per row.
        bitspercomponent: Bits per component; only 8 is supported.
        data: Predictor-encoded bytes, rows laid out back to back.

    Returns:
        The decoded bytes, the same length as ``data``.  A truncated final
        row is decoded as far as it goes instead of failing.

    Raises:
        ValueError: If ``bitspercomponent`` is not 8, or if ``colors`` and
            ``columns`` do not give a positive row size.
    """
    if bitspercomponent != 8:
        error_msg = f"Unsupported `bitspercomponent': {bitspercomponent}"
        raise ValueError(error_msg)
    bpp = colors * (bitspercomponent // 8)
    nbytes = columns * bpp
    if nbytes <= 0:
        # A non-positive row size cannot be used as a stride; fail with a
        # clear message instead of an opaque range() error or empty output.
        error_msg = f"Invalid row size: colors={colors}, columns={columns}"
        raise ValueError(error_msg)
    # Decode in place: buf[i - bpp] has always been decoded before buf[i].
    buf = bytearray(data)
    total = len(buf)
    for row_start in range(0, total, nbytes):
        # Clamp so that a short final row does not index past the data.
        row_end = min(row_start + nbytes, total)
        for i in range(row_start + bpp, row_end):
            buf[i] = (buf[i] + buf[i - bpp]) & 0xFF
    return bytes(buf)


def apply_png_predictor(
pred: int,
colors: int,
Expand Down
Binary file added samples/contrib/test_pdf_with_tiff_predictor.pdf
Binary file not shown.
7 changes: 7 additions & 0 deletions tests/test_open.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,13 @@ def test_glyph_offsets() -> None:
glyph_y = dic["glyph_offset_y"]


def test_tiff_predictor() -> None:
    """Check the decoded size of the first image in the TIFF-predictor sample."""
    sample_path = TESTDIR / "contrib" / "test_pdf_with_tiff_predictor.pdf"
    with playa.open(sample_path) as doc:
        first_page = doc.pages[0]
        image = next(first_page.images)
        decoded_size = len(image.stream.buffer)
        # Decoded TIFF: 600 x 600 + a header
        assert decoded_size == 360600


if __name__ == "__main__":
import logging

Expand Down