Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix TypeError in text-positioning operators Td and TD by safely parsing float #1000

Merged
merged 2 commits into from
Jul 9, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -20,6 +20,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
- `ValueError` when corrupt PDF specifies a negative xref location ([#980](https://github.com/pdfminer/pdfminer.six/pull/980))
- `ValueError` when corrupt PDF specifies an invalid mediabox ([#987](https://github.com/pdfminer/pdfminer.six/pull/987))
- `RecursionError` when corrupt PDF specifies a recursive /Pages object ([#998](https://github.com/pdfminer/pdfminer.six/pull/998))
- `TypeError` when corrupt PDF specifies text-positioning operators with invalid values ([#1000](https://github.com/pdfminer/pdfminer.six/pull/1000))

### Removed

7 changes: 7 additions & 0 deletions pdfminer/casting.py
Original file line number Diff line number Diff line change
@@ -6,3 +6,10 @@ def safe_int(o: Any) -> Optional[int]:
return int(o)
except (TypeError, ValueError):
return None


def safe_float(o: Any) -> Optional[float]:
    """Convert ``o`` to a float, or return ``None`` if it cannot be converted.

    This never raises for unconvertible input: values of the wrong type,
    malformed numeric strings, and integers too large to be represented as a
    float all yield ``None``.

    :param o: The value to convert.
    :return: ``float(o)`` on success, otherwise ``None``.
    """
    try:
        return float(o)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: float() of an int that does not fit in a double,
        # e.g. a corrupt file containing a number with hundreds of digits.
        return None
51 changes: 38 additions & 13 deletions pdfminer/pdfinterp.py
Original file line number Diff line number Diff line change
@@ -4,10 +4,11 @@
from typing import Dict, List, Mapping, Optional, Sequence, Tuple, Union, cast

from pdfminer import settings
from pdfminer.casting import safe_float
from pdfminer.cmapdb import CMap, CMapBase, CMapDB
from pdfminer.pdfcolor import PREDEFINED_COLORSPACE, PDFColorSpace
from pdfminer.pdfdevice import PDFDevice, PDFTextSeq
from pdfminer.pdfexceptions import PDFException
from pdfminer.pdfexceptions import PDFException, PDFValueError
from pdfminer.pdffont import (
PDFCIDFont,
PDFFont,
@@ -791,20 +792,44 @@ def do_Ts(self, rise: PDFStackT) -> None:
self.textstate.rise = cast(float, rise)

def do_Td(self, tx: PDFStackT, ty: PDFStackT) -> None:
"""Move text position"""
tx = cast(float, tx)
ty = cast(float, ty)
(a, b, c, d, e, f) = self.textstate.matrix
self.textstate.matrix = (a, b, c, d, tx * a + ty * c + e, tx * b + ty * d + f)
"""Move to the start of the next line

Offset from the start of the current line by (tx, ty).
"""
tx_ = safe_float(tx)
ty_ = safe_float(ty)
if tx_ is not None and ty_ is not None:
(a, b, c, d, e, f) = self.textstate.matrix
e_new = tx_ * a + ty_ * c + e
f_new = tx_ * b + ty_ * d + f
self.textstate.matrix = (a, b, c, d, e_new, f_new)

elif settings.STRICT:
raise PDFValueError(f"Invalid offset ({tx!r}, {ty!r}) for Td")

self.textstate.linematrix = (0, 0)

def do_TD(self, tx: PDFStackT, ty: PDFStackT) -> None:
"""Move text position and set leading"""
tx = cast(float, tx)
ty = cast(float, ty)
(a, b, c, d, e, f) = self.textstate.matrix
self.textstate.matrix = (a, b, c, d, tx * a + ty * c + e, tx * b + ty * d + f)
self.textstate.leading = ty
"""Move to the start of the next line.

Offset from the start of the current line by (tx, ty). As a side effect, this
operator sets the leading parameter in the text state.
"""
tx_ = safe_float(tx)
ty_ = safe_float(ty)

if tx_ is not None and ty_ is not None:
(a, b, c, d, e, f) = self.textstate.matrix
e_new = tx_ * a + ty_ * c + e
f_new = tx_ * b + ty_ * d + f
self.textstate.matrix = (a, b, c, d, e_new, f_new)

elif settings.STRICT:
raise PDFValueError(f"Invalid offset ({tx!r}, {ty!r}) for TD")

if ty_ is not None:
self.textstate.leading = ty_

self.textstate.linematrix = (0, 0)

def do_Tm(
@@ -961,7 +986,7 @@ def execute(self, streams: Sequence[object]) -> None:
except PSEOF:
# empty page
return
while 1:
while True:
try:
(_, obj) = parser.nextobject()
except PSEOF: