diff --git a/CHANGELOG.md b/CHANGELOG.md
index 71bb1615..ad4282d5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,7 +8,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
### Added
- Support for zipped JPEGs ([#938](https://github.com/pdfminer/pdfminer.six/pull/938))
-
- Fuzzing harnesses for integration into Google's OSS-Fuzz ([#949](https://github.com/pdfminer/pdfminer.six/pull/949))
- Support for setuptools-git-versioning version 2.0.0 ([#957](https://github.com/pdfminer/pdfminer.six/pull/957))
@@ -21,6 +20,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
- Reading cmaps with whitespace in the name ([#935](https://github.com/pdfminer/pdfminer.six/pull/935))
- Optimize `apply_png_predictor` by using lists ([#912](https://github.com/pdfminer/pdfminer.six/pull/912))
+### Changed
+
+- Updated Python 3.7 syntax to 3.8 ([#956](https://github.com/pdfminer/pdfminer.six/pull/956))
+
## [20231228]
### Removed
diff --git a/pdfminer/converter.py b/pdfminer/converter.py
index 3a390d49..9b90a769 100644
--- a/pdfminer/converter.py
+++ b/pdfminer/converter.py
@@ -458,9 +458,7 @@ def write_header(self) -> None:
return
def write_footer(self) -> None:
- page_links = [
- '{} '.format(i, i) for i in range(1, self.pageno)
- ]
+ page_links = [f'{i} ' for i in range(1, self.pageno)]
s = '
Page: %s
\n' % ", ".join(
page_links
)
@@ -784,7 +782,9 @@ def render(item: LTItem) -> None:
)
self.write(s)
elif isinstance(item, LTFigure):
- s = '\n' % (item.name, bbox2str(item.bbox))
+ s = '\n'.format(
+ item.name, bbox2str(item.bbox)
+ )
self.write(s)
for child in item:
render(child)
@@ -975,7 +975,7 @@ def render(item: LTItem) -> None:
self.write("\n")
elif isinstance(item, LTTextLine):
self.write(
- "" % ((self.bbox_repr(item.bbox)))
+ "" % (self.bbox_repr(item.bbox))
)
for child_line in item:
render(child_line)
diff --git a/pdfminer/fontmetrics.py b/pdfminer/fontmetrics.py
index 54b2c5a9..72038a10 100644
--- a/pdfminer/fontmetrics.py
+++ b/pdfminer/fontmetrics.py
@@ -36,7 +36,7 @@ def convert_font_metrics(path: str) -> None:
See below for the output.
"""
fonts = {}
- with open(path, "r") as fileinput:
+ with open(path) as fileinput:
for line in fileinput.readlines():
f = line.strip().split(" ")
if not f:
@@ -66,7 +66,7 @@ def convert_font_metrics(path: str) -> None:
print("# -*- python -*-")
print("FONT_METRICS = {")
for (fontname, (props, chars)) in fonts.items():
- print(" {!r}: {!r},".format(fontname, (props, chars)))
+ print(f" {fontname!r}: {(props, chars)!r},")
print("}")
diff --git a/pdfminer/glyphlist.py b/pdfminer/glyphlist.py
index 9d4eb908..9e5135f3 100644
--- a/pdfminer/glyphlist.py
+++ b/pdfminer/glyphlist.py
@@ -58,7 +58,7 @@ def convert_glyphlist(path: str) -> None:
See output below.
"""
state = 0
- with open(path, "r") as fileinput:
+ with open(path) as fileinput:
for line in fileinput.readlines():
line = line.strip()
if not line or line.startswith("#"):
diff --git a/pdfminer/layout.py b/pdfminer/layout.py
index ebaac1e2..e706f6e1 100644
--- a/pdfminer/layout.py
+++ b/pdfminer/layout.py
@@ -130,7 +130,7 @@ class LTText:
"""Interface for things that have text"""
def __repr__(self) -> str:
- return "<%s %r>" % (self.__class__.__name__, self.get_text())
+ return "<{} {!r}>".format(self.__class__.__name__, self.get_text())
def get_text(self) -> str:
"""Text contained in this object"""
@@ -145,7 +145,7 @@ def __init__(self, bbox: Rect) -> None:
self.set_bbox(bbox)
def __repr__(self) -> str:
- return "<%s %s>" % (self.__class__.__name__, bbox2str(self.bbox))
+ return "<{} {}>".format(self.__class__.__name__, bbox2str(self.bbox))
# Disable comparison.
def __lt__(self, _: object) -> bool:
@@ -331,7 +331,7 @@ def __init__(self, name: str, stream: PDFStream, bbox: Rect) -> None:
self.colorspace = [self.colorspace]
def __repr__(self) -> str:
- return "<%s(%s) %s %r>" % (
+ return "<{}({}) {} {!r}>".format(
self.__class__.__name__,
self.name,
bbox2str(self.bbox),
@@ -411,7 +411,7 @@ def __init__(
return
def __repr__(self) -> str:
- return "<%s %s matrix=%s font=%r adv=%s text=%r>" % (
+ return "<{} {} matrix={} font={!r} adv={} text={!r}>".format(
self.__class__.__name__,
bbox2str(self.bbox),
matrix2str(self.matrix),
@@ -504,7 +504,7 @@ def __init__(self, word_margin: float) -> None:
return
def __repr__(self) -> str:
- return "<%s %s %r>" % (
+ return "<{} {} {!r}>".format(
self.__class__.__name__,
bbox2str(self.bbox),
self.get_text(),
@@ -675,7 +675,7 @@ def __init__(self) -> None:
return
def __repr__(self) -> str:
- return "<%s(%s) %s %r>" % (
+ return "<{}({}) {} {!r}>".format(
self.__class__.__name__,
self.index,
bbox2str(self.bbox),
@@ -1008,7 +1008,7 @@ def __init__(self, name: str, bbox: Rect, matrix: Matrix) -> None:
return
def __repr__(self) -> str:
- return "<%s(%s) %s matrix=%s>" % (
+ return "<{}({}) {} matrix={}>".format(
self.__class__.__name__,
self.name,
bbox2str(self.bbox),
@@ -1036,7 +1036,7 @@ def __init__(self, pageid: int, bbox: Rect, rotate: float = 0) -> None:
return
def __repr__(self) -> str:
- return "<%s(%r) %s rotate=%r>" % (
+ return "<{}({!r}) {} rotate={!r}>".format(
self.__class__.__name__,
self.pageid,
bbox2str(self.bbox),
diff --git a/pdfminer/pdfdevice.py b/pdfminer/pdfdevice.py
index 075585fa..a3564909 100644
--- a/pdfminer/pdfdevice.py
+++ b/pdfminer/pdfdevice.py
@@ -292,11 +292,11 @@ def begin_tag(self, tag: PSLiteral, props: Optional["PDFStackT"] = None) -> None
if isinstance(props, dict):
s = "".join(
[
- ' {}="{}"'.format(utils.enc(k), utils.make_compat_str(v))
+ f' {utils.enc(k)}="{utils.make_compat_str(v)}"'
for (k, v) in sorted(props.items())
]
)
- out_s = "<{}{}>".format(utils.enc(cast(str, tag.name)), s)
+ out_s = f"<{utils.enc(cast(str, tag.name))}{s}>"
self._write(out_s)
self._stack.append(tag)
return
diff --git a/pdfminer/pdfdocument.py b/pdfminer/pdfdocument.py
index e9f91ad9..6898759f 100644
--- a/pdfminer/pdfdocument.py
+++ b/pdfminer/pdfdocument.py
@@ -157,12 +157,12 @@ def load(self, parser: PDFParser) -> None:
break
f = line.split(b" ")
if len(f) != 2:
- error_msg = "Trailer not found: {!r}: line={!r}".format(parser, line)
+ error_msg = f"Trailer not found: {parser!r}: line={line!r}"
raise PDFNoValidXRef(error_msg)
try:
(start, nobjs) = map(int, f)
except ValueError:
- error_msg = "Invalid line: {!r}: line={!r}".format(parser, line)
+ error_msg = f"Invalid line: {parser!r}: line={line!r}"
raise PDFNoValidXRef(error_msg)
for objid in range(start, start + nobjs):
try:
@@ -829,7 +829,7 @@ def _getobj_parse(self, pos: int, objid: int) -> object:
objid1 = x[-2]
# #### end hack around malformed pdf files
if objid1 != objid:
- raise PDFSyntaxError("objid mismatch: {!r}={!r}".format(objid1, objid))
+ raise PDFSyntaxError(f"objid mismatch: {objid1!r}={objid!r}")
if kwd != KWD(b"obj"):
raise PDFSyntaxError("Invalid object spec: offset=%r" % pos)
diff --git a/pdfminer/pdffont.py b/pdfminer/pdffont.py
index b521cd52..a32b55e4 100644
--- a/pdfminer/pdffont.py
+++ b/pdfminer/pdffont.py
@@ -1063,7 +1063,7 @@ def __init__(
cid_ordering = resolve1(self.cidsysteminfo.get("Ordering", b"unknown")).decode(
"latin1"
)
- self.cidcoding = "{}-{}".format(cid_registry.strip(), cid_ordering.strip())
+ self.cidcoding = f"{cid_registry.strip()}-{cid_ordering.strip()}"
self.cmap: CMapBase = self.get_cmap_from_spec(spec, strict)
try:
diff --git a/pdfminer/pdfinterp.py b/pdfminer/pdfinterp.py
index 79b351a6..3ff2c144 100644
--- a/pdfminer/pdfinterp.py
+++ b/pdfminer/pdfinterp.py
@@ -320,7 +320,7 @@ def do_keyword(self, pos: int, token: PSKeyword) -> None:
try:
(_, objs) = self.end_type("inline")
if len(objs) % 2 != 0:
- error_msg = "Invalid dictionary construct: {!r}".format(objs)
+ error_msg = f"Invalid dictionary construct: {objs!r}"
raise PSTypeError(error_msg)
d = {literal_name(k): v for (k, v) in choplist(2, objs)}
(pos, data) = self.get_inline_data(pos + len(b"ID "))
diff --git a/pdfminer/pdftypes.py b/pdfminer/pdftypes.py
index 635c5a66..a2dced55 100644
--- a/pdfminer/pdftypes.py
+++ b/pdfminer/pdftypes.py
@@ -1,7 +1,7 @@
import io
import logging
-import sys
import zlib
+from typing import Protocol
from typing import (
TYPE_CHECKING,
Any,
@@ -42,25 +42,17 @@
LITERALS_JPX_DECODE = (LIT("JPXDecode"),)
-if sys.version_info >= (3, 8):
- from typing import Protocol
+class DecipherCallable(Protocol):
+ """Fully typed decipher callback, with an optional `attrs` parameter."""
- class DecipherCallable(Protocol):
- """Fully typed a decipher callback, with optional parameter."""
-
- def __call__(
- self,
- objid: int,
- genno: int,
- data: bytes,
- attrs: Optional[Dict[str, Any]] = None,
- ) -> bytes:
- raise NotImplementedError
-
-else: # Fallback for older Python
- from typing import Callable
-
- DecipherCallable = Callable[..., bytes]
+ def __call__(
+ self,
+ objid: int,
+ genno: int,
+ data: bytes,
+ attrs: Optional[Dict[str, Any]] = None,
+ ) -> bytes:
+ raise NotImplementedError
class PDFObject(PSObject):
@@ -319,7 +311,7 @@ def decode(self) -> None:
except zlib.error as e:
if settings.STRICT:
- error_msg = "Invalid zlib bytes: {!r}, {!r}".format(e, data)
+ error_msg = f"Invalid zlib bytes: {e!r}, {data!r}"
raise PDFException(error_msg)
try:
diff --git a/pdfminer/psparser.py b/pdfminer/psparser.py
index cbaba002..36172c7a 100755
--- a/pdfminer/psparser.py
+++ b/pdfminer/psparser.py
@@ -120,7 +120,7 @@ def intern(self, name: PSLiteral.NameType) -> _SymbolT:
def literal_name(x: object) -> Any:
if not isinstance(x, PSLiteral):
if settings.STRICT:
- raise PSTypeError("Literal required: {!r}".format(x))
+ raise PSTypeError(f"Literal required: {x!r}")
else:
name = x
else:
@@ -580,7 +580,7 @@ def start_type(self, pos: int, type: str) -> None:
def end_type(self, type: str) -> Tuple[int, List[PSStackType[ExtraT]]]:
if self.curtype != type:
- raise PSTypeError("Type mismatch: {!r} != {!r}".format(self.curtype, type))
+ raise PSTypeError(f"Type mismatch: {self.curtype!r} != {type!r}")
objs = [obj for (_, obj) in self.curstack]
(pos, self.curtype, self.curstack) = self.context.pop()
log.debug("end_type: pos=%r, type=%r, objs=%r", pos, type, objs)
diff --git a/pdfminer/utils.py b/pdfminer/utils.py
index 93bcd449..fae1f643 100644
--- a/pdfminer/utils.py
+++ b/pdfminer/utils.py
@@ -41,7 +41,7 @@
AnyIO = Union[TextIO, BinaryIO]
-class open_filename(object):
+class open_filename:
"""
Context manager that allows opening a filename
(str or pathlib.PurePath type is supported) and closes it on exit,
@@ -91,7 +91,7 @@ def shorten_str(s: str, size: int) -> str:
return s[:size]
if len(s) > size:
length = (size - 5) // 2
- return "{} ... {}".format(s[:length], s[-length:])
+ return f"{s[:length]} ... {s[-length:]}"
else:
return s
@@ -645,12 +645,12 @@ def enc(x: str) -> str:
def bbox2str(bbox: Rect) -> str:
(x0, y0, x1, y1) = bbox
- return "{:.3f},{:.3f},{:.3f},{:.3f}".format(x0, y0, x1, y1)
+ return f"{x0:.3f},{y0:.3f},{x1:.3f},{y1:.3f}"
def matrix2str(m: Matrix) -> str:
(a, b, c, d, e, f) = m
- return "[{:.2f},{:.2f},{:.2f},{:.2f}, ({:.2f},{:.2f})]".format(a, b, c, d, e, f)
+ return f"[{a:.2f},{b:.2f},{c:.2f},{d:.2f}, ({e:.2f},{f:.2f})]"
def vecBetweenBoxes(obj1: "LTComponent", obj2: "LTComponent") -> Point:
diff --git a/setup.py b/setup.py
index 96595f0f..d50bc033 100644
--- a/setup.py
+++ b/setup.py
@@ -4,7 +4,7 @@
from setuptools import setup
root_dir = Path(__file__).parent
-with open(root_dir / "README.md", "rt") as f:
+with open(root_dir / "README.md") as f:
readme = f.read()
extras_require = {
diff --git a/tests/test_pdfdocument.py b/tests/test_pdfdocument.py
index e2643cd7..594e7abe 100644
--- a/tests/test_pdfdocument.py
+++ b/tests/test_pdfdocument.py
@@ -9,7 +9,7 @@
from tests.helpers import absolute_sample_path
-class TestPdfDocument(object):
+class TestPdfDocument:
def test_get_zero_objid_raises_pdfobjectnotfound(self):
with open(absolute_sample_path("simple1.pdf"), "rb") as in_file:
parser = PDFParser(in_file)
diff --git a/tests/test_pdfpage.py b/tests/test_pdfpage.py
index c3fe86c2..a99a2f47 100644
--- a/tests/test_pdfpage.py
+++ b/tests/test_pdfpage.py
@@ -4,7 +4,7 @@
from tests.helpers import absolute_sample_path
-class TestPdfPage(object):
+class TestPdfPage:
def test_page_labels(self):
path = absolute_sample_path("contrib/pagelabels.pdf")
expected_labels = ["iii", "iv", "1", "2", "1"]
diff --git a/tests/test_tools_dumppdf.py b/tests/test_tools_dumppdf.py
index 971c3d07..735556e2 100644
--- a/tests/test_tools_dumppdf.py
+++ b/tests/test_tools_dumppdf.py
@@ -11,9 +11,9 @@ def run(filename, options=None):
absolute_path = absolute_sample_path(filename)
with TemporaryFilePath() as output_file_name:
if options:
- s = "dumppdf -o %s %s %s" % (output_file_name, options, absolute_path)
+ s = "dumppdf -o {} {} {}".format(output_file_name, options, absolute_path)
else:
- s = "dumppdf -o %s %s" % (output_file_name, absolute_path)
+ s = "dumppdf -o {} {}".format(output_file_name, absolute_path)
dumppdf.main(s.split(" ")[1:])
diff --git a/tests/test_tools_pdf2txt.py b/tests/test_tools_pdf2txt.py
index ccbe55f9..a6e0ee1a 100644
--- a/tests/test_tools_pdf2txt.py
+++ b/tests/test_tools_pdf2txt.py
@@ -12,9 +12,9 @@ def run(sample_path, options=None):
absolute_path = absolute_sample_path(sample_path)
with TemporaryFilePath() as output_file_name:
if options:
- s = "pdf2txt -o{} {} {}".format(output_file_name, options, absolute_path)
+ s = f"pdf2txt -o{output_file_name} {options} {absolute_path}"
else:
- s = "pdf2txt -o{} {}".format(output_file_name, absolute_path)
+ s = f"pdf2txt -o{output_file_name} {absolute_path}"
pdf2txt.main(s.split(" ")[1:])
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 160b02b4..af37ce9a 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -71,7 +71,7 @@ def given_plane_with_one_object(object_size=50, gridsize=50):
return plane, obj
-class TestFunctions(object):
+class TestFunctions:
def test_shorten_str(self):
s = shorten_str("Hello there World", 15)
assert s == "Hello ... World"
diff --git a/tools/conv_afm.py b/tools/conv_afm.py
index f666ee18..2d663cc1 100755
--- a/tools/conv_afm.py
+++ b/tools/conv_afm.py
@@ -36,7 +36,7 @@ def main(argv):
print("# -*- python -*-")
print("FONT_METRICS = {")
for (fontname, (props, chars)) in fonts.items():
- print(" {!r}: {!r},".format(fontname, (props, chars)))
+ print(f" {fontname!r}: {(props, chars)!r},")
print("}")
return 0
diff --git a/tools/dumppdf.py b/tools/dumppdf.py
index 6e19e275..b870b400 100755
--- a/tools/dumppdf.py
+++ b/tools/dumppdf.py
@@ -196,7 +196,7 @@ def resolve_dest(dest: object) -> Any:
dest = resolve_dest(action["D"])
pageno = pages[dest[0].objid]
s = escape(title)
- outfp.write('\n'.format(level, s))
+ outfp.write(f'\n')
if dest is not None:
outfp.write("")
dumpxml(outfp, dest)
@@ -310,7 +310,7 @@ def create_parser() -> ArgumentParser:
"--version",
"-v",
action="version",
- version="pdfminer.six v{}".format(pdfminer.__version__),
+ version=f"pdfminer.six v{pdfminer.__version__}",
)
parser.add_argument(
"--debug",
diff --git a/tools/pdf2txt.py b/tools/pdf2txt.py
index fe158593..1aaee573 100755
--- a/tools/pdf2txt.py
+++ b/tools/pdf2txt.py
@@ -22,7 +22,7 @@ def float_or_disabled(x: str) -> Optional[float]:
try:
return float(x)
except ValueError:
- raise argparse.ArgumentTypeError("invalid float value: {}".format(x))
+ raise argparse.ArgumentTypeError(f"invalid float value: {x}")
def extract_text(
@@ -41,7 +41,7 @@ def extract_text(
output_dir: Optional[str] = None,
debug: bool = False,
disable_caching: bool = False,
- **kwargs: Any
+ **kwargs: Any,
) -> AnyIO:
if not files:
raise PDFValueError("Must provide files to work upon!")
@@ -78,7 +78,7 @@ def create_parser() -> argparse.ArgumentParser:
"--version",
"-v",
action="version",
- version="pdfminer.six v{}".format(pdfminer.__version__),
+ version=f"pdfminer.six v{pdfminer.__version__}",
)
parser.add_argument(
"--debug",