Reference#
kitpdf#
PDFbox Package.
- kitpdf.PDF_REDUCE_THRESHOLD = 2000000#
Size threshold: pdf files bigger than 2MB are reduced.
- kitpdf.exif_rm_tags(file)[source]#
Removes tags with exiftool in pdf.
- Parameters:
file (Path | str) –
- kitpdf.exif_transform_date(data)[source]#
Convert a pdf date such as “D:20120321183444+07’00’” into a usable datetime.
https://www.verypdf.com/pdfinfoeditor/pdf-date-format.htm (D:YYYYMMDDHHmmSSOHH’mm’)
Examples
>>> from kitpdf import exif_transform_date >>> >>> exif_transform_date("D:20201002181301Z") datetime.datetime(2020, 10, 2, 18, 13, 1, tzinfo=tzutc())
- kitpdf.linearized(file)[source]#
Checks if the pdf metadata field Linearized is Yes.
Examples
>>> import datetime >>> from kitpdf import linearized, PDFBOX_DATA_TESTS >>> >>> assert linearized(PDFBOX_DATA_TESTS / "BBVA.pdf") is False
- kitpdf.metadata(file, slash=False)[source]#
Returns file metadata.
Examples
>>> import datetime >>> from kitpdf import metadata, PDFBOX_DATA_TESTS >>> >>> meta = metadata(PDFBOX_DATA_TESTS / "BBVA.pdf") >>> assert isinstance(meta["CreationDate"], datetime.datetime) >>> assert meta["Author"] == "BBVA"
- Parameters:
- Returns:
datetime.datetime or None if no match.
- Return type:
dict[LiteralString | datetime | str | Object, LiteralString | datetime | str | Object]
- kitpdf.pdf_equal(file1, file2)[source]#
Checks if two pdf files are visually equal.
Examples
>>> from kitpdf import pdf_equal, PDFBOX_DATA_TESTS >>> >>> assert pdf_equal(PDFBOX_DATA_TESTS / "ing1.pdf", PDFBOX_DATA_TESTS / "ing2.pdf") is True >>> assert pdf_equal(PDFBOX_DATA_TESTS / "ing1.pdf", PDFBOX_DATA_TESTS / "ing3.pdf") is False
- kitpdf.pdf_linearize(file)[source]#
Linearize pdf (overwrites original).
- Parameters:
file (Path | str) –
- Return type:
None
- kitpdf.pdf_reduce(path, level='/ebook', threshold=2000000)[source]#
Compress pdf.
https://www.adobe.com/acrobat/hub/how-to-compress-pdf-in-linux.html
Examples
>>> import shutil >>> from nodeps import Path >>> from kitpdf import PDFBOX_DATA_TESTS >>> from kitpdf import pdf_reduce >>> >>> original = PDFBOX_DATA_TESTS / "5.2M.pdf" >>> backup = PDFBOX_DATA_TESTS / "5.2M-bk.pdf" >>> >>> shutil.copyfile(original, backup) Path('.../kitpdf/data/tests/5.2M-bk.pdf') >>> original_size = original.stat().st_size >>> pdf_reduce(original, level="/screen") >>> reduced_size = original.stat().st_size >>> assert original_size != reduced_size >>> shutil.move(backup, original) Path('.../kitpdf/data/tests/5.2M.pdf')
- Parameters:
- Returns:
None
- Return type:
None
- kitpdf.pdf_scan(file, directory=None)[source]#
Makes the pdf look like it was scanned, linearizes it and sets the tag color.
Examples
>>> from pathlib import Path >>> from kitpdf import PDFBOX_DATA >>> from kitpdf import PDFBOX_DATA_TESTS >>> from kitpdf import SCAN_PREFIX >>> from kitpdf import pdf_scan >>> >>> for f in Path(PDFBOX_DATA_TESTS).iterdir(): ... if f.is_file() and f.suffix == ".pdf": ... assert f"generated/{SCAN_PREFIX}" in str(pdf_scan(f, PDFBOX_DATA_TESTS / "generated"))
- Parameters:
file (Path) – path of file to be scanned
directory (Path | None) – destination directory (Default: file directory)
- Returns:
Destination file
- Return type:
Path
- kitpdf.pdf_to_picture(source, dest='dir', dpi=300, fmt='png')[source]#
Creates a picture file (png by default, or jpeg) from the first page of the pdf, by default in the same directory.
Examples
>>> from kitpdf import PDFBOX_DATA_TESTS >>> from kitpdf import pdf_to_picture >>> >>> src = PDFBOX_DATA_TESTS / "BBVA.pdf" >>> >>> with pdf_to_picture(src, PDFBOX_DATA_TESTS / f"generated/BBVA-{putalpha_random.__name__}.png") as output: ... assert output.exists() ... assert output.suffix == ".png" >>> >>> with pdf_to_picture(src, "tmp") as temp: ... assert temp.exists() ... assert temp.suffix == ".png" >>> >>> with pdf_to_picture(src) as png: ... assert png.exists() ... assert png.suffix == ".png"
- Parameters:
source (Path | AnyStr | PathLike[str] | PathLike[bytes] | IO) – Source pdf to convert to picture
dest (Path | AnyStr | PathLike[str] | PathLike[bytes] | IO | Literal['dir', 'tmp']) – Destination path, 'dir' to use the same path with a different suffix, or 'tmp' for a temp file
dpi (int) – dpi
fmt (Literal['jpeg', 'png']) – output jpeg or png
- Returns:
Temp path with new image or destination
- Return type:
Path
- kitpdf.picture_paste(background, foreground, dest=None, putalpha=True, position=(0, 0), stamp=False)[source]#
Paste the foreground image on top of the background image.
Examples
>>> from kitpdf import PDFBOX_DATA_TESTS >>> from kitpdf import picture_paste >>> >>> src = PDFBOX_DATA_TESTS / "BBVA.png" >>> fo = PDFBOX_DATA_TESTS / "folded.png" >>> >>> with picture_paste(fo, src, PDFBOX_DATA_TESTS / f"generated/folded-BBVA-{picture_paste.__name__}.png") as o: ... assert o.exists() ... assert o.suffix == ".png" >>> >>> with picture_paste(fo, src) as temp: ... assert temp.exists() ... assert temp.suffix == ".png" >>> >>> src = PDFBOX_DATA_TESTS / "BioSalud Stamp Transparent.png" >>> d = PDFBOX_DATA_TESTS / f"generated/BioSalud Stamp Transparent-{picture_paste.__name__}.png" >>> with picture_paste(fo, src, d, position=(300, 420)) as o: ... assert o.exists() ... assert o.suffix == ".png" >>> >>> src = PDFBOX_DATA_TESTS / "generated/BBVA-white_alpha.png" >>> d = PDFBOX_DATA_TESTS / f"generated/BBVA-white_alpha-{picture_paste.__name__}.png" >>> with picture_paste(fo, src, d, position=(0, 210)) as o: ... assert o.exists() ... assert o.suffix == ".png" >>>
- Parameters:
background (Path | AnyStr | PathLike[str] | PathLike[bytes] | IO) – Background image
foreground (Path | AnyStr | PathLike[str] | PathLike[bytes] | IO) – Foreground image
dest (Path | AnyStr | PathLike[str] | PathLike[bytes] | IO) – None for temp path or dest
putalpha (bool) – Set the alpha channel (transparency) to a random value with putalpha_random() before pasting
position (tuple[int, int] | tuple[int, int, int, int] | None) – position of foreground image, if position is (0,0) background is resized to the same size as the foreground
stamp (bool) – True to stamp the foreground image
- Return type:
Path
- kitpdf.putalpha_random(source, dest=None, value=(0.62, 0.72))[source]#
Put alpha channel (transparency) to random value.
Examples
>>> from kitpdf import putalpha_random >>> from kitpdf import PDFBOX_DATA_TESTS >>> >>> src = PDFBOX_DATA_TESTS / "BBVA.pdf" >>> pic = PDFBOX_DATA_TESTS / f"generated/BBVA-{putalpha_random.__name__}.png" >>> with (pdf_to_picture(src, dest=pic) as picture, putalpha_random(picture) as out): ... assert out.exists() ... assert out.suffix == ".png" >>> with (putalpha_random(src) as temp): ... assert temp.exists() ... assert temp.suffix == ".png"
- Parameters:
- Returns:
Temp path with new image or destination
- Return type:
Path
- kitpdf.white_alpha(source, dest=None)[source]#
Make the white pixels transparent.
Examples
>>> from kitpdf import white_alpha >>> from kitpdf import PDFBOX_DATA_TESTS >>> >>> src = PDFBOX_DATA_TESTS / "Biosalud Stamp.png" >>> with white_alpha(src, PDFBOX_DATA_TESTS / f"generated/Biosalud Stamp-{white_alpha.__name__}.png", ) as out: ... assert out.exists() ... assert out.suffix == ".png" >>> with white_alpha(src) as temp: ... assert temp.exists() ... assert temp.suffix == ".png" >>> >>> src = PDFBOX_DATA_TESTS / "BBVA.png" >>> with white_alpha(src, PDFBOX_DATA_TESTS / f"generated/BBVA-{white_alpha.__name__}.png", ) as out: ... assert out.exists() ... assert out.suffix == ".png"