Reference#

kitpdf#

PDFbox Package.

kitpdf.PDF_REDUCE_THRESHOLD = 2000000#

Reduce pdf for files bigger than 2MB

kitpdf.exif_rm_tags(file)[source]#

Removes tags with exiftool in pdf.

Parameters:

file (Path | str) –

kitpdf.exif_transform_date(data)[source]#

Convert a pdf date such as “D:20120321183444+07’00’” into a usable datetime.

https://www.verypdf.com/pdfinfoeditor/pdf-date-format.htm (D:YYYYMMDDHHmmSSOHH’mm’)

Examples

>>> from kitpdf import exif_transform_date
>>>
>>> exif_transform_date("D:20201002181301Z")
datetime.datetime(2020, 10, 2, 18, 13, 1, tzinfo=tzutc())
Parameters:

data (str | Object) – text to find match and convert.

Returns:

datetime.datetime or None if no match.

Return type:

datetime | str | Object

kitpdf.linearized(file)[source]#

Check if the file metadata reports Linearized as Yes.

Examples

>>> import datetime
>>> from kitpdf import linearized, PDFBOX_DATA_TESTS
>>>
>>> assert linearized(PDFBOX_DATA_TESTS / "BBVA.pdf") is False
Parameters:

file (Path | str) – file to check linearize metadata

Return type:

bool

kitpdf.metadata(file, slash=False)[source]#

Returns file metadata.

Examples

>>> import datetime
>>> from kitpdf import metadata, PDFBOX_DATA_TESTS
>>>
>>> meta = metadata(PDFBOX_DATA_TESTS / "BBVA.pdf")
>>> assert isinstance(meta["CreationDate"], datetime.datetime)
>>> assert meta["Author"] == "BBVA"
Parameters:
  • file (Path | str) – file to get metadata

  • slash (bool) – False (default) to remove the starting / and convert pikepdf.String to str.

Returns:

Dictionary with the file metadata.

Return type:

dict[LiteralString | datetime | str | Object, LiteralString | datetime | str | Object]

kitpdf.pdf_diff(file1, file2)[source]#

Show diffs of two pdfs.

Parameters:
  • file1 (Path | str) – file 1

  • file2 (Path | str) – file 2

Returns:

The diffs found between the two pdfs.

Return type:

list[bytes]

kitpdf.pdf_equal(file1, file2)[source]#

Checks if two pdf files are visually equal.

Examples

>>> from kitpdf import pdf_equal, PDFBOX_DATA_TESTS
>>>
>>> assert pdf_equal(PDFBOX_DATA_TESTS / "ing1.pdf", PDFBOX_DATA_TESTS / "ing2.pdf") is True
>>> assert pdf_equal(PDFBOX_DATA_TESTS / "ing1.pdf", PDFBOX_DATA_TESTS / "ing3.pdf") is False
Parameters:
  • file1 (Path | str) – file 1

  • file2 (Path | str) – file 2

Returns:

True if equal

Return type:

bool

kitpdf.pdf_from_picture(file, picture, rm=True)[source]#

Creates pdf from image.

Parameters:
  • file (Path | str) – pdf file

  • picture (Path | str) – image file

  • rm (bool) – remove image file (default: True)

Return type:

Path

kitpdf.pdf_linearize(file)[source]#

Linearize pdf (overwrites original).

Parameters:

file (Path | str) –

Return type:

None

kitpdf.pdf_reduce(path, level='/ebook', threshold=2000000)[source]#

Compress pdf.

https://www.adobe.com/acrobat/hub/how-to-compress-pdf-in-linux.html

Examples

>>> import shutil
>>> from nodeps import Path
>>> from kitpdf import PDFBOX_DATA_TESTS
>>> from kitpdf import pdf_reduce
>>>
>>> original = PDFBOX_DATA_TESTS / "5.2M.pdf"
>>> backup = PDFBOX_DATA_TESTS / "5.2M-bk.pdf"
>>>
>>> shutil.copyfile(original, backup)  
Path('.../kitpdf/data/tests/5.2M-bk.pdf')
>>> original_size = original.stat().st_size
>>> pdf_reduce(original, level="/screen")
>>> reduced_size = original.stat().st_size
>>> assert original_size != reduced_size  
>>> shutil.move(backup, original)  
Path('.../kitpdf/data/tests/5.2M.pdf')
Parameters:
  • path (Path | str) – path to file

  • threshold (int | None) – size limit in bytes (default 2000000, i.e. 2 MB); only files bigger than this are reduced, None to reduce any pdf

  • level (Literal['/default', '/prepress', '/ebook', '/screen']) – /default is selected by the system, /prepress 300 dpi, /ebook 150 dpi, /screen 72 dpi

Returns:

None

Return type:

None

kitpdf.pdf_scan(file, directory=None)[source]#

Makes the pdf look like scanned, linearizes it and sets the tag color.

Examples

>>> from pathlib import Path
>>> from kitpdf import PDFBOX_DATA
>>> from kitpdf import PDFBOX_DATA_TESTS
>>> from kitpdf import SCAN_PREFIX
>>> from kitpdf import pdf_scan
>>>
>>> for f in Path(PDFBOX_DATA_TESTS).iterdir():
...     if f.is_file() and f.suffix == ".pdf":
...         assert f"generated/{SCAN_PREFIX}" in str(pdf_scan(f, PDFBOX_DATA_TESTS / "generated"))
Parameters:
  • file (Path) – path of file to be scanned

  • directory (Path | None) – destination directory (Default: file directory)

Returns:

Destination file

Return type:

Path

kitpdf.pdf_to_picture(source, dest='dir', dpi=300, fmt='png')[source]#

Creates a picture file (png by default, or jpeg) from the first page of the pdf, by default in the same directory.

Examples

>>> from kitpdf import PDFBOX_DATA_TESTS
>>> from kitpdf import pdf_to_picture
>>>
>>> src = PDFBOX_DATA_TESTS / "BBVA.pdf"
>>>
>>> with pdf_to_picture(src, PDFBOX_DATA_TESTS / f"generated/BBVA-{putalpha_random.__name__}.png") as output:
...     assert output.exists()
...     assert output.suffix == ".png"
>>>
>>> with pdf_to_picture(src, "tmp") as temp:
...     assert temp.exists()
...     assert temp.suffix == ".png"
>>>
>>> with pdf_to_picture(src) as png:
...     assert png.exists()
...     assert png.suffix == ".png"
Parameters:
  • source (Path | AnyStr | PathLike[str] | PathLike[bytes] | IO) – Source pdf to convert to picture

  • dest (Path | AnyStr | PathLike[str] | PathLike[bytes] | IO | Literal['dir', 'tmp']) – Destination path, dir to use the same path with different suffix or tmp for temp file

  • dpi (int) – dpi

  • fmt (Literal['jpeg', 'png']) – output jpeg or png

Returns:

Temp path with new image or destination

Return type:

Path

kitpdf.picture_paste(background, foreground, dest=None, putalpha=True, position=(0, 0), stamp=False)[source]#

Paste the foreground image on top of the background image.

Examples

>>> from kitpdf import PDFBOX_DATA_TESTS
>>> from kitpdf import picture_paste
>>>
>>> src = PDFBOX_DATA_TESTS / "BBVA.png"
>>> fo = PDFBOX_DATA_TESTS / "folded.png"
>>>
>>> with picture_paste(fo, src, PDFBOX_DATA_TESTS / f"generated/folded-BBVA-{picture_paste.__name__}.png") as o:
...     assert o.exists()
...     assert o.suffix == ".png"
>>>
>>> with picture_paste(fo, src) as temp:
...     assert temp.exists()
...     assert temp.suffix == ".png"
>>>
>>> src = PDFBOX_DATA_TESTS / "BioSalud Stamp Transparent.png"
>>> d = PDFBOX_DATA_TESTS / f"generated/BioSalud Stamp Transparent-{picture_paste.__name__}.png"
>>> with picture_paste(fo, src, d, position=(300, 420)) as o:
...     assert o.exists()
...     assert o.suffix == ".png"
>>>
>>> src = PDFBOX_DATA_TESTS / "generated/BBVA-white_alpha.png"
>>> d = PDFBOX_DATA_TESTS / f"generated/BBVA-white_alpha-{picture_paste.__name__}.png"
>>> with picture_paste(fo, src, d, position=(0, 210)) as o:
...     assert o.exists()
...     assert o.suffix == ".png"
>>>
Parameters:
  • background (Path | AnyStr | PathLike[str] | PathLike[bytes] | IO) – Background image

  • foreground (Path | AnyStr | PathLike[str] | PathLike[bytes] | IO) – Foreground image

  • dest (Path | AnyStr | PathLike[str] | PathLike[bytes] | IO) – None for temp path or dest

  • putalpha (bool) – Put alpha channel (transparency) to random value putalpha_random() before pasting

  • position (tuple[int, int] | tuple[int, int, int, int] | None) – position of foreground image, if position is (0,0) background is resized to same size as foreground

  • stamp (bool) – True to stamp the foreground image

Return type:

Path

kitpdf.putalpha_random(source, dest=None, value=(0.62, 0.72))[source]#

Put alpha channel (transparency) to random value.

Examples

>>> from kitpdf import putalpha_random
>>> from kitpdf import PDFBOX_DATA_TESTS
>>>
>>> src = PDFBOX_DATA_TESTS / "BBVA.pdf"
>>> pic = PDFBOX_DATA_TESTS / f"generated/BBVA-{putalpha_random.__name__}.png"
>>> with (pdf_to_picture(src, dest=pic) as picture, putalpha_random(picture) as out):
...     assert out.exists()
...     assert out.suffix == ".png"
>>> with (putalpha_random(src) as temp):
...     assert temp.exists()
...     assert temp.suffix == ".png"
Parameters:
Returns:

Temp path with new image or destination

Return type:

Path

kitpdf.white_alpha(source, dest=None)[source]#

Make the white pixels transparent.

Examples

>>> from kitpdf import white_alpha
>>> from kitpdf import PDFBOX_DATA_TESTS
>>>
>>> src = PDFBOX_DATA_TESTS / "Biosalud Stamp.png"
>>> with white_alpha(src, PDFBOX_DATA_TESTS / f"generated/Biosalud Stamp-{white_alpha.__name__}.png", ) as out:
...     assert out.exists()
...     assert out.suffix == ".png"
>>> with white_alpha(src) as temp:
...     assert temp.exists()
...     assert temp.suffix == ".png"
>>>
>>> src = PDFBOX_DATA_TESTS / "BBVA.png"
>>> with white_alpha(src, PDFBOX_DATA_TESTS / f"generated/BBVA-{white_alpha.__name__}.png", ) as out:
...     assert out.exists()
...     assert out.suffix == ".png"
Parameters:
Returns:

path temp or dest

Return type:

Path