Source code for pdfcropper.cropper

#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# Interpreter version: python 2.7
#
"""
Note:
    All sizes used in this module should be in millimeters.

API
---
"""
# Imports =====================================================================
from pyPdf import PdfFileWriter, PdfFileReader
from pyPdf.generic import FloatObject


# Variables ===================================================================
#: Millimeters per one PDF point (1 pt = 1/72 inch, 1 inch = 25.4 mm).
#: Multiply a size in points by this to get millimeters; divide a size in
#: millimeters by this to get points.
POINT_MM = 25.4 / 72.0


# Functions & objects =========================================================
def get_width_height(page):
    """
    Compute the dimensions of `page` from its media box.

    Args:
        page (obj): Page object taken from ``PdfFileReader.pages``.

    Returns:
        tuple: ``(width, height)`` as floats, in millimeters.
    """
    media_box = page.mediaBox

    # the media box values are scaled by POINT_MM to get millimeters
    width_mm = float(media_box.getWidth()) * POINT_MM
    height_mm = float(media_box.getHeight()) * POINT_MM

    return (width_mm, height_mm)
def crop_page(page, left, right, top, bottom):
    """
    Shrink the media box of `page` by the given margins.

    Args:
        page (obj): :mod:`pyPdf` PdfFileReader's page object.
        left (int): Cut X millimeters from left.
        right (int): Cut X millimeters from right.
        top (int): Cut X millimeters from top.
        bottom (int): Cut X millimeters from bottom.

    Warning:
        This function modifies the `page` object in place; the returned
        object is the very same `page` that was passed in.

    Returns:
        obj: Modified page object.
    """
    box = page.mediaBox

    # upper-right corner moves inwards: right margin on X, top margin on Y
    # (margins are divided by POINT_MM to convert millimeters to points)
    upper_right_x = box.getUpperRight_x() - FloatObject(right / POINT_MM)
    upper_right_y = box.getUpperRight_y() - FloatObject(top / POINT_MM)
    box.upperRight = (upper_right_x, upper_right_y)

    # lower-left corner moves inwards: left margin on X, bottom margin on Y
    lower_left_x = box.getLowerLeft_x() + FloatObject(left / POINT_MM)
    lower_left_y = box.getLowerLeft_y() + FloatObject(bottom / POINT_MM)
    box.lowerLeft = (lower_left_x, lower_left_y)

    return page
def crop_all(pdf, left, right, top, bottom, remove=None):
    """
    Crop all pages in `pdf`. Remove pages specified by `remove`.

    Args:
        pdf (obj): :mod:`pyPdf` :class:`PdfFileReader` object.
        left (int): Cut X millimeters from left.
        right (int): Cut X millimeters from right.
        top (int): Cut X millimeters from top.
        bottom (int): Cut X millimeters from bottom.
        remove (list/tuple, default None): Zero-based indexes of the pages
               which should be left out of the output. ``None`` means
               that no page is removed.

    Note:
        Pages are cropped by :func:`crop_page`, which modifies the page
        objects of `pdf` in place.

    Returns:
        obj: :class:`PdfFileWriter` instance, with modified pages.
    """
    # ``None`` replaces the former ``[]`` default, so that no mutable object
    # is shared between calls
    if remove is None:
        remove = ()

    out = PdfFileWriter()

    # crop pages, skipping those listed in `remove`
    for cnt, page in enumerate(pdf.pages):
        if cnt in remove:
            continue

        out.addPage(
            crop_page(page, left, right, top, bottom)
        )

    return out
def crop_differently(pdf, even_vector, odd_vector, remove=None):
    """
    Crop `pdf` even pages by `even_vector` and odd pages by `odd_vector`.
    Remove pages specified by `remove`.

    Note:
        "Even" and "odd" refer to the zero-based index of the page in
        ``pdf.pages``, so the very first page (index 0) is cropped by
        `even_vector`. The parity is computed from the original index, even
        when some pages are skipped because of `remove`.

    Args:
        pdf (obj): :mod:`pyPdf` :class:`PdfFileReader` object.
        even_vector (list): Margins in millimeters which will be cut from
                    pages with even index. ``[Left, Right, Top, Bottom]``.
        odd_vector (list): Margins in millimeters which will be cut from
                   pages with odd index. ``[Left, Right, Top, Bottom]``.
        remove (list/tuple, default None): Zero-based indexes of the pages
               which should be left out of the output. ``None`` means
               that no page is removed.

    Returns:
        obj: :class:`PdfFileWriter` instance, with modified pages.
    """
    # ``None`` replaces the former ``[]`` default, so that no mutable object
    # is shared between calls
    if remove is None:
        remove = ()

    out = PdfFileWriter()

    # crop pages, skipping those listed in `remove`
    for cnt, page in enumerate(pdf.pages):
        if cnt in remove:
            continue

        crop_vector = even_vector if (cnt % 2) == 0 else odd_vector

        # vector is unpacked into (left, right, top, bottom) arguments
        out.addPage(
            crop_page(page, *crop_vector)
        )

    return out
def remove_pages(pdf, remove):
    """
    Copy `pdf` into new writer, leaving out the pages listed in `remove`.

    Args:
        pdf (obj): :mod:`pyPdf` :class:`PdfFileReader` object.
        remove (list/tuple): Zero-based indexes of the pages which should
               be left out of the output.

    Returns:
        obj: :class:`PdfFileWriter` instance containing the kept pages.
    """
    out = PdfFileWriter()

    # keep only the pages whose index is not listed in `remove`
    for index, page in enumerate(pdf.pages):
        if index not in remove:
            out.addPage(page)

    return out
def read_pdf(filename):
    """
    Read pdf file specified by `filename`.

    The whole file is loaded into memory and the file handle is closed
    before the function returns, so no descriptor is left open for the
    lifetime of the returned reader.

    Args:
        filename (str): Path to the pdf file.

    Returns:
        obj: :class:`PdfFileReader` object backed by in-memory copy of the
             file.
    """
    from io import BytesIO  # local import; used only by this function

    # the reader keeps using the stream it was given, so hand it a seekable
    # in-memory buffer instead of a file object that would never be closed
    with open(filename, 'rb') as pdf_file:
        buffered = BytesIO(pdf_file.read())

    return PdfFileReader(buffered)
def save_pdf(filename, content):
    """
    Save `content` to `filename`.

    Args:
        filename (str): Path which will be used for `content`. Existing file
                 is overwritten.
        content (obj): :class:`PdfFileWriter` object which will be
                serialized. Its ``.write()`` method is called with the
                opened binary file.
    """
    # open() instead of the python 2 only file() builtin; same behavior in
    # python 2 and consistent with read_pdf()
    with open(filename, 'wb') as f:
        content.write(f)