-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathmerge_and_label_pdfs.py
executable file
·61 lines (48 loc) · 2.02 KB
/
merge_and_label_pdfs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/usr/bin/env python
#
# Merges a bunch of PDF files together, stamping a header of the file name and global page number.
# Useful when you've generated a bunch of individual PDFs of figures and need them all in one file.
import argparse
import contextlib
import io
import sys

import PyPDF2 as PDF
from reportlab.lib.pagesizes import letter
from reportlab.lib.units import inch
from reportlab.pdfgen import canvas
def make_pdf_of_text(text, for_page):
    """Return a single PDF page carrying ``text``, built for ``for_page``.

    The label is rendered by ``make_pdfbytes_of_text`` using the media box of
    ``for_page``; the resulting in-memory PDF is then parsed and its first
    page is handed back.
    """
    overlay_reader = PDF.PdfReader(make_pdfbytes_of_text(text, for_page.mediabox))
    return overlay_reader.pages[0]
def make_pdfbytes_of_text(text, media_box):
    """Render ``text`` centred along the top edge of ``media_box`` as a PDF.

    Args:
        text: The label to draw.
        media_box: A page box exposing ``lower_left``, ``lower_right`` and
            ``upper_left`` corner coordinates.

    Returns:
        An ``io.BytesIO`` containing the generated one-page PDF, rewound to
        the start so it can be read immediately.
    """
    left = float(media_box.lower_left[0])
    right = float(media_box.lower_right[0])
    top = float(media_box.upper_left[1])

    buf = io.BytesIO()
    # reportlab wants pagesize as a (width, height) pair; handing it the
    # 4-element box would make it read the box's first two entries (the
    # lower-left corner) as the page size. reportlab pages always start at the
    # origin, so size the canvas up to the box's upper-right corner: that way
    # the label, positioned in the box's own coordinates, lies on the page.
    c = canvas.Canvas(buf, pagesize=(right, top))
    # Put the text at the top center. The horizontal centre is the midpoint of
    # the left and right edges (not half the width), so the label stays
    # centred even when the box does not start at x == 0. The baseline sits
    # 12 units below the top edge.
    c.drawCentredString((left + right) / 2, top - 12, text)
    c.save()
    buf.seek(0)
    return buf
def main():
    """Command-line entry point: concatenate PDFs and label every page.

    Parses the command line, then appends each page of each input PDF (in the
    order given) to a single output file. Every page is stamped with the
    prettified input filename and a running page number that counts across
    all inputs, optionally preceded by ``--page-number-prefix``.

    Progress messages are written to stderr.
    """
    ap = argparse.ArgumentParser(description="Concatenate PDFs, labeling them with prettified versions of their filenames")
    ap.add_argument('--out', '-o', default='out.pdf', help='Output filename')
    ap.add_argument('--page-number-prefix', '-p', default='', help='Prefix for each page number (e.g. "S" for S1, S2, ...)')
    ap.add_argument('pdfs', nargs='+', help='PDF files to concatenate, in order')
    args = ap.parse_args()

    output = PDF.PdfWriter()
    out_page_num = 1

    # PyPDF2 needs each input file to remain open until the output has been
    # written, so a per-file "with" block would close them too early. The
    # ExitStack keeps them all open for the duration and still guarantees that
    # every handle is closed afterwards, including when an error occurs.
    with contextlib.ExitStack() as open_inputs:
        for fname in args.pdfs:
            in_file = open_inputs.enter_context(open(fname, 'rb'))
            this_pdf = PDF.PdfReader(in_file)
            this_num_pages = len(this_pdf.pages)
            print(f"Working on {fname} ({this_num_pages} pages)...", file=sys.stderr)

            # The prettified name is the same for every page of this file.
            label = fname.replace('_', ' ').replace('.pdf', '')
            for page in this_pdf.pages:
                text_string = f"{label} (p. {args.page_number_prefix}{out_page_num})"
                page.merge_page(make_pdf_of_text(text_string, page))
                output.add_page(page)
                out_page_num += 1

        print(f"Writing output to {args.out}.", file=sys.stderr)
        with open(args.out, 'wb') as out_file:
            output.write(out_file)
# Run the command-line tool only when executed as a script, not on import.
if __name__ == "__main__":
    main()