You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
49 lines
1.8 KiB
49 lines
1.8 KiB
3 years ago
|
#!/usr/bin/env python3
|
||
|
""" Extract pages from a PDF. """
|
||
|
|
||
|
import click
|
||
3 years ago
|
from pikepdf import Pdf
|
||
3 years ago
|
|
||
|
from asl_rulebook2.utils import parse_page_numbers
|
||
|
|
||
|
# ---------------------------------------------------------------------
|
||
|
|
||
|
@click.command()
@click.argument( "pdf_file", nargs=1, type=click.Path(exists=True,dir_okay=False) )
@click.option( "--output","-o","output_fname", required=True, type=click.Path(dir_okay=False),
    help="Output PDF file."
)
@click.option( "--pages","-p", help="Page(s) to dump (e.g. 2,5,9-15)." )
def main( pdf_file, output_fname, pages ):
    """Extract pages from a PDF."""

    # NOTE: The docstring above is also what click shows as the command's help text,
    # so the implementation notes are kept in comments instead.
    #
    # NOTE: The purpose of this script is to carve pages out of the eASLRB, so that specific parts
    # of it can be worked on without loading the whole document every time. Unwanted pages are
    # deleted from the loaded document (rather than building a new one), which preserves
    # the internal PDF structure of each page that is kept.
    # The output is not as small as one might expect (e.g. a single extracted page gives a file only
    # about half the size of the original), but processing it is significantly faster.

    # figure out which pages to keep
    # NOTE(review): offset=-1 presumably turns the 1-based page numbers given on the command line
    # into the 0-based indexes used against pdf.pages below (they are reported as index+1) - confirm
    # against parse_page_numbers().
    keep = parse_page_numbers( pages, offset=-1 )

    print( "Loading PDF:", pdf_file )
    with Pdf.open( pdf_file ) as pdf:

        # throw away the outline, one top-level entry at a time (from the end)
        print( "Removing the TOC..." )
        with pdf.open_outline() as outline:
            while outline.root:
                del outline.root[-1]

        # drop every page that wasn't asked for
        print( "Extracting pages:", ", ".join( str(index+1) for index in sorted(keep) ) )
        # NOTE: We walk the pages back-to-front, so that deleting one doesn't shift
        # the indexes of the pages we have yet to visit.
        for index in reversed( range( len(pdf.pages) ) ):
            if index in keep:
                continue
            del pdf.pages[ index ]

        # write out what's left
        print( "Saving file:", output_fname )
        pdf.save( output_fname )
|
||
|
|
||
|
# ---------------------------------------------------------------------
|
||
|
|
||
|
if __name__ == "__main__":
    # NOTE: main() is wrapped by click, which fills in its parameters from the command line,
    # hence the call with no arguments (and the pylint suppression).
    main() #pylint: disable=no-value-for-parameter
|