#!/usr/bin/perl -w
#
# pdfxtract.pl - extract one or more pages from a PDF file and write them to
# one or several new PDF files. Usage details are in the POD in the DOCS
# section at the end of this file.
#

use strict;
use Pod::Usage;
use PDF::Reuse;
use Getopt::Std;
use vars qw($opt_s $opt_j);

my $VERSION = 0.2;

###########
# Main program
#

# Getopt::Std fills $opt_s / $opt_j; a false return means an unknown option.
getopts("s:j:") or usage();

unless ( defined $ARGV[0] && -f $ARGV[0] ) {
    usage_error("You must specify a PDF document as first argument");
}
my $pdfinfile = $ARGV[0];

#
# If a second arg. is present, it must be a positive integer: the first page.
#
my $first = 1;
if ( defined $ARGV[1] ) {
    usage_error("Second argument must be a page number")
        unless is_positive_int( $ARGV[1] );
    $first = $ARGV[1] + 0;    # "+ 0" normalises e.g. "007" to 7
}

#
# If a third arg. is present, it must be a positive integer: the last page.
#
my $last = $first;
if ( defined $ARGV[2] ) {
    usage_error("Third argument must be a page number")
        unless is_positive_int( $ARGV[2] );
    $last = $ARGV[2] + 0;
    usage_error(
        "Last page number must be equal to or greater than first page number")
        unless $last >= $first;
}

prDocDir(".");

#
# -s: number of source pages per output file (0 here means "do not split").
#
my $split = 0;
if ( defined $opt_s ) {
    usage_error("Split argument must be a number > 0")
        unless is_positive_int($opt_s);
    $split = $opt_s + 0;
}

#
# -j: number of leading pages to skip in each split chunk. "-j0" is the
# documented default, so only a true value needs validating.
#
my $jump = 0;
if ($opt_j) {
    usage_error("Jump can only be used with split option") unless $split;
    usage_error("Jump argument must be a number > 0")
        unless is_positive_int($opt_j);
    $jump = $opt_j + 0;
    usage_error("Jump number must be smaller than split count")
        unless $jump < $split;
}

if ($split) {

    # Split output on separate files, one per chunk of $split source pages.
    for ( my $pg = $first ; $pg <= $last ; $pg += $split ) {
        my $chunk_first = $pg + $jump;
        my $chunk_last  = $pg + $split - 1;

        # Never go past the last page the user asked for.
        $chunk_last = $last if $chunk_last > $last;

        # A clamped final chunk may have nothing left once the jump is applied.
        last if $chunk_first > $chunk_last;

        # dump_pages returns nothing once the source runs out of pages.
        last unless dump_pages( $pdfinfile, $chunk_first, $chunk_last );
    }
}
else {
    dump_pages( $pdfinfile, $first, $last );
}

1;

#######################
#
# Subs
#

# Return true if $value is a string of digits denoting an integer > 0.
sub is_positive_int {
    my ($value) = @_;
    return defined $value && $value =~ m/\A\d+\z/ && $value > 0;
}

# Print "** $message", show the usage text and terminate the program.
sub usage_error {
    my ($message) = @_;
    print "** $message\n";
    usage();

    # pod2usage() normally terminates the program itself; this is a safety net.
    exit 1;
}

# Copy pages $first..$last of $pdfinfile into a new PDF file.
#
# The output name is derived from the input name and the page range. If the
# source yields fewer pages than requested, the file is renamed so that its
# name reflects the last page actually written.
#
# Returns the page count reported by prDoc, or nothing (after removing the
# output file) when prDoc reports no pages.
sub dump_pages {
    my ( $pdfinfile, $first, $last ) = @_;

    my $pdfoutfile = get_out_file_name( $pdfinfile, $first, $last );
    prFile($pdfoutfile);

    # prDoc's return value is used as the number of pages actually copied.
    my $pages = prDoc( $pdfinfile, $first, $last );
    unless ($pages) {
        print "Page $first is not found in $pdfinfile\n";
        unlink $pdfoutfile;
        return;
    }

    my $actlast = $first + $pages - 1;
    prEnd();

    if ( $actlast != $last ) {
        my $newpdfoutfile = get_out_file_name( $pdfinfile, $first, $actlast );

        # Built-in rename instead of a shell "mv": safe for names containing
        # spaces or shell metacharacters, and not tied to a Unix shell.
        if ( rename $pdfoutfile, $newpdfoutfile ) {
            $pdfoutfile = $newpdfoutfile;
        }
        else {
            warn "Could not rename $pdfoutfile to $newpdfoutfile: $!\n";
        }
    }

    print "$pages pages extracted from $pdfinfile and dumped to $pdfoutfile\n";
    return $pages;
}

# Show the usage text taken from this file's POD via pod2usage(1).
sub usage {
    pod2usage(1);
}

# Build the output file name "<infile>-pg<first>_<last>.pdf".
# A false $first or $last leaves the corresponding part out of the name.
sub get_out_file_name {
    my ( $pdfinfile, $first, $last ) = @_;

    my $pdfoutfile = $pdfinfile . "-pg";
    $pdfoutfile .= $first       if $first;
    $pdfoutfile .= "_" . $last  if $last;
    $pdfoutfile .= ".pdf";

    return $pdfoutfile;
}

############### DOCS ####################

=pod

=head1 NAME

pdfxtract.pl - PDF Page Extractor Utility

=head1 SYNOPSIS

pdfxtract.pl -sX -jY [pdffile] {firstpage} {lastpage}

 Mandatory
   pdffile: PDF file name to extract pages from
 When run without page arguments, the first page is extracted.

 Optional:
   -sX : Split, i.e. save every X pages to a separate file,
         else save is done to one target file.
   -jY : Jump, skip Y pages before next file, Y=0 default
   firstpage: page no of first page, default is 1
   lastpage : page no of last page, default is 1 or firstpage

 Run perldoc(1) on this file for additional documentation,
 or use a text viewer on the file and look at the DOCS section

=head1 PREREQUISITES

This script requires the C<PDF::Reuse> module

=head1 EXAMPLES

Example:

'pdfxtract.pl 2005-08-01.pdf 2 5' will extract pages 2 to 5 to a single
new file.

'pdfxtract.pl -s2 2005-08-01.pdf 3 10' will extract pages 3,4; 5,6; 7,8 and
9,10 to four separate new files, each consisting of two pages.

'pdfxtract.pl -s2 -j1 2005-08-01.pdf 3 10' will extract pages 4; 6; 8 and 10
to four separate new files, each consisting of one page being the last of
the two pages specified by X.

Tip: Use 'pdfinfo [pdffile]' to see overall PDF-file info.

=head1 DESCRIPTION

A utility to crop (extract) one or more pages from a PDF file and dump to
separate file or files.

=head1 AUTHOR

Trond Haugen

=head1 COPYRIGHT

Copyright 2006 Trond Haugen. All rights reserved.
This module is free software. You can redistribute it and/or modify it under
the same terms as Perl itself.

=head1 HISTORY

Revision 0.1, 2006-12-06: Initial

=pod OSNAMES

any

=pod SCRIPT CATEGORIES

CPAN/Administrative
Fun/Educational

=pod README

A utility to crop (extract) one or more pages from a PDF file and dump to
separate file or files.

=cut