################################################################################
#
# Example "peg_ini.pl".
#
################################################################################

use strict;
use warnings;

# These are defined by peg itself.
our ($Bin_dir, %Env, $Newline, %Peg_longopt, %Peg_S);

sub Warn {
    my $msg = join '', @_;
    print STDERR "peg_ini: $msg\n";
}

sub Die {
    Warn @_;
    exit(2);
}

################################################################################
#
# Define some 'long options':
#

# Find files matching a given PERLEXPR/ALIAS.
# eg% peg -find /foo/
#
$Peg_longopt{find} = sub {
    my $argv_ref = shift;
    @$argv_ref or die "expected EXTENSION or /PATTERN/ argument";
    my $p_arg = shift @$argv_ref;
    if ($p_arg =~ /^[\w\.\-]{2,}/) {
	$p_arg = "m," . quotemeta($p_arg) . "\[^/]*\$,i";
    }
    Warn "-find magic: $p_arg";
    unshift @$argv_ref, '-Y,p', '+1', '-dlnp', $p_arg;
};

# Pipe output thro a pager.
#
$Peg_longopt{pager} = $Peg_longopt{more} = $Peg_longopt{less} = sub {
    -t STDOUT or Warn("cannot run pager as STDOUT is not attached to a terminal"), return;
    my $argv_ref = shift;
    unshift @$argv_ref, '-##';
    $! = 0;
    open(PAGER_OUT, '|-', "less -mR") && !$!
	or die "unable to pipe STDOUT via less\n";
    *STDOUT = \*PAGER_OUT;
    *STDERR = \*PAGER_OUT;
};

# Option to comment out -pager on cmdline
#
$Peg_longopt{pagerx} = $Peg_longopt{morex} = $Peg_longopt{lessx} = sub {};

# Option to open files matche by the last run of peg in your editor.
# eg% peg -edit 22
#
$Peg_longopt{edit} = sub {
    my $argv_ref = shift;
    @$argv_ref or die "expected NUM... arguments";
    my @matches = last_matches(1);
    my $editor = $ENV{EDITOR} or die "EDITOR is not set";
    my %done;
    foreach my $n (@$argv_ref) {
	if ($n =~ /^(\d+)-(\d+)$/) { # RANGE
	    push @$argv_ref, $1..$2;
	    next;
	}
	$n =~ /^-?\d+$/ or die "bad integer: $n";
	$n = 1 if $n == 0;
	$n += 1 + @matches if $n < 0; # -1 => N
	$n = @matches if $n > @matches;
	next if $done{$n}++;
	my $file = $matches[$n-1];
	my $size = -s $file;
	$file =~ s|/|\\|g if ($^O eq 'MSWin32');
	if ($size > 10_000_000) {
	    Warn "file $n too large $file: $size";
	    next;
	}
	print "= $file\n";
	system "\"$editor\" \"$file\"";
    }
    exit;
};

# Determine total file size of cwd or the given directory.
#
$Peg_longopt{dirsize} = sub {
    my $argv_ref = shift;
    Warn "dirsize!";
    unshift @$argv_ref, (
	'-dPP', q[ $Z += -s $_; return; ],
	'-PPPP', q[
	    # PEG_NO_FORK
	    if ($Z > 1024*1024) {
		print +int($Z / (1024*1024)), " Mb";
	    } elsif ($Z > 1024) {
		print +int($Z / 1024), " Kb";
	    } else {
		print "$Z b";
	    }
	],
	'die("should not see this")',
    );
};

# Get full #if context.
# eg% peg -ifdef WHATEVER foobar.h
#
$Peg_longopt{ifdef} = sub {
    my $argv_ref = shift;
    # Turn on both context matchers, but don't match.
    # We then set the #ifdef context into $Context_line2 using -P code.
    unshift @$argv_ref, "-z", "+0", "-zz", "+0";
    $Env{PEG_CONTEXT_FORMAT2} = '$_';
    $Env{PEG_Z_INDEPENDENT} = 1;
    unshift @$argv_ref, "-P", <<'EOT';
	# PEG_NEWLINE_NEUTRAL
	# NB. some compilers allow whitespace preceding
	#  the '#' in preprocessor lines.
	if (/^\s*\#/) {
	    my $new_cxt = 1;
	    if (/^\s*\#\s*if(n?def)?\b/) {
		push @cxt, [$_, $.];
	    }
	    elsif (/^\s*\#\s*elif\b/) {
		$cxt[$#cxt] = [$_, $.];
	    }
	    elsif (/^(\s*\#\s*else)\b/) {
		my $else_line = $1;
		if (@cxt) {
		    my $if_line = $cxt[$#cxt]->[0];
		    if ($if_line !~ /^\s*\#\s*elif/) {
			$if_line =~ s/[\n\r\t ]+\z//;
			$else_line = "$else_line  /* $if_line */$Newline";
		    } else {
			$else_line = $_;
		    }
		    $cxt[$#cxt] = [$else_line, $.];
		} else {
		    # Found a #else before seeing a #if !
		    $new_cxt = 0;
		}
	    }
	    elsif (/^\s*\#\s*endif\b/) {
		pop @cxt;
	    }
	    else {
		$new_cxt = 0;
	    }
	    if ($new_cxt) {
		if (@cxt) {
		    $Context_line2 = '';
		    for (@cxt) { # trim trailing whitespace, and use native newline
			$_->[0] =~ s/[ \t\r\n]+\z//;
			$_->[0] .= $Newline;
		    }
		    # Minimize padding to ensure #'s aligned.
		    my $max_lineno_len = 1;
		    foreach my $cxt (@cxt) {
			my (undef, $lineno) = @$cxt;
			my $len = length $lineno;
			$max_lineno_len = $len if $len > $max_lineno_len;
		    }
		    foreach my $cxt (@cxt) {
			my ($line, $lineno) = @$cxt;
			my $pad = ' ' x (1 + $max_lineno_len - length($lineno));
			$line =~ s/^\s+//;
			$Context_line2 .= "#### ($lineno)$pad$line";
		    }
		} elsif ($Printed_Context_line2) {
		    $Context_line2 = "#### *none*$Newline";
		} else {
		    $Context_line2 = undef;
		}
		if (defined $Printed_Context_line2 and defined $Context_line2
			and $Context_line2 eq $Printed_Context_line2) {
		    # Ensure we don't reprint the same context eg.
		    # #if CXT
		    # ...match1
		    # #if SOMETHINGELSE
		    # #endif
		    # ...match2          // do not repeat CXT
		    #
		    $Context_line2 = undef;
		}
	    }
	}

EOT
};

# Option to open a file in the "vim" editor.
# eg% peg -vim 22
#
$Peg_longopt{vim} = sub {
    my $argv_ref = shift;
    my $n = shift @$argv_ref or die;
    my @matches = last_matches();
    $n = @matches if $n > @matches;
    my $file = $matches[$n-1];
    system "vim \"$file\"";
    exit;
};

# Option to ignore files within the specified directory.
# eg% peg -idir CVS whatever
#
$Peg_longopt{idir} = sub {
    my $argv_ref = shift;
    my $dir_name = quotemeta shift @$argv_ref or die;
    unshift @$argv_ref, "-p", qq{ \$File !~ m:(^|/)$dir_name/: };
};

# Process backslashed lines as one.
#
$Peg_longopt{bsl} = sub {
    my $argv_ref = shift;
    unshift @$argv_ref, '-P' => <<'EOT';
	# PEG_SAFE_BEFORE_CONTEXT
	if (defined $orign) { $. = 1 + $orign; $orign = undef }
	if (/\\$/) { $startn = $. unless defined $l; $l .= $_; next }
	if (defined $l) { $_ = $l . $_; $orign = $.; $. = $startn; $l = undef }
EOT
};

# Ignore Perl comments.
#
$Peg_longopt{ipc} = sub {
    my $argv_ref = shift;
    unshift @$argv_ref, '-P' => <<'EOT';
	next if /^\#/;
	next if /^=[a-z]/ .. /^=cut/; # POD can start with head1/item/pod etc.
	last if /^__(?:END|DATA)__/;
	s/(?<!\\)\#.*$//; # strip Perl comments from search string
EOT
};

# Ignore C comments
#
$Peg_longopt{icc} = sub {
    my $argv_ref = shift;
    unshift @$argv_ref, '-P' => <<'EOT';
	s|/\*.*?\*/||g; # /* ... */
	s|/\*.*$||;     # /* ...
	s|//.*$||;      # // ...
	s|^\s*\*.*$||;  # * ...
EOT
};

# Work on files given as git revisions eg. "perl-5.6.0:sv.c".
#
$Peg_longopt{git} = sub {
    my $argv_ref = shift;
    unshift @$argv_ref, '-S';
    $::Peg_S{'*'} = sub {
	my ($file, $fullpath) = @_;
	my $cmd = "git show \"$file\"";
	Warn "running $cmd" if $::Verbose;
	$! = 0;
	open(my $fh, "$cmd |") && !$!
	    or (Warn "error running $cmd: $!"), return;
	S($fh, $fullpath);
	return 1;
    };
};

################################################################################
#
# General peg configuration variables:
#

# This is the key to getting good performance for "-r" on Win32:
my $qfind = $Bin_dir . "qfind.exe";
$Env{PEG_R_CMD} = $qfind if -f $qfind;
$Env{PEG_R_FORK} = 1;

# This looks good on a black background:
$Env{PEG_COLOR} = 'f=dg,c=dy,l=dc,b=dm,n=dw,m=dr,z=wob,y=dyor';

# Default options:
$Env{PEG_OPTIONS} = '-JJJss#+_';

#$Env{PEG_OPTIONS} .= ' -p "$File !~ m:(^|/)\.git/:" '; # ignore ".git" directories

################################################################################
#
# Define some -p ALIASes:
#

$Env{PEG_P_C} = '/\.(?:c|cpp|h|hpp|xs)$/i';
$Env{PEG_P_P} = '/\.(?:pm|pl|t)$/i';

################################################################################
#
# Define some -z ALIASes:
#

# C functions/struct/template/#define context.
#
$Env{PEG_Z_C} = <<'EOT';
# PEG_FAST_Z_CONTEXT
	(
	    # A multi line #define. Only valid while lines are \'d.
	    (/^\#\s*define\s+\w+.*\\$/ and $::Multi_line_define = 1) # context
		or
	    (($::Multi_line_define and (/\\$/
		? undef # still in mld
		: ($::Multi_line_define == 2
		    ? ($::Multi_line_define = $Context_line = undef) # beyond mld
		    : ($::Multi_line_define = 2))) # last line of mld
	    ) and 0) # not context
	)
    or
	(
	    # Functions.
	    /^\w[\w\s\*\&:~]*\(/ # (1) looks like a c/C++ function
		and
	    not /^(?:if|for|switch|while)\b/ # (2) and isn't a statement
		and
	    (
		$::L = $_,
		$::L =~ s/\/\*.*?\*\/|\/[\*\/].*//g, # remove comments
		$::L !~ /[!^%;\"]/ # (3) and isn't a expression/statement
	    )
	)
    or
	# An unnamed "typedef struct".
	(/^typedef\s+struct\s*(?:\{[^\}]*)?$/ and do {{
	    # Read forward to find the struct name!
	    # Do the entire file in one pass.
	    unless ($::Last_file eq $File) {
		$::Last_file = $File;
		%::Typedef_struct = ();
		my $start_pos = tell(F);
		my $start_line = $.;
		my $typedef_struct_line = $.;
		my $inside = 1;
		while (<F>) {
		    if ($inside) {
			if (/^\}\s+(\w+)/) {
			    $::Typedef_struct{$typedef_struct_line} = $1;
			    $inside = undef;
			}
		    } elsif (/^typedef\s+struct\s*(?:\{[^\}]*)?$/) {
			$typedef_struct_line = $.;
			$inside = 1;
		    }
		}
		# Restore IO position.
		$. = $start_line;
		seek F, $start_pos, 0
		    or die "PEG_Z_C: cannot seek back in $File: $!\n";
	    }
	    my $found;
	    if (exists $::Typedef_struct{$.}) {
		$_ = "typedef struct " . $::Typedef_struct{$.} . " {" . $Newline;
		$found = 1;
	    }
	    $found;
	}})
    or
	(/^(?:typedef\s+struct|struct|template)\s+\w+/ and not /[,;\)]/)
    or
	(/^class\s+\w+\s*$/)
    or
	(/^\}/ and $Context_line = undef) # outside function/typedef scope
EOT

$Env{PEG_Z_P} = '/^(?:\s*sub\s+\w|=head|__(?:END|DATA)__)/';

$Env{PEG_Z_T} = '/^\s*(?:proc|namespace)\b/';

################################################################################
#
# -S code.
#
# Relies on the availability of the following external programs:
#   tar, unzip, gzip & pdftotext.
#

%Peg_S = (
    'pdf'     => \&process_pdf,
    '*gz'     => \&process_gz,
    '*tar'    => \&process_tar,
    '*tar.gz' => \&process_targz,
    '*zip'    => \&process_zip,
);

# The routines below do 'quick' scans _unless_ the -pp option is specified,
#  in which case each file within each archive is individually processed.

sub process_tar {
    return process_tar_slow(@_) if pp();
    Warn "use -pp /./ to search each file within the tar file"
	    unless $::Done_process_archive_warning++;
    return process_tar_fast(@_);

} # process_tar


sub process_targz {
    return process_targz_slow(@_) if pp();
    Warn "use -pp /./ to search each file within the tar.gz file"
	    unless $::Done_process_archive_warning++;
    return process_targz_fast(@_);

} # process_targz


sub process_zip {
    return process_zip_slow(@_) if pp();
    Warn "use -pp /./ to search each file within the zip file"
	    unless $::Done_process_archive_warning++;
    return process_zip_fast(@_);

} # process_zip


sub process_tar_slow {
    my ($file, $fullpath) = @_;
    my $cmd = "tar -tf \"$file\"";
    Warn "running $cmd" if $::Verbose;
    my @filelist = `$cmd`;
    if ($?
	    # Heuristic - seen "tar -tf" give correct results AND error code!
	    and @filelist < 3
    ) {
	Warn "failed to get file list from $fullpath: $?\n", @filelist;
	return 0; # signal to process the file as usual
    }
    foreach my $f (@filelist) {
	$f =~ s/\015?\012\z//;
	next if $f =~ m|/$|; # skip directory names
	next unless pp($f);
	$cmd = qq(tar -xOf "$file" "$f");
	Warn "running $cmd" if $::Verbose;
	open(my $fh, "$cmd|")
	    or Die "can't extract $f from $fullpath: $!";
	S($fh, "$fullpath # $f", 1);
	close $fh;
    }
    return 1;

} # process_tar_slow


sub process_tar_fast {
    my ($file, $fullpath) = @_;
    my $cmd = "tar -xOf \"$file\"";
    my $fh;
    Warn "running $cmd" if $::Verbose;
    if (!open($fh, "$cmd|")) {
	Warn "can't extract $fullpath: $!";
	return 0;
    }
    S($fh, $fullpath);
    close $fh;
    return 1;

} # process_tar_fast


# Process the contents of a .tar.gz file by file.
sub process_targz_slow {
    require File::Temp;
    my ($file, $fullpath) = @_;
    my ($fh, $tempfile) = File::Temp::tempfile("peg-targz-XXXXX",
	SUFFIX => '.tar', UNLINK => 1);
    close $fh;
    my $cmd = qq(gzip -dc "$file" > "$tempfile");
    Warn "running $cmd" if $::Verbose;
    system $cmd and Die "error: $cmd: $?";
    process_tar_slow($tempfile, $fullpath);
    unlink $tempfile;
    return 1;

} # process_targz_slow


# Process the contents of a .tar.gz as one entity.
sub process_targz_fast {
    my ($file, $fullpath) = @_;
    my $cmd = qq(gzip -dc "$file" | tar -xOf -);
    Warn "running $cmd" if $::Verbose;
    my $fh;
    if (!open($fh, "$cmd|")) {
	Warn "can't extract $fullpath: $!";
	return 0;
    }
    S($fh, $fullpath);
    close $fh;
    return 1;

} # process_targz_fast


# Process each individual file within a ".zip" file.
sub process_zip_slow {
    my ($file, $fullpath) = @_;
    my $cmd = "unzip -Z1 \"$file\" 2>&1";
    Warn "running $cmd" if $::Verbose;
    my @filelist = `$cmd`;
    if ($?) {
	Warn "unzip failed with $fullpath: $?\n", @filelist;
	return 0; # signal to process the file as usual
    }
    Warn "zip contains @{[ scalar @filelist ]} files" if $::Verbose;
    foreach my $f (@filelist) {
	$f =~ s/\015?\012\z//;
	next unless pp($f);
	my $cmd = qq(unzip -p "$file" "$f");
	Warn "running $cmd" if $::Verbose;
	open(my $fh, "$cmd|")
	    or Die "can't extract $f from $fullpath: $!";
	S($fh, "$fullpath # $f", 1);
	close $fh;
    }
    return 1;

} # process_zip_slow


# Process the entire contents inside a ".zip" file as one.
sub process_zip_fast {
    my ($file, $fullpath) = @_;
    my $cmd = qq(unzip -p "$file");
    Warn "running $cmd" if $::Verbose;
    open(my $fh, "$cmd|")
	or Die "can't unzip $fullpath: $!";
    S($fh, $fullpath);
    close $fh;
    return 1;

} # process_zip_fast


sub process_gz {
    my ($file, $fullpath) = @_;
    my $cmd = qq(gzip -dc "$file");
    Warn "running $cmd" if $::Verbose;
    open(my $fh, "$cmd|")
	or Die "error: $cmd: $!";
    S($fh, $fullpath);
    close $fh;
    return 1;

} # process_gz


sub process_pdf {
    require File::Temp;
    my ($file, $fullpath) = @_;
    my ($fh, $tempfile) = File::Temp::tempfile("peg-pdf-XXXXX",
	SUFFIX => '.pdf', UNLINK => 1);
    close $fh;
    my $cmd = "pdftotext \"$file\" $tempfile";
    Warn "running $cmd" if $::Verbose;
    system $cmd;
    if ($?) {
	Warn "pdftotext failed: $?";
	unlink $tempfile;
	return 0;
    }
    unless (open($fh, "<", $tempfile)) {
	Warn "could not open $tempfile: $!";
	unlink $tempfile;
	return 0;
    }
    S($fh, $fullpath);
    close $fh;
    unlink $tempfile;
    return 1;

} # process_pdf

################################################################################

# Look for a ".peg_ini.pl" file in parent directories if one not in cwd.
unless (-e ".peg_ini.pl") {
    foreach my $dir (qw( .. ../.. )) {
	my $f = "$dir/.peg_ini.pl";
	if (-e $f) {
	    require $f;
	    last;
	}
    }
}

1;