#!/usr/bin/perl -w

# Ken Suzuki <ken.suzuki@oeaw.ac.at>
# 1.2.2009    created    v0.1
# 25.12.2011  modified for CPAN upload v0.2

=head1 NAME
    
LatexReferenceCheck
    
=head1 STATUS
    
version 0.2
    
=head1 AUTHOR
    
Ken Suzuki<ken.suzuki@oeaw.ac.at>
    
=head1 DESCRIPTION
    
Performs trivial editorial checks on a paper, working from its TeX source code.
It checks

  .) if there is any citation in the abstract
  .) if all figures/tables are referred to
  .) if either a PACS code or a keyword is given
  .) if all references are cited
  .) if the references appear in sequential order

=head1 USAGE

LatexReferenceCheck [options] texfilename

  [options]
   -v : verbose mode
   -h : print usage()

=pod SCRIPT CATEGORIES
    
Unix/System_administration
CPAN/Administrative
Educational
    
=cut
    
#initialization----------------------------------------------
# All state in this script lives in package globals; they are set up here
# so that later sections can rely on defined values.

# Word indices (into @data) of the abstract and bibliography delimiters.
# For $begin_bib the value 0 is later interpreted as "no thebibliography
# environment found".
$begin_abstract = $end_abstract = $begin_bib = $end_bib = 0;

# Word indices of \begin{figure}, \end{figure} and \label words, plus the
# first reference position of each figure.  These are used as arrays
# throughout the script, so they are initialised as arrays (the original
# initialised unrelated scalars of the same names).
@begin_figures = ();
@end_figures = ();
@labels = ();
@reference_figures = ();

# -999 marks "check not performed yet"; both are reset to 0 by their checks.
$citation_in_abstract = -999;
$is_citation_order_okay = -999;

$is_bibtex = 0;                 # set to 1 when no thebibliography is found
$is_any_figure_unlabeled = 0;   # number of figures without a \label
$is_all_figure_referred = 0;    # counts figures that are NOT referred
$nfigures = 0;
$verbose = 0;
#------------------------------------------------------------

# Command line handling.
#   -v     switches verbose output on
#   -h     prints the help text (usage() terminates the script)
#   other  is taken as the name of the tex file; the last one given wins
# An "-o <outfile>" option and the rejection of unknown options are
# currently disabled.
foreach $argument (@ARGV){
    if ($argument =~ m/^-h$/){
        # usage() is defined further down with a prototype; the & call form
        # is kept so that perl -w does not complain about an early call.
        &usage();
    }
    if ($argument =~ m/^-v$/){
        $verbose = 1;
        next;
    }
    $filename = $argument;
}

# Print the command line help to the currently selected output handle and
# terminate the script with exit status 1.
sub usage(){
    print <<"END_OF_USAGE";

LatexReferenceCheck [options] filename:    perform basic checks of latex
                                             references, e.x.
                                              .if all figures are referred
                                              .if all references are cited
                                              .if citations are in right order

 options
  -v                   verbose mode
  -h                   print out usage (this page)

END_OF_USAGE
    exit(1);
}
#  -o    <filename>     filename to which the result is written. If not specified, the result
#                       goes to STDOUT.


# Open the tex file to inspect and, if an output file name has been set,
# redirect all following print statements to that file.
die "Please specify filename to be inspected\n" unless (defined $filename) ;
print "Checking tex file: $filename\n";

# Three-argument open: the file name is never interpreted as an open mode,
# so names starting with '>' , '|' etc. are treated as plain file names.
# $! is reported so the user sees why the open failed.
open(TEXFILEINPUT, '<', $filename) or die "Cannot open file: $filename ($!)\n";

if (defined $outfile){
    open(TEXCHECKRESULT, '>', $outfile) or die "Cannot open file: $outfile ($!)\n";
    select TEXCHECKRESULT;
}

# Read the tex file and flatten it into @data, a list of whitespace
# separated words.  Lines starting with % are skipped entirely; on other
# lines everything from the first word that starts with % is dropped.
$nline = 0;
@data = ();
while ($line = <TEXFILEINPUT>){
#    $line =~ s/[^[:ascii:]]+//g;  # (disabled) get rid of non-ASCII characters
    # chomp only removes a trailing newline; chop would eat the last real
    # character of a final line that has no newline.
    chomp $line;
    next if ($line =~ /^%/);

    @words = ();
    foreach $word (split(" ", $line)){
        $word =~ s/\+//g;           # get rid of plus, the special character of perl RE
        last if ($word =~ /^%/);    # cut comment like %this, even if it is the only word
        push(@words, $word);
    }
    push(@data, @words);
    $nline++;
}
# $#data is the last index, not the number of words.
$nwords = scalar(@data);
if ($verbose){ print "$nline lines $nwords words scanned\n"; }

# Record the word indices (into @data) of the structural markers: abstract,
# thebibliography, figure environments and labels.
# Literal braces in the patterns are escaped; an unescaped literal "{" in a
# regular expression is deprecated in modern Perl.
if ($verbose){ print "Making markers... \n"; }
# Scan up to and including the last word ($#data is the last valid index).
for ($i=0; $i<=$#data; $i++){
    if ($data[$i] =~ /^\\begin\{abstract\}/){ $begin_abstract = $i; }
    if ($data[$i] =~ /^\\end\{abstract\}/){ $end_abstract = $i; }
    if ($data[$i] =~ /^\\begin\{thebibliography\}/){ $begin_bib = $i; }
    if ($data[$i] =~ /^\\end\{thebibliography\}/){ $end_bib = $i; }
    if ($data[$i] =~ /^\\begin\{figure\*?\}/){ push(@begin_figures, $i); }
    # \end{figure} is accepted anywhere inside a word (not anchored).
    if ($data[$i] =~ /\\end\{figure\*?\}/){ push(@end_figures, $i); }
    if ($data[$i] =~ /^\\label\{.*\}/){ push(@labels, $i); }
}
# No thebibliography environment: assume bibtex and place the (empty)
# bibliography range at the last word.
if ($begin_bib == 0){
    $is_bibtex = 1;
    $begin_bib = $end_bib = $#data;
}
$nfigures = $#begin_figures+1;
if ($verbose){ print " done\n\n"; }

# Build the figure database:
#   @figures        one array reference per figure, holding its words
#   @figure_labels  the \label key of each figure (undef when it has none)
@figures = ();
@figure_labels = (); # indexed like @figures
print "Creating figure database...\n" if $verbose;
foreach $fig (0 .. $nfigures-1){
    # every word from the begin marker through the end marker, inclusive
    @a_figure = @data[$begin_figures[$fig] .. $end_figures[$fig]];
    foreach $word (@a_figure){
        # with several labels inside one figure the last one is kept
        $figure_labels[$fig] = $1 if ($word =~ /\\label{(.*?)}/);
    }
    $figures[$fig] = [ @a_figure ];
}
print " done\n" if $verbose;
print "$nfigures figures are found\n" if $verbose;

# Report the label of each figure and count the figures that have none.
foreach $fig (0 .. $nfigures-1){
    unless (defined $figure_labels[$fig]){
        print "Figure[$fig] key= (Not Defined). Reference checking will not work properly.\n" if $verbose;
        $is_any_figure_unlabeled++;
        next;
    }
    print "Figure[$fig] key=$figure_labels[$fig]\n" if $verbose;
}
print "\n" if $verbose;

#@tables = ();

# Collect every key used in a \ref{...} between the end of the abstract and
# the start of the bibliography into @references.  A comma separated key
# list inside one \ref is split into its keys.
@references = ();
if ($verbose){ print "Creating reference database...\n"; }
for ($i=$end_abstract+1; $i<$begin_bib; $i++){
    # /g: a single word may hold several references, e.g.
    # "\ref{a},\ref{b}"; all of them are recorded, not only the first.
    while ($data[$i] =~ /\\ref\{(.*?)\}/g){ # ? minimum matching
        $reference_candidate = $1;
        @reference_candidates = split(",", $reference_candidate);
        push(@references, @reference_candidates);
        if ($verbose){
            foreach $reference_key (@reference_candidates){
                print "   $reference_key\n";
            }
        }
    }
}
if ($verbose){ print " done\n"; }
$buf = $#references+1;
if ($verbose){ print "$buf references are found\n\n"; }

# Collect the keys of all \bibitem entries between the bibliography markers
# into @bibliography, in the order in which they appear.
@bibliography = ();
if ($verbose){ print "Creating bibliography database...\n"; }
for ($i=$begin_bib; $i<$end_bib; $i++){
    # [^}]* stops at the first closing brace, so text glued to the entry
    # (e.g. "\bibitem{key}\textit{x}") does not leak into the key.
    # An optional [label] is accepted when it sits in the same word.
    if ($data[$i] =~ /\\bibitem(?:\[[^\]]*\])?\{([^}]*)\}/){
        $bibliography_key = $1;
        push(@bibliography, $bibliography_key);
        if ($verbose){ print "   Bibliography[$#bibliography] key=$bibliography_key\n"}
    }
}
if ($verbose){ print " done\n"; }
$buf = $#bibliography+1;
if ($verbose){ print "$buf bibliography are found\n"; }
if ($is_bibtex){
    if ($verbose){ print "This document possibly uses bibtex. This may cause finding 0 bibliography.\n"; }
}
if ($verbose){ print "\n"; }


# Count the words of the abstract that contain a \cite command.  The scan
# covers the words from the begin marker up to, but not including, the end
# marker of the abstract.
print "Checking if there\'s citation in the abstract... \n" if $verbose;
$citation_in_abstract = scalar(
    grep { $_ =~ /\\cite/ } @data[$begin_abstract .. $end_abstract-1]
);
print "done\n" if $verbose;
print "$citation_in_abstract citation(s) in abstract found\n\n" if $verbose;


# Check every figure against the reference database.
# Despite its name, $is_all_figure_referred counts the figures that are NOT
# referred; 0 therefore means that all figures are referred.  A figure
# without a label can never match and is counted as not referred.
$is_all_figure_referred = 0;
print "Checking if all figures are referred in the text\n" if $verbose;
foreach $fig (0 .. $nfigures-1){
    print "Figure[$fig] " if $verbose;

    # number of references whose key equals the label of this figure
    $is_figure_referred = 0;
    if (defined $figure_labels[$fig]){
        $is_figure_referred = grep { $_ eq $figure_labels[$fig] } @references;
    }

    if ($is_figure_referred <= 0){
        $is_all_figure_referred++;
        print " not okay\n" if $verbose;
        next;
    }
    print " okay\n" if $verbose;
}
if ($verbose){
    print(($is_all_figure_referred == 0)
          ? "All figures are referred\n"
          : "Not all figures are referred\n");
    print "\n";
}


# For every labelled figure find the word index of its first reference in
# the main text and store it in $reference_figures[<figure index>].
# Figures that are never referred (or have no label) get no entry.
if ($verbose){ print "Searching the first reference position of figures \n"; }
for ($j=0; $j<$nfigures; $j++){
    if ($verbose){ print "Figure [$j] is referred at .. "; }
    if (defined $figure_labels[$j]){
        # The label is text, not a pattern: quote it so that characters
        # such as . ( ) [ ] in a label cannot change or break the regex.
        $quoted_label = quotemeta($figure_labels[$j]);
        # Walk backwards so that the last assignment is the first
        # occurrence.  The range includes word end_abstract+1, the same
        # lower bound the reference database scan uses.
        for ($i=$begin_bib; $i>=$end_abstract+1; $i--){
            # The label may stand alone or be one key of a comma separated
            # list, matching how the reference database splits \ref{a,b}.
            if ($data[$i] =~ /\\ref\{(?:[^}]*,)?${quoted_label}(?:,[^}]*)?\}/){
                $reference_figures[$j] = $i;
                if ($verbose){ print " $i "; }
            }
        }
    }
    if (defined $reference_figures[$j]){
        if ($verbose){ print ".  ==> ($reference_figures[$j]) \n"; }
    }else{
        if ($verbose){ print ".  ==> (n.A.) \n"; }
    }
}
if ($verbose){ print " done\n\n"; }


#@citations = ();
#print "Creating citation database...\n";
#for ($i=$end_abstract+1; $i<$begin_bib; $i++){
#    if ($data[$i] =~ /\\cite{(.*?)}/){ # ? minimum matching
#	$citation_candidate = $1;
#	@citation_candidates = split(",", $citation_candidate);
#	push(@citations, @citation_candidates);
	
#	for($ii=0; $ii<=$#citation_candidates; $ii++){
#	    if ($verbose){ print "   $citation_candidates[$ii]\n";}
#	}
#    }
#}
#print " done\n";
#$buf = $#citations+1;
#print "$buf citations are found\n\n";


# Count the words inside figure environments that contain a \cite{...}.
# $citation_infigure_captions is the total over all figures; the per-figure
# count is only used for the verbose report.  The scan runs from the begin
# marker up to, but not including, the end marker of each figure.
$citation_infigure_captions = 0;
print "Checking if there\'s citation in the figure captions\n" if $verbose;
foreach $idx (0 .. $nfigures-1){
    print " in the figure $idx ...." if $verbose;
    $citation_infigure_caption = grep { $_ =~ /\\cite{(.*?)}/ }
        @data[$begin_figures[$idx] .. $end_figures[$idx]-1];
    $citation_infigure_captions += $citation_infigure_caption;
    if ($verbose){
        print(($citation_infigure_caption > 0) ? "  found\n" : "  not found\n");
    }
}
print "\n" if $verbose;


# Build @new_data: the main text (after the abstract, before the
# bibliography) with every referenced figure moved to the position of its
# first reference, so that citations inside captions are seen in reading
# order by the citation checks that follow.
if ($verbose){ print "Relocating figure/table captions at the first referenced position in the main text\n"; }

# Very first, select the figures that are referenced at least once.  Their
# ORIGINAL indices are kept, because @begin_figures/@end_figures are indexed
# by the original figure number.
@relocated_figure_indices = ();
for ($i=0; $i<$nfigures; $i++){
    if (defined $reference_figures[$i]){
        push(@relocated_figure_indices, $i);
    }else{
        if ($verbose){ print "Figure[$i] entry removed from \@figures, \@reference_figure\n"; }
    }
}
# The slicing below needs ascending reference positions; figures may be
# referenced in a different order than they are defined.
@relocated_figure_indices =
    sort { $reference_figures[$a] <=> $reference_figures[$b] or $a <=> $b }
    @relocated_figure_indices;

# First remove the relocated figures from their original place (overwrite
# with null).  @figures holds copies of the words, so nothing is lost.
foreach $fig (@relocated_figure_indices){
    for ($j=$begin_figures[$fig]; $j<=$end_figures[$fig]; $j++){
        $data[$j] = "";
    }
}

# Keep only the relocated figures, ordered by first reference position.
@figures = @figures[@relocated_figure_indices];
@reference_figures = @reference_figures[@relocated_figure_indices];

# Split whole data into slices at the first reference positions.
@data_slices = ();
if (@reference_figures){
    $slice_start = $end_abstract+1;
    for ($i=0; $i<=$#reference_figures; $i++){
        @data_slice = @data[$slice_start .. $reference_figures[$i]];
        push(@data_slices, [ @data_slice ] );
        $slice_start = $reference_figures[$i]+1;
    }
    # remainder of the main text, up to the word before the bibliography
    @data_slice = @data[$slice_start .. $begin_bib-1];
    push(@data_slices, [ @data_slice ] );

    # Recombine: slice, figure, slice, figure, ..., last slice.
    @new_data = ();
    for ($i=0; $i<=$#figures; $i++){
        push(@new_data, @{$data_slices[$i]}, @{$figures[$i]});
    }
    push(@new_data, @{$data_slices[$#data_slices]});
}else{
    # NOTE(review): in this branch @new_data is the complete document,
    # including abstract and bibliography, unlike the branch above.
    @new_data = @data;
}
if ($verbose){ print " done\n"; }

# Recompute the figure markers as word indices into @new_data.
if ($verbose){ print "Re-Making markers... \n"; }
# Start from empty lists: the old entries are indices into @data and would
# otherwise stay in front of the new ones.
@begin_figures = ();
@end_figures = ();
# Scan up to and including the last word ($#new_data is the last index).
for ($i=0; $i<=$#new_data; $i++){
    if ($new_data[$i] =~ /^\\begin\{figure\*?\}/){ push(@begin_figures, $i); }
    if ($new_data[$i] =~ /^\\end\{figure\*?\}/){ push(@end_figures, $i); }
}
if ($verbose){ print " done\n\n"; }


# Collect all citation keys of @new_data, in order of appearance, into
# @citations.
@citations = ();
if ($verbose){ print "Re-Creating citation database...\n"; }
# The words are joined again before scanning.  This way a key list with
# blanks ("\cite{a, b}", which the word splitting tears apart) and several
# \cite commands glued into one word are all found, and the last word of
# @new_data is included.
$citation_text = join(" ", @new_data);
# An optional [note] argument between \cite and {keys} is skipped.
while ($citation_text =~ /\\cite(?:\[[^\]]*\])*\{([^}]*)\}/g){
    $citation_candidate = $1;
    $citation_candidate =~ s/^\s+//;
    $citation_candidate =~ s/\s+$//;
    # keys must compare equal to the \bibitem keys, so drop blanks around commas
    @citation_candidates = split(/\s*,\s*/, $citation_candidate);
    push(@citations, @citation_candidates);

    for($ii=0; $ii<=$#citation_candidates; $ii++){
        if ($verbose){
            # first column: running index of the key within @citations
            printf("   %3i  %s\n",
                   $#citations-$#citation_candidates+$ii,
                   $citation_candidates[$ii]);
        }
    }
}
if ($verbose){ print " done\n"; }
$buf = $#citations+1;
if ($verbose){ print "$buf citations are found\n\n"; }


# Check that bibliography entries are cited for the first time in the order
# in which they are listed.  $is_citation_order_okay counts the violations,
# so 0 means the order is fine.
if ($verbose){ print "Checking the order of ciation ...\n"; }
# $maxrefnum is the (0-based) index of the next bibliography entry that is
# expected to be cited for the first time.  It starts at 0 because $refnum
# is 0-based; starting at 1 treated entry 0 as already cited and accepted a
# document whose first citation is entry 1.
$maxrefnum = 0;
$is_citation_order_okay = 0;
for ($j=0; $j<=$#citations; $j++){
    for ($i=0; $i<=$#bibliography; $i++){
        if ($citations[$j] eq $bibliography[$i]){
            $refnum = $i;
            if ($refnum<$maxrefnum){
                # entry was cited before: repeated citations are fine
                if ($verbose) {print "-I- Ref[$refnum] cited once again at citation number $j\n";}
            }elsif ($refnum==$maxrefnum){
                # exactly the next expected entry
                $maxrefnum++;
            }else{
                # an entry further down the list is cited too early
                if ($verbose) {print "-E- Ref[$refnum] ($bibliography[$i]) may be cited in wrong order at citation number $j\n";}
                $is_citation_order_okay++;
            }
        }
    }
}
if ($verbose){ print " done\n"; }


#=====
# Print the result summary, one line per check.  $status accumulates a
# decimal code of the failed checks (1: citation in abstract, 10: figure
# not referred, 100: citation order, 200: order unknown because of bibtex);
# it is computed but currently not printed.
print "\n\n";
print "Result Summary\n";
$status = 0;

$verdict = $is_bibtex ? " Possibly yes\n" : " No\n";
print "Using bibtex ... " . $verdict;

if ($citation_in_abstract>0){
    $status+=1;
    $verdict = " Not Okay\n";
}else{
    $verdict = " Okay\n";
}
print "No citation in abstract   ...... " . $verdict;

# $is_all_figure_referred counts the figures that are not referred.
if ($is_all_figure_referred > 0){
    $status+=10;
    # with unlabeled figures the check itself is unreliable
    $verdict = $is_any_figure_unlabeled
        ? " Suspicious. Please check manually.\n"
        : " Not Okay\n";
}else{
    $verdict = " Okay\n";
}
print "All figures referred   ...... " . $verdict;

$verdict = ($citation_infigure_captions>0) ? " Yes\n" : " No\n";
print "Citation in figure caption  ......  " . $verdict;

if ($is_citation_order_okay>0){
    $status+=100;
    $verdict = " Not Okay\n";
}elsif ($is_bibtex){
    # without \bibitem entries there is nothing to compare against
    $verdict = " Unknown. Please check manually\n";
    $status+=200;
}else{
    $verdict = " Okay\n";
}
print "Bibliography cited sequentially .... " . $verdict;
#print "\n$status\n";
select STDOUT;