#! /usr/bin/perl -w
use lib '/usr/lib/perl'; use INN::Config;

##############################################################################
# perl-nocem - a NoCeM-on-spool implementation for INN 2.x.
# Copyright 2000 by Miquel van Smoorenburg <miquels@cistron.nl>
# Copyright 2001 by Marco d'Itri <md@linux.it>
# This program is licensed under the terms of the GNU General Public License.
#
# List of changes:
#
# 2002: Patch by Steven M. Christey for untrusted printf input.
# 2007: Patch by Christoph Biedl for checking a timeout.
# Documentation improved by Jeffrey M. Vinocur (2002), Russ Allbery (2006)
# and Julien Elie (2007).
#
##############################################################################

require 5.00403;
use strict;

# XXX FIXME I haven't been able to load it only when installed.
# If nobody can fix it, just ship the program with this line commented out.
#use Time::HiRes qw(time);

# Keyring with the public keys of the trusted NoCeM issuers; it is handed
# to gpg/gpgv through --keyring when the signature of a notice is checked.
my $keyring = $INN::Config::pathetc . '/pgp/ncmring.gpg';

# XXX To be moved to a config file.
#sub local_want_cancel_id {
#    my ($group, $hdrs) = @_;
#
## Hippo has too many false positives to be useful outside of pr0n groups
#    if ($hdrs->{issuer} =~ /(?:Ultra|Spam)Hippo/) {
#        foreach (split(/,/, $group)) {
#            return 1 if /^alt\.(?:binar|sex)/;
#        }
#        return 0;
#    }
#    return 1;
#}

# no user serviceable parts below this line ###################################

# global variables

# Signal bookkeeping: $working is set by the main loop while a notice is
# being processed; the handlers record in $got_sighup/$got_sigterm that a
# signal was received.
my $working;
my $got_sighup;
my $got_sigterm;

# Permission entries built by read_ctlfile(), stored as "issuer\001type".
my @ncmperm;

# Code reference to the cancel backend (cancel_nntp or cancel_ctlinnd).
my $cancel;

# Logging and NNTP connection state.
my ($use_syslog, $log_open, $nntp_open, $last_cancel) = (0, 0, 0, 0);

# Age after which cancel_nntp() closes and reopens its socket.
my $socket_timeout = $INN::Config::peertimeout - 100;

# Log file used by logmsg() when syslog is not in use.
my $logfile = $INN::Config::pathlog . '/perl-nocem.log';

# initialization and main loop ###############################################

# Use syslog when Sys::Syslog can be loaded; otherwise $use_syslog stays 0
# and logmsg() appends to $logfile instead.
eval { require Sys::Syslog; Sys::Syslog->import; $use_syslog = 1; };

if ($use_syslog) {
    # Old Sys::Syslog releases have to be told to use the local log socket.
    if ($Sys::Syslog::VERSION lt 0.15) {
        eval "sub Sys::Syslog::_PATH_LOG { '/dev/log' }" if $^O eq 'dec_osf';
        Sys::Syslog::setlogsock('unix')
          if $^O =~ /linux|dec_osf|freebsd|darwin/;
    }
    openlog('nocem', '', $INN::Config::syslog_facility);
}

# Without gpgv or gpg no signature can be verified, so there is nothing
# useful to do.
if (not $INN::Config::gpgv and not $INN::Config::gpg) {
    logmsg('cannot find the gpgv or gpg binary', 'err');
    sleep 5;
    exit 1;
}

# INN 2.0 to 2.3 (or an unknown version) are driven through ctlinnd; any
# other version gets the NNTP "MODE CANCEL" backend.
if ($INN::Config::version and not $INN::Config::version =~ /^INN 2\.[0123]\./)
{
    $cancel = \&cancel_nntp;
} else {
    $cancel = \&cancel_ctlinnd;
}

$SIG{HUP} = \&hup_handler;
$SIG{INT} = \&term_handler;
$SIG{TERM} = \&term_handler;
$SIG{PIPE} = \&term_handler;

logmsg('starting up');

unless (read_ctlfile()) {
    sleep 5;
    exit 1;
}

# One storage token (or article path) per input line.
while (<STDIN>) {
    # chomp, not chop: a final token which is not newline-terminated must
    # not lose its last character.
    chomp;
    $working = 1;
    do_nocem($_);
    $working = 0;

    # Act on the signals which were received while the notice was processed.
    term_handler() if $got_sigterm;
    hup_handler() if $got_sighup;
}

logmsg('exiting because of EOF', 'debug');
exit 0;

##############################################################################

# Handle a single NoCeM notice, given its storage token or file name:
# fetch and verify the notice, cancel the articles it lists and log a
# short summary with timing information.
sub do_nocem {
    my ($token) = @_;
    my $start = time;

    # Fetch the article; nothing to do when it cannot be opened.
    my $article = open_article($token);
    return unless defined $article;

    # Parse and authenticate the notice.
    my ($msgid, $nid, $issuer, $ids) = read_nocem($article);
    close $article;
    return if not $ids;

    $cancel->($ids);
    logmsg("Articles cancelled: " . join(' ', @{$ids}));

    # Never divide by zero when the whole run took less than a clock tick.
    my $elapsed = time - $start;
    $elapsed = 0.01 unless $elapsed;
    my $count = @{$ids};
    my $summary = sprintf(
        "processed notice %s by %s (%d ids, %.5f s, %.1f/s)",
        $nid, $issuer, $count, $elapsed, $count / $elapsed
    );
    logmsg($summary);
}

# Parse and authenticate one NoCeM notice read from the filehandle $artfh.
#
# - Check if it is a PGP signed NoCeM notice
# - See if we want it
# - Then check PGP signature
#
# Returns the list (Message-ID of the notice, notice-id, issuer, reference
# to the array of Message-IDs to cancel) when the notice is wanted and its
# signature verifies; returns an empty list in every other case.
sub read_nocem {
    my $artfh = shift;

    # Examine the beginning of the article (about the first 200 lines) to
    # see if it is a PGP signed NoCeM.
    my $ispgp = 0;
    my $isncm = 0;
    my $inhdr = 1;
    my $seen = 0;
    my $body = '';
    my $msgid = '<>';

    while (my $line = <$artfh>) {
        last if $seen++ > 200;
        $line =~ s/\r\n$/\n/;
        if ($inhdr) {
            if ($line =~ /^$/) {
                $inhdr = 0;
            } elsif ($line =~ /^Message-ID:\s+(<.*>)/i) {
                $msgid = $1;
            }
        } else {
            # The body is kept verbatim: it is what gets signature-checked.
            $body .= $line;
            $ispgp = 1 if $line =~ /^-----BEGIN PGP SIGNED MESSAGE-----/;
            if ($line =~ /^\@\@BEGIN NCM HEADERS/) {
                $isncm = 1;
                last;
            }
        }
    }

    # must be a PGP signed NoCeM.
    if (not $ispgp) {
        logmsg("Article $msgid: not PGP signed", 'debug');
        return;
    }
    if (not $isncm) {
        logmsg("Article $msgid: not a NoCeM", 'debug');
        return;
    }

    # read the headers of this NoCeM, and check if it's supported.
    my %hdrs;
    while (my $line = <$artfh>) {
        $line =~ s/\r\n$/\n/;
        $body .= $line;
        last if $line =~ /^\@\@BEGIN NCM BODY/;
        my ($key, $val) = $line =~ /^([^:]+)\s*:\s*(.*)$/;

        # Lines which are not "key: value" (blank lines, for instance)
        # carry no pseudo header.
        next if not defined $key;
        $hdrs{ lc $key } = $val;
    }
    foreach my $required (qw(action issuer notice-id type version)) {
        next if $hdrs{$required};
        logmsg("Article $msgid: missing $required pseudo header", 'debug');
        return;
    }
    return if not supported_nocem($msgid, \%hdrs);

    # decide if we want it.
    if (not want_nocem(\%hdrs)) {
        logmsg(
            "Article $msgid: unwanted ($hdrs{issuer}/$hdrs{type})",
            'debug'
        );
        return;
    }
    # XXX want_hier() not implemented
    #    if ($hdrs{hierarchies} and not want_hier($hdrs{hierarchies})) {
    #        logmsg("Article $msgid: unwanted hierarchy ($hdrs{hierarchies})",
    #            'debug');
    #        return;
    #    }

    # We do want it, so read the entire article.  Also copy it to
    # a temp file so that we can check the PGP signature when done.
    my $tmpfile = "$INN::Config::pathtmp/nocem.$$";
    my $tmpfh;
    if (not open($tmpfh, '>', $tmpfile)) {
        logmsg("cannot open temp file $tmpfile: $!", 'err');
        return;
    }
    print {$tmpfh} $body;
    undef $body;

    # process NoCeM body.  A line is "<Message-ID> group"; a line which
    # starts with whitespace adds one more group to the previous Message-ID.
    my $inbody = 1;
    my @nocems;
    my ($lastid, $lastgrp);
    while (my $line = <$artfh>) {
        $line =~ s/\r\n$/\n/;
        print {$tmpfh} $line;
        $inbody = 0 if $line =~ /^\@\@END NCM BODY/;
        next if not $inbody or $line =~ /^#/;

        my ($id, $grp) = $line =~ /^(\S*)\s+(\S+)/;
        next if not $grp;
        if ($id) {
            push @nocems, $lastid
              if $lastid and want_cancel_id($lastgrp, \%hdrs);
            $lastid = $id;
            $lastgrp = $grp;
        } else {
            $lastgrp .= ',' . $grp;
        }
    }
    push @nocems, $lastid if $lastid and want_cancel_id($lastgrp, \%hdrs);

    # A copy which could not be written completely cannot be verified.
    if (not close $tmpfh) {
        logmsg("cannot write temp file $tmpfile: $!", 'err');
        unlink $tmpfile;
        return;
    }

    # Nothing to cancel: skip the signature check, but do not leave the
    # temp file behind.
    if (not @nocems) {
        unlink $tmpfile;
        return;
    }

    # at this point we need to verify the PGP signature.
    my $verified = pgp_check($hdrs{issuer}, $msgid, $tmpfile);
    unlink $tmpfile;
    return if not $verified;

    return ($msgid, $hdrs{'notice-id'}, $hdrs{issuer}, \@nocems);
}

# Decide whether one Message-ID of a notice should be cancelled.  The
# decision is delegated to a site-specific local_want_cancel_id() hook
# when one is defined; otherwise everything is accepted.
# XXX not implemented: code to discard notices for groups we don't carry
sub want_cancel_id {
    my ($group, $hdrs) = @_;

    if (defined &local_want_cancel_id) {
        return local_want_cancel_id(@_);
    }
    return 1;
}

# Tell whether the notice described by the pseudo headers in $hdrs is
# allowed by one of the issuer/type entries loaded from nocem.ctl.
# Returns 1 if it is, 0 otherwise.
sub want_nocem {
    my ($hdrs) = @_;

    for my $entry (@ncmperm) {
        my ($issuer, $type) = split(/\001/, $entry);

        # The configured issuer is matched as a case-insensitive pattern.
        next unless $hdrs->{issuer} =~ /$issuer/i;
        return 1 if $type eq '*' or lc($hdrs->{type}) eq $type;
    }
    return 0;
}

# Check that a notice uses a NoCeM version (0.9 or 0.9x) and an action
# ("hide") which this program implements.  $msgid is only used in the
# log messages.  Returns 1 if the notice is supported, 0 otherwise.
sub supported_nocem {
    my ($msgid, $hdrs) = @_;

    unless ($hdrs->{version} =~ /^0\.9[0-9]?$/) {
        my $why = "Article $msgid: version $hdrs->{version} not supported";
        logmsg($why, 'debug');
        return 0;
    }

    unless ($hdrs->{action} eq 'hide') {
        my $why = "Article $msgid: action $hdrs->{action} not supported";
        logmsg($why, 'debug');
        return 0;
    }

    return 1;
}

# Check the PGP signature on an article.
#
# $issuer is the issuer announced in the notice, $msgid the Message-ID of
# the notice (only used in log messages) and $art the name of the file
# which holds the signed text.  gpg is used when configured, gpgv otherwise.
#
# Returns 1 if a good signature is reported and the signer string contains
# $issuer; returns 0 (after logging the reason) in every other case.
sub pgp_check {
    my ($issuer, $msgid, $art) = @_;

    # Build the command line once.  No empty argument is passed when no
    # keyring is set: it would be taken for a file name.
    my $prog = $INN::Config::gpg || $INN::Config::gpgv;
    my @args = ('--status-fd=1');
    push @args, '--verify', '--allow-weak-digest-algos'
      if $INN::Config::gpg;
    push @args, '--keyring=' . $keyring if $keyring;
    push @args, $art;

    # fork and spawn a child
    my $pid = open(my $pfd, '-|');
    if (not defined $pid) {
        logmsg("pgp_check: cannot fork: $!", 'err');
        return 0;
    }
    if ($pid == 0) {
        # Send the diagnostics down the pipe too, so that the parent can
        # report them.
        open(STDERR, '>&STDOUT');
        exec($prog, @args);
        exit 126;
    }

    # Read the result and check status code.
    my $output = join('', <$pfd>);
    my $status = 0;
    if (not close $pfd) {
        if ($? >> 8) {
            $status = $? >> 8;
        } else {
            logmsg("Article $msgid: $prog killed by signal " . ($? & 255));
            return 0;
        }
    }
    #logmsg("Command line was: $prog @args", 'debug');
    #logmsg("Full PGP output: >>>$output<<<", 'debug');

    if ($output =~ /^\[GNUPG:\]\s+GOODSIG\s+\S+\s+(.*)/m) {
        my $signer = $1;
        return 1 if $signer =~ /\Q$issuer\E/;
        logmsg("Article $msgid: signed by $signer instead of $issuer");
    } elsif ($output =~ /^\[GNUPG:\]\s+NO_PUBKEY\s+(\S+)/m) {
        logmsg("Article $msgid: $issuer (ID $1) not in keyring");
    } elsif ($output =~ /^\[GNUPG:\]\s+BADSIG\s+\S+\s+(.*)/m) {
        logmsg("Article $msgid: bad signature from $1");
    } elsif ($output =~ /^\[GNUPG:\]\s+BADARMOR/m
        or $output =~ /^\[GNUPG:\]\s+UNEXPECTED/m)
    {
        logmsg("Article $msgid: malformed signature");
    } elsif ($output =~ /^\[GNUPG:\]\s+ERRSIG\s+(\S+)/m) {
        # safety net: we get there if we don't know about some token
        logmsg("Article $msgid: unknown error (ID $1)");
    } else {
        # some other error we don't know about happened.
        # 126 is returned by the child if exec fails; its output is then
        # the warning printed by perl, which is stripped of its location
        # and folded onto a single line before being logged.
        $output =~ s/ at \S+ line \d+\.\n$//;
        $output =~ s/\n/_/g;
        logmsg(
            "Article $msgid: $prog exited "
              . (($status == 126) ? "($output)" : "with status $status"),
            'err'
        );
    }
    return 0;
}

# Open an article for reading.
#
# $token is either a storage API token (of the form "@...@"), in which
# case the article is read from "sm -q", or the name of a file.
# Returns a filehandle to read the article from, or undef (after logging
# the reason) when it cannot be opened.
sub open_article {
    my $token = shift;

    if ($token =~ /^\@.+\@$/) {
        my $pid = open(my $sm, '-|');

        # open() returns undef, not a negative value, when the fork fails.
        if (not defined $pid) {
            logmsg('Cannot fork: ' . $!, 'err');
            return;
        }
        if ($pid == 0) {
            exec("$INN::Config::newsbin/sm", '-q', $token)
              or logmsg("Cannot exec sm: $!", 'err');

            # The child must never return into the main loop.
            exit 126;
        }
        return $sm;
    }

    # Three-argument open: the name is used verbatim, whatever characters
    # it contains.
    my $fh;
    return $fh if open($fh, '<', $token);
    logmsg("Cannot open article $token: $!", 'err');
    return;
}

# Cancel the Message-IDs of the referenced array by running one
# "ctlinnd cancel" per Message-ID.  The commands are run in parallel, in
# batches of 10 (the last batch may hold up to 15), and each batch is
# waited for before the next one is started.
sub cancel_ctlinnd {
    my ($list) = @_;
    my @ids = @{$list};

    while (@ids) {
        my $max = @ids > 15 ? 10 : scalar @ids;

        for my $msgid (splice(@ids, 0, $max)) {
            my $pid;

            # Retry until a process can be created.
            until (defined($pid = fork)) {
                sleep 5;
            }
            next if $pid;

            # In the child.
            exec "$INN::Config::pathbin/ctlinnd", '-s', '-t', '180',
              'cancel', $msgid;
            exit 126;
        }

        # Reap the whole batch, reporting the children which failed.
        while ((my $pid = wait) > 0) {
            my $status = $?;
            next if not $status;
            if ($status >> 8) {
                logmsg("Child $pid died with status " . ($status >> 8), 'err');
            } else {
                logmsg("Child $pid killed by signal " . ($status & 255), 'err');
            }
        }
    }
}

# Cancel the Message-IDs of the referenced array through the local NNTP
# socket of innd, using MODE CANCEL.  The connection is kept open between
# calls and reopened once it has been idle for more than $socket_timeout
# seconds.  On any error the socket is discarded, the Message-IDs are
# cancelled with ctlinnd instead, and ctlinnd is used from then on.
sub cancel_nntp {
    my $ids = shift;

    if ($nntp_open and time - $socket_timeout > $last_cancel) {
        logmsg('Close socket for timeout');
        close(NNTP);
        $nntp_open = 0;
    }

    # $problem holds the message of the first error, if any.
    my $problem = $nntp_open ? undef : _nntp_connect();
    if (not defined $problem) {
        foreach my $id (@$ids) {
            print NNTP "$id\r\n";
            my $r = _nntp_reply();
            next if $r =~ /^289/;
            $problem = "cannot cancel $id: $r";
            last;
        }
    }
    if (not defined $problem) {
        $last_cancel = time;
        return;
    }

    logmsg($problem, 'err');

    # discard unusable socket
    close(NNTP);
    $nntp_open = 0;
    logmsg('Switching to ctlinnd...', 'err');
    cancel_ctlinnd($ids);
    $cancel = \&cancel_ctlinnd;
}

# Read one reply line from the NNTP socket, without its line terminator.
# An empty string is returned when the server closed the connection.
sub _nntp_reply {
    my $reply = <NNTP>;
    return '' if not defined $reply;
    $reply =~ s/\r\n$//;
    return $reply;
}

# Connect to the NNTP socket of innd and switch to MODE CANCEL.
# Returns undef on success, the error message to log otherwise.
sub _nntp_connect {
    use Socket;

    socket(NNTP, PF_UNIX, SOCK_STREAM, 0)
      or return "socket: $!";
    connect(NNTP, sockaddr_un($INN::Config::pathrun . '/nntpin'))
      or return "connect: $!";

    my $r = _nntp_reply();
    return "bad reply from server: $r" if $r !~ /^200 /;

    # Commands must reach the server as soon as they are printed.
    my $previous = select(NNTP);
    $| = 1;
    select($previous);

    print NNTP "MODE CANCEL\r\n";
    $r = _nntp_reply();
    return "MODE CANCEL not supported: $r" if $r !~ /^284 /;

    $nntp_open = 1;
    return;
}

# Load the permission file nocem.ctl from pathetc.
#
# Each useful line is "issuer:type[,type...]"; blank lines and lines which
# start with "#" are skipped.  Lines are lowercased and one
# "issuer\001type" entry per type is appended to @ncmperm.
# Returns 1 on success, 0 (after logging) if the file cannot be opened.
sub read_ctlfile {
    my $permfile = $INN::Config::pathetc . '/nocem.ctl';

    my $ctl;
    unless (open($ctl, '<', $permfile)) {
        logmsg("Cannot open $permfile: $!", 'err');
        return 0;
    }
    while (my $line = <$ctl>) {
        # chomp, not chop: the last line may lack its newline.
        chomp $line;
        $line =~ s/^\s+//;
        $line =~ s/\s+$//;
        next if $line =~ /^#/ or $line eq '';

        my ($issuer, $type) = split(/:/, lc $line);
        if (not(defined($issuer) and defined($type))) {
            logmsg(
                "Cannot parse nocem.ctl line <<$line>>; "
                  . "syntax is <<issuer:type>>.",
                'err'
            );
            next;
        }

        # Spaces are allowed around the types of the list.
        $type =~ s/\s//g;
        foreach my $one_type (split(/,/, $type)) {
            push(@ncmperm, "$issuer\001$one_type");
        }
    }
    close $ctl;
    return 1;
}

# Log a message.
#
# $msg is the text to log and $lvl its level, "notice" when omitted.
# The message goes to syslog when $use_syslog is set; otherwise a
# "level: message" line is appended to $logfile, which is opened on first
# use (and again after hup_handler() has closed it).  Dies if the log file
# cannot be opened.
sub logmsg {
    my ($msg, $lvl) = @_;
    $lvl ||= 'notice';

    # '%s' keeps syslog from interpreting the message as a format.
    return syslog($lvl, '%s', $msg) if $use_syslog;

    if (not $log_open) {
        open(LOG, '>>', $logfile) or die "Cannot open log: $!";
        $log_open = 1;

        # Unbuffered, so that every message reaches the file at once.
        my $previous = select(LOG);
        $| = 1;
        select($previous);
    }
    print LOG "$lvl: $msg\n";
    return;
}

# SIGHUP handler: close the log file so that logmsg() reopens it on its
# next message.  While a notice is being processed the request is only
# recorded in $got_sighup; the main loop calls this handler again once the
# notice is done.
sub hup_handler {
    $got_sighup = 1;
    return if $working;

    # LOG is only open when logmsg() writes to a file.
    if ($log_open) {
        close LOG;
        $log_open = 0;
    }

    # The request has been served; without this the main loop would close
    # the log again after every notice.
    $got_sighup = 0;
}

# Handler for the terminating signals: record the signal in $got_sigterm
# and, unless a notice is being processed, log the reason and exit with
# status 1.
sub term_handler {
    $got_sigterm = 1;

    unless ($working) {
        logmsg('exiting because of signal');
        exit 1;
    }
    return;
}

__END__

=head1 NAME

perl-nocem - A NoCeM-on-spool implementation for INN 2.x

=head1 SYNOPSIS

B<perl-nocem>

=head1 DESCRIPTION

NoCeM, which is pronounced I<No See 'Em>, is a protocol enabling
authenticated third-parties to issue notices which can be used
to cancel unwanted articles (like spam and articles in moderated
newsgroups which were not approved by their moderators).  It can
also be used by readers as a I<third-party killfile>.  It is
intended to eventually replace the protocol for third-party cancel
messages.

B<perl-nocem> processes third-party, PGP-signed article cancellation
notices.  You do not have to honour every NoCeM notice: you can honour only
those which are sent by people whom you trust (that is to say if you trust
the PGP key they use to sign their NoCeM notices).  Indeed, it is up
to you to decide whether you wish to honour their notices, depending
on the criteria they use.

The B<perl-nocem> program appends all status messages to F<perl-nocem.log>
in I<pathlog> if C<Sys::Syslog> is not available; otherwise, and it should
normally be the case, the syslog facility is used.  Status messages are then
appended to a file usually named F<news.notice> in I<pathlog>.  (Some logs
will be written only if debug level is set up to be logged via syslog.)

Processing NoCeM notices is easy to set up:

=over 4

=item 1.

If not already done, install GnuPG, or an equivalent implementation of the
OpenPGP standard, to be able to verify the signature of NoCeM notices.
It will provide the B<gpg> and B<gpgv> programs.  If GnuPG was already
installed when INN was configured, then the paths to these programs were taken
into account.  Otherwise, you have to set the C<$gpg> and C<$gpgv> variables
in I<pathlib>/perl/INN/Config.pm to the paths to these programs.

All still active NoCeM issuers use rather modern PGP keys accepted by both
GnuPG 1.x and 2.x versions.  It is no longer necessary to explicitly use
B<gpg1> to process NoCeM notices.

=item 2.

Import the keys of the NoCeM issuers you trust in order to check the
authenticity of their notices.  You can run the following command:

    gpg --no-default-keyring --allow-non-selfsigned-uid \
        --primary-keyring <pathetc>/pgp/ncmring.gpg --no-options \
        --no-permission-warning --batch --import <key-file>
    chmod 644 <pathetc>/pgp/ncmring.gpg

where <pathetc> is the value of the I<pathetc> parameter set in F<inn.conf>
and <key-file> the file containing the key(s) to import.  The keyring must
be located in F<< <pathetc>/pgp/ncmring.gpg >>; you only have to create the
directory F<< <pathetc>/pgp >> before using B<gpg> (it will automatically
generate the F<ncmring.gpg> file) and make sure the news user can read this
file, once generated.

The keys of NoCeM issuers can be found on the web site of I<The NoCeM
Registry>: L<http://rosalind.home.xs4all.nl/nocemreg/nocemreg.html>.
You can even download a single file there which contains all the keys.

=item 3.

Create a F<nocem.ctl> config file in I<pathetc> indicating the NoCeM issuers
and notices you want to follow.  This permission file contains lines like:

    bleachbot@httrack.com:spam,site
    pgpmoose@killfile.org:pgpmoose-forged-moderation

This will remove all articles for which the issuer (first part of the line,
before the colon C<:>) has issued NoCeM notices corresponding to the criteria
specified after the colon.  Usually, you just keep the lines corresponding to
the keys previously installed.

You will also find information about that on the web site of I<The NoCeM
Registry>.  Note that INN is shipped with an up-to-date F<nocem.ctl> file
already configured with the current NoCeM issuers.  (Only the keys installed
at the previous step are not included, so as to leave you the choice of whom
to trust, and download the most recent ones, in case they have changed.)

=item 4.

Add to the F<newsfeeds> file an entry like this one in order to feed
B<perl-nocem> the NoCeM notices posted to alt.nocem.misc and
news.lists.filters, the usual groups where notices are sent:

    nocem!\
        :!*,alt.nocem.misc,news.lists.filters\
        :Tc,Wf,Ap:<pathbin>/perl-nocem

with the correct path to B<perl-nocem>, located in <pathbin>.  Then, run
C<inncheck> to ensure the syntax of the modified F<newsfeeds> file is correct,
and reload it (via C<ctlinnd reload newsfeeds 'NoCeM channel feed'>).

Note that you should at least carry news.lists.filters on your news
server (or other newsgroups where NoCeM notices are sent) if you wish
to process them.

=item 5.

Everything should now work.  However, do not hesitate to manually test
B<perl-nocem> with a NoCeM notice, using:

    grephistory '<Message-ID>' | perl-nocem

Indeed, B<perl-nocem> expects tokens on its standard input, and
B<grephistory> can easily give it the token of a known article,
thanks to its Message-ID.

=back

=head1 FILES

=over 4

=item I<pathbin>/perl-nocem

The Perl script itself used to process NoCeM notices.

=item I<pathetc>/nocem.ctl

The configuration file which specifies the NoCeM notices to be processed.

=item I<pathetc>/pgp/ncmring.gpg

The keyring which contains the public keys of trusted NoCeM issuers.

=back

=head1 BUGS

The Subject header field body is not checked for the C<@@NCM> string and
there is no check for the presence of the References header field.

The Newsgroups pseudo header field body is not checked, but this can
be done in C<local_want_cancel_id()>.

The Hierarchies header field is ignored.

=head1 HISTORY

Copyright 2000 by Miquel van Smoorenburg <miquels@cistron.nl>.

Copyright 2001 by Marco d'Itri <md@linux.it>.

=head1 SEE ALSO

gpg(1), gpgv(1), grephistory(1), inn.conf(5), newsfeeds(5), pgp(1).

=cut
