#!/usr/bin/perl
#
# amavis-stats -- generate rrds from amavis log output
#
# Copyright (C) 2003, Mark Lawrence (nomad@null.net)
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License with
# the Debian GNU/Linux distribution in file /usr/share/common-licenses/GPL;
# if not, write to the Free Software Foundation, Inc., 59 Temple Place,
# Suite 330, Boston, MA  02111-1307  USA
#
# On Debian systems, the complete text of the GNU General Public
# License, version 2, can be found in /usr/share/common-licenses/GPL-2.


# ########################################################################
# Dependencies
# ########################################################################
use strict;
use warnings;
use Getopt::Std;
use Time::localtime;
use Time::Local;
use RRDs;
use Fcntl ':flock';
use POSIX qw(strftime);
use POSIX qw(locale_h);
use Compress::Zlib;


# ########################################################################
# Globals
# ########################################################################
# All program state is kept in the package variables declared here; the
# subroutines below read and update them directly rather than passing
# values around as arguments.
our (
    $me,         # this program name (basename of $0)
    $version,    # this program version
    $debug,      # set to 1 by the -d flag
    $verbose,    # 1 by default, 0 when -q (quiet) is given
    $warncount,  # number of times we have warned of something
    $locale,     # name of the LC_TIME locale used for month names
    $pkg,        # name of this package (usually amavis-stats)
    $statedir,   # location of rrd files and state files
    $lockfile,   # lock file to prevent more than one invocation at a time
    $logfile,    # input log file on command line
    $statefile,  # between invocation status file
    $namesfile,  # mappings of IDs to virus names
    $countfile,  # mappings of IDs to virus occurrences
    $seenfile,   # mappings of IDs to virus first/last seen times
    $rrdstep,    # rrd step size (seconds)
    $spos,       # start position of input file this run
    $pos,        # current position in input file
    $eof,        # size of the input file when this run started
    $line,       # string containing current line
    $hostname,   # pattern the syslog hostname field is matched against
    $epoch,      # seconds since 1970 of current line
    $lastepoch,  # seconds since 1970, previous time around
    $isodate,    # human-readable version of epoch
    $numv,       # number of viruses seen, continually incrementing
    $lastupdate, # epoch of last global rrd update
    $year,       # current year
    %months,     # locale-dependent hash of months-to-index
    %rvid,       # in-memory mapping of virus names to IDs
    %vnames,     # in-memory mapping of IDs to virus names
    %occurence,  # in-memory mapping of IDs to counts/occurrences
    %firstseen,  # in-memory mapping of virus first seen times
    %lastseen,   # in-memory mapping of virus last seen times
    %spamsess,   # in-memory store of session ids (amavis process-count field)
                 # used for SPAM-TAG
    %opt         # command line options
);


# ########################################################################
# Initial values & Constants
# ########################################################################
# Defaults that apply before the command line is processed; init() may
# override $hostname and $locale.
($me = $0) =~ s%.*/%%;    # get rid of the leading directory
$version   = "0.1.13-rc6"; # this value is auto-updated by packaging system
$pkg       = "amavis-stats";
$locale    = "C";          # replaced by the environment's LC_TIME with -l
$warncount = 0;
$lockfile  = "/var/lock/$pkg";
$rrdstep   = 300;          # seconds between rrd updates
$hostname  = '.*';         # matches every host until -n narrows it


# ########################################################################
# Subroutines
# ########################################################################

#
# Print the one-line command synopsis to stderr.
#
sub usage() {
    printf STDERR "usage: %s [-hVqdrl] [-n host] file\n", $0;
}

#
# Print the synopsis followed by the long help text (version, option
# summary and example invocations) to stderr.
#
sub help() {
    usage();

    my $text = <<"EOF";

    Version: $version

    This program generates virus infection statistics from amavis/syslog
    log files. It is typically called from cron(8), but can also be used
    from the command line when populating the databases with historical
    data.

    -h        : this (help) message
    -v        : does nothing (legacy verbose option)
    -q        : quiet mode - no output
    -d        : print debugging messages to stderr
    -r        : reset file pointer to 0, instead of starting at last position
    -l        : take locale from the environment (instead of English)
    -n <string> : only classify messages from hostnames matching <string>
    -V        : display version and exit

    examples:

    Initial import of existing data:
    amavis# $me /var/log/mail.info.2 
    amavis# $me -r /var/log/mail.info.1 
    amavis# $me -r /var/log/mail.info.0 
    amavis# $me -r /var/log/mail.info 

    Normal usage:
    amavis# $me /var/log/amavis.log 

EOF

    print STDERR $text;
}


#
# Process the command line and derive the run-time configuration from it:
# verbosity flags, the state directory and the files inside it, the log
# file to read, the current year and the month-name lookup table.
# Exits through do_exit(1) on bad options, -h, -V or a missing log file.
#
sub init()
{
    getopts('hvqdf:lrn:V', \%opt) or do {
        usage();
        do_exit(1);
    };

    if ($opt{h}) {
        help();
        do_exit(1);
    }

    if ($opt{V}) {
        print "$version\n";
        do_exit(1);
    }

    # Verbose unless told to be quiet; debugging only on request.
    $verbose = $opt{q} ? 0 : 1;
    $debug   = 1 if $opt{d};

    # Now we can finally say who we are!
    dbg("version $version");

    # The names file lives in the base directory because the id <-> name
    # mapping is shared by all hosts; everything else is per host.
    my $basedir = "/var/lib/$pkg";
    $statedir   = $basedir;
    $namesfile  = "$basedir/$pkg.names";

    if ($opt{n}) {
        $hostname = $opt{n};
        $statedir = "$basedir/$hostname";
        unless (-d $statedir) {
            mkdir($statedir) or do_exit(1, "could not create $statedir");
        }
    }

    $statefile = "$statedir/$pkg.state"; # last read position of the logfile
    $countfile = "$statedir/$pkg.count"; # per virus totals
    $seenfile  = "$statedir/$pkg.seen";  # first and last time() seen

    # The legacy -f option wins over the file named on the command line.
    my $wanted = $opt{f} ? $opt{f} : $ARGV[0];
    if (!$wanted) {
        usage();
        do_exit(1);
    }
    $logfile = $wanted;
    if (! -f $logfile) {
        do_exit(1, "file \"$logfile\" does not exist");
    }

    $year = 1900 + localtime->year();

    # Default (English) month abbreviations mapped to their 0-based index.
    my @english = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
    %months = map { $english[$_] => "$_" } 0 .. $#english;

    # With -l, add the month abbreviations of the current LC_TIME locale.
    if ($opt{l}) {
        $locale = setlocale(LC_TIME);
        dbg("locale is set to \"$locale\"");
        foreach my $idx (0 .. 11) {
            my $abbrev = strftime("%b", 0, 0, 0, 1, $idx, 96);
            dbg("$abbrev");
            $months{$abbrev} = $idx;
        }
    }

}


#
# Make sure that only one copy is running at a time.
#
# Opens $lockfile on the LOCKF handle, takes an exclusive non-blocking
# flock on it (retrying once after two seconds) and records our PID in it.
# Exits with status 1 if the file cannot be opened or the lock is not
# obtained.
#
sub semlock {
    # Open for append: a plain ">" would truncate the file before we hold
    # the lock and so wipe the PID written by the instance that owns it.
    open (LOCKF, '>>', $lockfile)
        or do_exit(1, "Could not open $lockfile: $!");

    unless (flock(LOCKF, LOCK_EX | LOCK_NB)) {
        err("warning: Could not lock $lockfile: $!");
        sleep 2;
        unless (flock(LOCKF, LOCK_EX | LOCK_NB)) {
            # Exit directly instead of through do_exit(): the lock belongs
            # to another process and its lock file must not be cleaned up
            # on our way out.
            print STDERR "$me: Could not lock $lockfile: $!\n";
            close LOCKF;
            exit 1;
        }
    }

    # The lock is ours: replace the previous owner's PID with our own and
    # make sure it reaches the file now rather than at program exit.
    truncate(LOCKF, 0);
    my $previous = select(LOCKF);
    $| = 1;
    select($previous);
    print LOCKF "$$\n";
    dbg("Have lock on $lockfile");
}


#
# Undo our lock. This is only for the sake of completeness - all file
# handles are closed (and locks lost) on program exit anyway.
#
# The lock file is removed only when this process really holds the lock.
# do_exit() calls this on every exit path, including ones taken before
# semlock() has run or after it failed to get the lock; in those cases
# the file belongs to another running instance and must be left alone.
#
sub semunlock {
    # LOCKF was never opened (or is already closed): nothing to undo.
    return unless defined fileno(LOCKF);

    # Re-requesting a lock we already hold succeeds; it fails when some
    # other process is the holder.
    if (flock(LOCKF, LOCK_EX | LOCK_NB)) {
        if (unlink("$lockfile")) {
            dbg("lock $lockfile removed");
        }
    }
    close LOCKF;
}


#
# Leave the program with exit status $code after releasing the lock via
# semunlock(). For a non-zero status an optional $msg is written to
# stderr, prefixed with the program name. Never returns.
#
sub do_exit {
    my ($code, $msg) = @_;

    semunlock();

    exit 0 if $code == 0;

    print STDERR "$me: $msg\n" if defined $msg;
    exit $code;
}


#
# Load the values of the previous run into variables.
#
# Reads the shared names file (%rvid, %vnames, $numv) and, when a state
# file exists for this host, the state file ($spos, $lastupdate,
# %spamsess), the count file (%occurence) and the seen file (%firstseen,
# %lastseen). Exits via do_exit(1) when $statedir is missing or not
# writable; dies when an existing state file cannot be opened.
#
sub loadState {
    dbg("loadState()");
    $spos = undef;
    $numv = 0; # highest virus id handed out so far

    #
    # Check that we have somewhere to save our status - Not much point
    # in continuing otherwise.
    #
    if ((! -d "$statedir") or (! -w "$statedir")) {
        do_exit(1,"$statedir does not exist or cannot be written to.");
    }

    #
    # Load the id=name mappings file if it already exists. This file is
    # shared between hosts
    #
    if (-f "$namesfile") {
        dbg("opening file $namesfile");

        # From version 0.1.12 the names of the summary categories changed.
        my %renamed = (
            "spam"     => "Not-Delivered(SPAM)",
            "passed"   => "Passed",
            "banned"   => "Banned",
            "infected" => "Infected",
        );

        open (my $in, '<', $namesfile)
            or die "Could not open $namesfile: $!";
        while (my $entry = <$in>) {
            next unless $entry =~ /^(\d+)\s+(.*)/;
            my ($id, $name) = ($1, $2);
            $name = $renamed{$name} if exists $renamed{$name};

            $rvid{$name} = $id;
            $vnames{$id} = $name;

            # getVid() allocates new ids as $numv + 1, so track the
            # highest id rather than the number of rows: with a gap in
            # the file a plain count would hand out an id already in use.
            $numv = $id if $id > $numv;
            dbg("Known: #$id $name");
        }
        close $in;
    }

    #
    # Grab the previous position reached in the log file, plus the
    # per-virus counters and first/last seen times
    #
    if (-f "$statefile") {

        dbg("opening file $statefile");
        open (my $state, '<', $statefile)
            or die "Could not open $statefile: $!";
        while (my $entry = <$state>) {
            if ($entry =~ /^pos:\s*(\d+)/) {
                $spos = $1;
            }
            elsif ($entry =~ /^lastupdate:\s*(\d+)/) {
                $lastupdate = $1;
            }
            elsif ($entry =~ /^spamsess:\s*(.*)/) {
                foreach my $sid (split(/\s+/, $1)) {
                    $spamsess{$sid} = 1;
                }
            }
        }
        close $state;

        dbg("opening file $countfile");
        open (my $counts, '<', $countfile)
            or die "Could not open $countfile: $!";
        while (my $entry = <$counts>) {
            if ($entry =~ /^(\d+)\s+(\d+)/) {
                $occurence{$1} = $2;
            }
        }
        close $counts;

        dbg("opening file $seenfile");
        open (my $seen, '<', $seenfile)
            or die "Could not open $seenfile: $!";
        while (my $entry = <$seen>) {
            if ($entry =~ /^(\d+)\s+(\d+)\s+(\d+)/) {
                $firstseen{$1} = $2;
                $lastseen{$1}  = $3;
            }
        }
        close $seen;
    }

    #
    # If we have not run before (for this host?) reset...
    #
    if (!defined $spos) {
        msg("First Time Run (matching host against '$hostname')");
        $spos       = 0; # position into the log file
        $lastupdate = 0; # no rrd update has happened yet
    }

    # A state file without a "lastupdate:" line leaves this undefined;
    # 0 is the value the parser treats as "no update yet".
    $lastupdate = 0 unless defined $lastupdate;

    #
    # If -r <file> on command line start at beginning of file
    #
    if ($opt{r}) {
        $spos = 0;
    }

    if ($debug) {
        dbg("start position: $spos numv: $numv lastupdate: $lastupdate");
        my $tmp = "left over spam session ids: ";
        foreach my $sid (keys %spamsess) {
            $tmp = "$tmp $sid";
        }
        dbg($tmp);
        while ( my ($id,$count) = each (%occurence)) {
            my $name = defined $vnames{$id} ? $vnames{$id} : "(unknown)";
            dbg("#$id: $name, seen $count times");
        }
    }
}


#
# Write the current position and statistics to disk so the next run (or
# a re-run after a crash) can continue from here: the state file, the
# shared names file, and the per-host count and seen files.
# Dies if any of the files cannot be written completely.
#
sub saveState {

    dbg("saveState(): eof: $eof numv: $numv lastupdate: $lastupdate");

    # SPAM-TAG session tracking is disabled, so the "spamsess:" entry is
    # written empty - but as a complete, newline-terminated line.
    _writeStateFile($statefile,
        "pos: $pos\n",
        "lastupdate: $lastupdate\n",
        "LC_TIME: $locale\n",
        "spamsess: \n",
    );

    _writeStateFile($namesfile,
        map { "$_ $vnames{$_}\n" } sort { $a <=> $b } keys %vnames);

    my (@counts, @seen);
    foreach my $id (sort { $a <=> $b } keys %occurence) {
        # An id can have a count without a recorded first/last time (for
        # instance when its rrd outlived the seen file). Fall back to a
        # number so the line stays parseable by loadState().
        my $last  = defined $lastseen{$id}  ? $lastseen{$id}  : 0;
        my $first = defined $firstseen{$id} ? $firstseen{$id} : $last;
        push @counts, "$id $occurence{$id}\n";
        push @seen,   "$id $first $last\n";
    }
    _writeStateFile($countfile, @counts);
    _writeStateFile($seenfile,  @seen);

}

#
# Replace the contents of $path with @lines. Buffered write errors only
# show up at close(), so that is checked as well.
#
sub _writeStateFile {
    my ($path, @lines) = @_;

    open (my $out, '>', $path) or die "Could not write to $path: $!";
    print {$out} @lines        or die "Could not write to $path: $!";
    close ($out)               or die "Could not write to $path: $!";
}


#
# Take a virus/name, and return an ID. Create the ID if the virus doesn't
# already exist. The id<>name mappings are global - ie the same regardless
# of which host we are scanning for.
#
# If this host has no rrd file for the ID yet, its counters are reset and
# the rrd is created; the program exits via do_exit(1) if that fails.
#
sub getVid {
    my ($virus, $epoch) = @_;

    if (!exists $rvid{$virus}) {
        # Take the next id that is really free, in case the ids loaded
        # from the names file are not contiguous.
        do { $numv++ } while (exists $vnames{$numv});
        $vnames{$numv} = $virus;
        $rvid{$virus}  = $numv;
        msg("New id: #$numv ($virus)");
    }

    my $id      = $rvid{$virus};
    my $rrdfile = "$statedir/$id.rrd";

    if (! -e $rrdfile) {
        $occurence{$id} = 0;
        $firstseen{$id} = $epoch;
        $lastseen{$id}  = $epoch;

        # createRRD() signals failure with -1, which is a true value, so
        # a plain boolean test would never notice an error.
        my $created = createRRD($rrdfile, $id);
        if (!$created or $created < 0) {
            do_exit(1, "getVid: Can't create file $rrdfile");
        }
#        updateRRD($id, $lastupdate - $rrdstep)
    }

    return $id;
}


#
# Increment by one the number of times we have seen this virus. Also
# record the time we last saw it. Specific to this host only
#
sub upCount {
    my ($id, $epoch) = @_;
    $occurence{$id}++;
    $lastseen{$id} = $epoch;

    # getVid() only sets the first-seen time when it creates the rrd. If
    # the rrd already existed but no first-seen time was loaded, record
    # one now so saveState() does not write an undefined value.
    $firstseen{$id} = $epoch unless defined $firstseen{$id};
}



#
# Classify the current log line (global $line) and bump the matching
# statistics. The first category whose pattern matches wins, so the
# order of the tests below is significant. For an INFECTED line the
# individual virus names are counted as well, each at most once per line.
#
# The $sid argument was used by the SPAM-TAG session tracking, which is
# currently disabled; it is accepted but not used.
#
sub classify {
    my ($sid) = @_;

    # Bookkeeping shared by every category: log it, look up (or create)
    # its id and count the hit.
    my $tally = sub {
        my ($label) = @_;
        dbg("$label $epoch: $isodate");
        upCount(getVid($label, $epoch), $epoch);
    };

    if ($line =~ /\bPassed SPAM\b/ ||               # >= amavisd-new-2004
        $line =~ /\bPassed\b.*\bquarantine spam/) { # <  amavisd-new-2003
        $tally->("Passed SPAM");
    }
    elsif ($line =~ /\bBlocked SPAM\b/ ||           # >= amavisd-new-2004
           $line =~ /\bNot-Delivered\b.*\bquarantine spam/) { # <= 2003
        $tally->("Blocked SPAM");
    }
    elsif ($line =~ /\bPassed BANNED\b/) {          # >= amavisd-new-2004
        $tally->("Passed BANNED");
    }
    elsif ($line =~ /\b(Blocked )?BANNED\b/) {      # with "Blocked": >= 2004
        $tally->("Blocked BANNED");                 # without: <= 2003
    }
    elsif ($line =~ /\bPassed INFECTED\b/) {
        $tally->("Passed INFECTED");
    }
    elsif ($line =~ /\b(Passed |Blocked )?INFECTED\s+\((.*?(\(.*?\))*?)\)/ or
                                                                # amavisd-new
           $line =~ /\b(Possible) virus.*->\s+'(.*?)'/     or   # amavis-ng
           $line =~ /.*(parts)\/\d+:\s+(.*?)\s+FOUND/      or   # amavis-ng 
           $line =~ /\b(quarantine)[:|d;].*?virus='(.*?)'/ or   # amavisd
           $line =~ /.*(part-)\d+:\s+(.*?)\s+FOUND/ ) {         # clamav

        # Copy the captures of whichever pattern matched before any
        # other regex gets a chance to replace them.
        my ($verdict, $viruses) = ($1, $2);

        #
        # Update the overall infected emails statistics. A marker other
        # than "Blocked " or "Passed " (the first capture of the older
        # log formats) updates no overall counter.
        #
        if (!defined $verdict) {
            $tally->("Blocked INFECTED");
        }
        elsif ($verdict =~ /Blocked /) {
            $tally->("Blocked INFECTED");
        }
        elsif ($verdict =~ /Passed /) {
            $tally->("Passed INFECTED");
        }
        dbg("viruses: \"$viruses\" at $epoch: $isodate");

        #
        # Update the statistics of each distinct virus named on the line.
        #
        my %seen;
        foreach my $virus (split(/,+\s+/, $viruses)) {
            next if $seen{$virus};
            upCount(getVid($virus, $epoch), $epoch);
            $seen{$virus} = 1;
        }
    }
    elsif ($line =~ /\bBlocked CLEAN\b/ ||          # >= amavisd-new-2004
           $line =~ /\bNot-Delivered\b/) {          # <= amavisd-new-2003
        $tally->("Blocked CLEAN");
    }
    elsif ($line =~ /\bPassed( CLEAN)?\b/) {
        $tally->("Passed CLEAN");
    }

}




#
# Parse the plain-text log file $fname from byte offset $start up to
# byte offset $stop.
#
# For every line the syslog timestamp is turned into $epoch/$isodate,
# all rrds are brought up to date in $rrdstep increments, and amavis
# lines from a matching host are handed to classify(). $pos follows the
# read position and the state is saved every 1000 lines. Lines whose
# timestamp cannot be interpreted are skipped. Exits via do_exit(1) if
# the file cannot be opened or positioned, or after more than five
# unknown-month warnings.
#
sub parseFile {

    my ($fname, $start, $stop) = @_;
    dbg("parseFile ($fname, $start, $stop)");

    #
    # Open up the file we need to parse. Three-argument open, so that a
    # file name given on the command line is never read as an open mode
    # or a pipe.
    #
    open (my $log, '<', $fname)
        or do_exit(1, "Could not open file $fname: $!");
    seek ($log, $start, 0)
        or do_exit(1, "Could not seek to $start in file $fname: $!");

    #
    # Loop each line until the current end of file
    #
    $pos = $start;
    my $lineid = 0;

    while ($pos < $stop and defined($line = <$log>)) {
        #
        # Housekeeping
        #
        $lineid++;
        $lastepoch = $epoch;
        $line =~ s/:\s+\[ID.*?\]/: /; # get rid of extra Solaris field

        my ($mon, $day, $time, $host, $prog, $sid) = split(/\s+/, $line);

        #
        # Check that the environment locale matches what is being written
        # by syslog
        #
        my $monidx = defined $mon ? $months{$mon} : undef;
        if (!defined $monidx) {
            my $shown = defined $mon ? $mon : "";
            err("Unknown month \"$shown\" (using locale \"$locale\")");
            $warncount++;
            if ($warncount > 5) {
                do_exit(1, "Too many warnings - bailing out");
            }
        }

        #
        # Generate a seconds-since-1970 epoch and formatted date string
        #
        my ($when, $stamp) = defined $monidx
                           ? _lineEpoch($monidx, $day, $time)
                           : ();

        if (defined $when) {
            $epoch   = $when;
            $isodate = $stamp;
            $lastepoch = $epoch - 1 if !defined $lastepoch;

            #
            # Update all rrds if we are more than $rrdstep seconds since
            # the last update
            #
            if (!$lastupdate) {
                $lastupdate = int($epoch / $rrdstep) * $rrdstep;
                dbg("First update: $lastupdate");
            }

            my $count = int(($epoch - $lastupdate) / $rrdstep);
            for my $step (1 .. $count) {
                $lastupdate = $lastupdate + $rrdstep;
                foreach my $id (keys %occurence) {
                    updateRRD($id, $lastupdate);
                }
            }

            #
            # If this is an amavis line, and if the host matches "-n" then
            # do the classification
            #
            if (defined $host and defined $prog and
                $prog =~ /amavis.*?\[\d+\]:/ and $host =~ /$hostname/) {
                classify($sid);
            }
        }
        elsif (defined $monidx) {
            dbg("skipping line with unusable timestamp before offset "
                . tell($log));
        }

        #
        # Where did we get to in the file?
        #
        $pos = tell($log);

        #
        # Save the current statistics every 1000 lines. This way
        # if the program dies we don't have to start again from the
        # beginning each time. Also good for monitoring the graphs
        # to see where we are up to.
        #
        if (!($lineid % 1000)) {
            saveState();
        }

    }
    close($log);

}

#
# Convert the month index, day and "HH:MM:SS" fields of a syslog line
# into (epoch, "YYYY-MM-DD HH:MM:SS"). Syslog lines carry no year: the
# current year ($year) is assumed, falling back to the previous year when
# that would lie in the future or is not a valid date (29 February).
# Returns an empty list when the fields do not form a usable timestamp.
#
sub _lineEpoch {
    my ($monidx, $day, $time) = @_;

    return unless defined $day and $day =~ /^\d+$/;
    return unless defined $time and $time =~ /^(\d+):(\d+):(\d+)$/;
    my ($hour, $min, $sec) = ($1, $2, $3);

    # timelocal() croaks on out-of-range values, hence the eval.
    my $lineyear = $year;
    my $when = eval { timelocal($sec, $min, $hour, $day, $monidx, $lineyear) };
    if (!defined $when or $when > time()) {
        # date is actually last year
        $lineyear = $year - 1;
        $when = eval {
            timelocal($sec, $min, $hour, $day, $monidx, $lineyear)
        };
        return unless defined $when;
    }

    my $stamp = sprintf("%4u-%02u-%02u", $lineyear, $monidx + 1, $day)
              . " $hour:$min:$sec";
    return ($when, $stamp);
}


#
# Parse the gzip-compressed log file $fname, skipping the first $start
# (uncompressed) bytes. Every amavis line after that is handed to
# classify(); $pos counts the uncompressed bytes consumed and the state
# is saved every 1000 lines. Exits with status 2 if the file cannot be
# opened.
#
# NOTE(review): unlike parseFile() this neither derives $epoch/$isodate
# from the line, nor brings the rrds up to date, nor applies the -n host
# filter - classify() works with whatever $epoch currently holds.
# Confirm whether that is intended.
#
sub parseFilegz {

    my ($fname, $start) = @_;
    dbg("parseFilegz ($fname, $start)");

    #
    # Open up the file we need to parse
    #
    my $gz = gzopen ($fname, "rb");
    if (!defined($gz)) {
        err("Couldn't open logfile $fname");
        do_exit(2);     # exit through do_exit() so the lock is cleaned up
    }

    #
    # Skip what was read in a previous run. Stop at end of file or on a
    # read error as well: gzreadline() then returns 0 or -1, $pos would
    # never reach $start and the loop would spin forever.
    #
    $pos = 0;
    while ($pos < $start) {
        my $skipped = $gz->gzreadline($line);
        last if $skipped <= 0;
        $pos += $skipped;
    }

    #
    # Loop each line until the current end of file
    #
    my $lineid = 0;
    while ((my $got = $gz->gzreadline($line)) > 0) {
        $pos += $got;
        $lineid++;
        $lastepoch = $epoch;

        if ($line =~ /amavis.*?\[\d+\]:/) {
            classify();
        }

        #
        # Save the current statistics every 1000 lines. This way
        # if the program dies we don't have to start again from the
        # beginning each time. Also good for monitoring the graphs
        # to see where we are up to.
        #
        if (!($lineid % 1000)) {
            saveState();
        }
    }
    $gz->gzclose();
}


#
# Find the previous (rotated) log file and parse that according to
# our last position.
#
# The rotated file is looked for next to $logfile first and then in an
# "archiv" directory, trying the suffixes .0, .1, .01, -<today> and
# -<yesterday> (dates as YYYYMMDD), each plain and gzip-compressed. The
# first existing candidate is parsed starting at $spos. Exits via
# do_exit(1) when no candidate exists.
#
sub parseRotFile { 
    my ($logfile, $spos) = @_;

    # Date stamps some log rotators append to the file name.
    my $now = time();
    my ($today, $yesterday) = map {
        my $tm = localtime($_);
        sprintf("%4u%02u%02u", $tm->year + 1900, $tm->mon + 1, $tm->mday);
    } ($now, $now - 60*60*24);

    # Suffixes in order of preference.
    my @suffixes = (
        ".0",          ".0.gz",
        ".1",          ".1.gz",
        ".01",         ".01.gz",
        "-$today",     "-$today.gz",
        "-$yesterday", "-$yesterday.gz",
    );

    # Return the first "$base$suffix" that is an existing plain file.
    my $probe = sub {
        my ($base) = @_;
        foreach my $suffix (@suffixes) {
            return "$base$suffix" if -f "$base$suffix";
        }
        return;
    };

    my $rotlogfile = $probe->($logfile);

    if (!defined $rotlogfile) {
        # Nothing beside the log file itself: try the archive directory.
        my $archived = $logfile;
#       $archived =~ s/(.*)\/([a-z]*)/$1\/archiv\/$2/g;
        $archived =~ s/\/var\/log\/(.*)/\/var\/log\/archiv\/$1/g;
        if ($archived eq $logfile) {
            $archived = "archiv/$logfile";
        }

        # Without a suffixed match the bare archive path itself is the
        # last candidate.
        my $found = $probe->($archived);
        $rotlogfile = defined $found ? $found : $archived;
        if (! -f $rotlogfile) {
            $rotlogfile = undef;
        }
    }

    unless (defined $rotlogfile) {
        err("Could not open rotated logfile.");
        err("  Tried extentions .0, .1, .01, -$today, -$yesterday");
        do_exit(1);
    }

    if ( $rotlogfile =~ /\.gz$/ ) {
        parseFilegz ($rotlogfile, $spos);
    } else {
        parseFile ($rotlogfile, $spos, (stat $rotlogfile)[7]);
    }
}


#
# Builds the RRD, and populates it with values as far back as the largest
# RRD "bin", so that calculations over a long period are not taken from bins
# which contain both known and unknown values (but resulting in an overall
# unknown value) which gives inaccurate totals.
#
# $file is the rrd to create and $id the virus id it belongs to.
# Returns 1 on success and -1 (after reporting through err()) when
# RRDs::create fails.
#
sub createRRD {
    my ($file, $id) = @_;
    dbg("createRRD: $file");

    # Must be the same as the step count of the largest RRA below.
    my $backfill = 288;
    my $start    = $lastupdate - $backfill*$rrdstep;

    my @definition = (
        "DS:hits:COUNTER:${rrdstep}:0:U",
        "RRA:AVERAGE:0.5:1:300",
        "RRA:AVERAGE:0.5:6:700",
        "RRA:AVERAGE:0.5:24:775",
        "RRA:AVERAGE:0.5:288:797",
    );
    RRDs::create($file, "--start", $start, "--step", $rrdstep, @definition);

    if (my $err = RRDs::error) {
        err("createRRD: $err");
        return -1;
    }

    #
    # Pre-populate from the start up to and including $lastupdate, one
    # update per step
    #
    for (my $when = $start; $when <= $lastupdate; $when += $rrdstep) {
        updateRRD($id, $when);
    }

    return 1;
}


#
# Store the current count of virus $id in its rrd at time $epoch.
#
# Returns 1 when the value was stored or the update was skipped because
# $epoch is not later than the rrd's last update, and -1 (after reporting
# through err()) when the RRDs library reports an error. Exits via
# do_exit(1) if the rrd file does not exist.
#
# Declared without a prototype: the sub takes two arguments, and the
# former empty "()" prototype would reject every call compiled after this
# definition while only producing "called too early to check prototype"
# warnings for the existing calls above it.
#
sub updateRRD {
    my ($id, $epoch) = @_;
    my $count        = $occurence{$id};
    my $rrdfile      = "$statedir/$id.rrd";
    my $err;
    my $last;

    if (! -f $rrdfile) {
        do_exit(1, "updateRRD: updating $rrdfile but it doesn't exist!");
    }

    #dbg("Update: $rrdfile at $epoch count $count");

    $last = RRDs::last($rrdfile);
    $err = RRDs::error;
    if ($err) {
        err("updateRRD: $err");
        return -1;
    }

    #
    # We sometimes get two hits in the same second. Check for that here
    # and basically ignore it.
    #
    if ($epoch > $last) {
        my $upd = $epoch . ":" . $count;
        RRDs::update($rrdfile, $upd);

        $err = RRDs::error;
        if ($err) {
            err("updateRRD: $err");
            err("Attempted to update $rrdfile at $epoch count $count");
            return -1;
        }
    }

    return 1;
}



#
# Print a debugging message, prefixed with the program name, when -d was
# given. The message goes to stderr, as the help text for -d promises.
#
sub dbg {
    print STDERR "$me: @_\n" if ($debug);
}

#
# Print an informational message, prefixed with the program name, to
# stdout unless quiet mode (-q) turned $verbose off.
#
sub msg {
    $verbose and print "$me: @_\n";
}

#
# Print an error message to stderr, prefixed with the program name and
# "error:". Always printed, regardless of -q and -d.
#
sub err {
    my $text = "@_";
    printf STDERR "%s: error: %s\n", $me, $text;
}


# ########################################################################
# main() program
# ########################################################################

init();
semlock();
loadState();

$eof = (stat $logfile)[7];
if (!defined $eof) {
    # The file passed the -f test in init() but has gone since.
    do_exit(1, "Could not stat $logfile: $!");
}

# For a compressed log the saved position counts uncompressed bytes
# while $eof is the compressed size, so comparing the two says nothing
# about rotation.
my $gzipped = ($logfile =~ /\.gz$/) ? 1 : 0;

if (!$gzipped and $eof < $spos) {
    #
    # The log file has rotated under us, so do the rotated logfile first.
    #
    msg("Logfile \"$logfile\" appears to have rotated");
    parseRotFile($logfile, $spos);
    $spos = 0; # reset to the start of the file
}

if ($gzipped) {
    parseFilegz ($logfile, $spos);
} else {
    parseFile ($logfile, $spos, $eof);
}
saveState();
semunlock();

