#!/usr/local/bin/perl5
#
# mw.pl - mailwatch... uhh, watches mail, what else?
# see http://www.homeport.org/~shevett/mailwatch for details.
#
# NOTE - i'm nutty for tabs set to 2 spaces. deal.
#
# 2/12/97 des - fixed formatting error in top 'posts by day' line,
#               as well as problems with calculating the average
#               posts per day (it was doing 'em calculated total/6. eek!)
# 3/31/96 des - Major changes. There was a bug in the initial scan
#               that may result in miscounted number of posts, due to
#               overwriting an element in a hash. Fixed. Reformatted
#               some stuff in the reports, and added a "accounting
#               for" calculation in the 'top' sections.
# 2/19/96 des - zounds. trailing spaces in the subject lines showed
#               up as different entries in the listing. fixed.
# 1/19/96 des - changed some problems with very low usage through
#               the logs. show interval in 'top 10' lists.
# 1/11/96 des - revamped distribution, fixed average problem,
#               added 'period' to limit how much data to parse
# 1/6/96 des  - hourly distribution was showing '10's. fixed.
# 1/3/96 des  - unleashed upon the world.

# Legacy library files; the code below calls &timelocal, &Getopts and
# &ctime, which are expected to come from these.
require "timelocal.pl";
require "getopts.pl";
require "ctime.pl";

#--------------------------------------------------------------------
# some variables
#
# All of these are plain package globals read by name from the subs
# further down, so renaming one means touching every user.
#
$version="0.34";     # Version (printed in the report footer)
$logfile="mw.log";   # Where to log all this fun stuff
$testing=0;          # Set to 1 to print debug traces to STDOUT when running.
                     # NOTE(review): the old comment said "(no log)", but
                     # snarfinput still appends to $logfile when this is set.
$threshold=0;        # Stop summaries when down to what value? (0 = all)
$plimit=10;          # maximum number of lines to print per summary
$maxdays=5;          # How many days to summarize posts?
                     # NOTE(review): not referenced by any code in this file.
$period=604800;      # How old are the oldest reported posts? (seconds)
# $period=99999999;  # How old are the oldest reported posts? (seconds)

#--------------------------------------------------------------------
# some constants
#
# Three-letter month names packed end to end: index($Months,$name)/3
# gives the zero-based month number that timelocal wants.
$Months="JanFebMarAprMayJunJulAugSepOctNovDec";

#--------------------------------------------------------------------
# u s a g e - show da bums how ta do it.
#
# Prints the command-line synopsis to STDERR and exits with status 1.
#
sub usage {
  print STDERR "Usage: $0 -[is]\n";
  print STDERR " i take input from stdin for a new entry.\n";
  print STDERR " s summarize current data.\n";
  exit(1);
}

#--------------------------------------------------------------------
# c a l c m i d n i g h t - Calculate midnight and return the time
# integer representing the midnight for the date supplied.
#
# $target is an epoch time. The result is 00:00:01 local time on the
# same calendar day (the one-second offset is kept from the original).
# Day, month AND year are all taken from $target; the year used to be
# taken from the current time, which gave the wrong day whenever
# $target fell in a different year. The global $midnight is still set
# as a side effect.
#
sub calcmidnight {
  my($target)=(@_);
  my($d,$nmon,$y)=(localtime($target))[3,4,5];  # $y is years since 1900

  print "Parsed ".&ctime($target)." into $nmon / $d / $y \n" if ($testing);
  $midnight=&timelocal("01","00","00",$d,$nmon,$y);
  return ($midnight);
}

#--------------------------------------------------------------------
# _ m k s t a m p - turn already-split date fields into epoch seconds.
#
# $tag only labels the debug trace. $tmon is a month name; its first
# three letters are looked up in $Months regardless of case. Years
# above 1900 are reduced to an offset from 1900. Returns undef when a
# field is missing, the month is unknown, or timelocal rejects the
# values (the eval stops a bad header from killing the whole run).
#
sub _mkstamp {
  my($tag,$sec,$min,$hour,$day,$tmon,$year)=@_;
  my($at,$nmon,$stamp);

  return unless (defined($day) && defined($tmon) && defined($year));
  $at=index($Months,ucfirst(lc(substr($tmon,0,3))));
  return if ($at < 0);
  $nmon=$at / 3;
  $sec=0 if (! $sec);
  $year=$year-1900 if ($year > 1900);
  print "$tag: s: $sec, m: $min, h: $hour, d: $day, nm: $nmon, y: $year\n" if ($testing);
  ($stamp)=eval { &timelocal($sec,$min,$hour,$day,$nmon,$year) };
  return $stamp;
}

#--------------------------------------------------------------------
# _ p o s t e d _ s t a m p - epoch seconds for the text of a Date:
# header, or undef when the header is in a form we cannot read.
#
# Handles the "day month year hh:mm[:ss]" family, e.g.
#   Sat, 30 Dec 1995 00:33:29 -0500 (EST)
#   Fri, 22 Sep 1995 13:26 -0500 (EST)
#   Sat, 30 Dec 95 0:05:27 EST
#   24 Dec 1995 22:28:22 EST
# Forms such as "Thursday,November 02,1995 5:37PM" or
# "95-12-05 21:58:06 EST" do not match and yield undef.
#
sub _posted_stamp {
  my($dline)=@_;
  my($day,$tmon,$year,$hour,$min,$sec);

  return unless (defined($dline));
  # Seconds are optional; only the digits are captured (the old pattern
  # captured ":29", which numifies to 0).
  ($day,$tmon,$year,$hour,$min,$sec) =
    ($dline =~ /(\d+)\s+(\w+)\s+(\d+)\s+(\d+):(\d+)(?::(\d+))?/);
  return &_mkstamp("dline",$sec,$min,$hour,$day,$tmon,$year);
}

#--------------------------------------------------------------------
# _ r e c e i v e d _ s t a m p - epoch seconds for the date part of
# an mbox "From " line (e.g. "Dec 1 12:41:18 1995"), or undef.
#
sub _received_stamp {
  my($rfline)=@_;
  my($tmon,$day,$hour,$min,$sec,$year);

  return unless (defined($rfline));
  ($tmon,$day,$hour,$min,$sec,$year) =
    ($rfline =~ /(\w\w\w)\s+(\d+)\s+(\d+):(\d+):(\d+) (\d\d\d\d)/);
  return &_mkstamp("rfline",$sec,$min,$hour,$day,$tmon,$year);
}

#--------------------------------------------------------------------
# s n a r f i n p u t -- slurps up stdin, chews it for a bit, and spits
# it out into the log file in the appropriate form.
#
# Reads one message from <>, remembers the first "From ", From:,
# Subject: and Date: lines of the header, counts body lines and body
# lines starting with ':' or '>', then appends one record to $logfile:
#   postdate:recdate:lines:quotedlines:from:subject
#
sub snarfinput {
  my($rfline,$fline,$sline,$dline);
  my($lcount,$inccount)=(0,0);   # always logged as numbers, never blank
  my($postdate,$recdate);

  while (<>) {
    print "< $_" if ($testing);
    chomp;                       # chomp, so a last line without \n keeps its final char
    if ( /^From / && ! $rfline ) {
      ($rfline) = ($_ =~ /(\w\w\w\s+\d+ \d+:\d+:\d+ \d\d\d\d)/) ;
      next;
    }
    if ( /^From: / && ! $fline ) {
      ($fline) = ($_ =~ /From: (.*)/) ;
      next;
    }
    if ( /^Subject: / && ! $sline ) {
      ($sline) = ($_ =~ /: (.*)/) ;
      next;
    }
    if ( /^Date: / && ! $dline ) {
      ($dline) = ($_ =~ /: (.*)/) ;
      next;
    }
    if ( /^$/ ) {
      # Blank line ends the header; everything after it is body.
      $lcount=0;
      while (<>) {
        $lcount++;
        $inccount++ if (/^(:|>)/) ;
      }
    }
  }

  #
  # parse up date posted line...
  # Could be any one of:
  #   Sat, 30 Dec 1995 00:33:29 -0500 (EST)
  #   Fri, 22 Sep 1995 13:26 -0500 (EST)
  #   Thu, 9 Nov 1995 14:50:54 -0400 (GMT-0400)
  #   Sat, 30 Dec 95 0:05:27 EST
  #   Tue, 03 Oct 1995 13:52:58 -0400
  #   Wed, 4 Oct 1995 10:03:20 -0500
  #   Thursday,November 02,1995 5:37PM
  #   Thursday, November 02, 1995 3:22PM
  #   95-12-05 21:58:06 EST
  #   24 Dec 1995 22:28:22 EST
  #   16 Oct 95 23:58:57 EDT
  # (the two "Thursday" forms and the 95-12-05 form are not understood)
  #
  print "Posted: $dline\n" if ($testing);

  #
  # parse up date received line... (Aug 25 13:05:53 1995)
  # Dec 1 12:41:18 1995
  #
  # No usable "From " line: the message is arriving now, so use now.
  $recdate=&_received_stamp($rfline);
  $recdate=time if (! defined($recdate));

  # Unreadable Date: header: fall back to the arrival time rather than
  # handing undefined fields to timelocal.
  $postdate=&_posted_stamp($dline);
  $postdate=$recdate if (! defined($postdate));

  print "opening logfile $logfile\n" if ($testing);
  open(LOG,">>$logfile") || die "Cannot open $logfile: $!\n";
  print "writing\n" if ($testing);
  print LOG "$postdate:$recdate:$lcount:$inccount:$fline:$sline\n";
  close(LOG) || die "Cannot close $logfile: $!\n";
  print "written! \n" if ($testing);
}

# Sort comparator: ascending by numeric value (leading number of each string).
sub numeric { $a <=> $b }

#--------------------------------------------------------------------
# g e n h i s t o r y - figger out postings for the last coupla
# days, number per day.
#
# Prints the "Breakdown by day" table from the keys of %pdates (epoch
# post times, set up by summarize). Column 0 is today; each further
# column is one day earlier, up to $span days back. Three rows are
# printed: weekday, "Mon dd", and the post count for that day.
#
# Fixes over the earlier version: the weekday row is actually filled
# in; days with no posts get a 0 column instead of shifting later
# posts into the wrong day; the last count is no longer printed twice
# when the day limit is reached; keys are sorted numerically.
#
sub genhistory {
  my($span)=7;               # days shown before today, and the averaging divisor
  my($floor,$col,$hcount,$stamp,$w,$m,$d);
  my($_wline,$_dline,$_vline);
  my(@counts)=(0);

  $hcount=0;
  $col=0;
  $floor=&calcmidnight(time);   # start of the day for the current column
  ($w,$m,$d) = (&ctime($floor) =~ /(\w\w\w)\s+(\w+)\s+(\d+)\s+\d+:\d+:\d+ /);
  $_wline=sprintf("%-6s ",$w);
  $_dline=sprintf("%3s %2d ",$m,$d);

  STAMP: for $stamp (sort { $b <=> $a } keys %pdates) {
    # Walk back a day at a time until this post fits the current column.
    while ($stamp < $floor) {
      last STAMP if ($col == $span);
      $floor=$floor - 86400;
      $col++;
      $counts[$col]=0;
      ($w,$m,$d) = (&ctime($floor) =~ /(\w\w\w)\s+(\w+)\s+(\d+)\s+\d+:\d+:\d+ /);
      $_wline="$_wline|".sprintf(" %-6s ",$w);
      $_dline="$_dline|".sprintf(" %3s %2d ",$m,$d);
    }
    $counts[$col]++;
    $hcount++;
  }
  $_vline=join(" | ",map(sprintf(" %-3.0f",$_),@counts));

  print "\nBreakdown by day: ($hcount posts, average of ";
  print sprintf("%3.1f",($hcount / $span));
  print " posts per day.)\n";
  print "----------------------------------------------------------------------\n";
  print "\t$_wline\n";
  print "\t$_dline\n";
  print "\t$_vline\n\n";
}

#--------------------------------------------------------------------
# g e n d i s t - figger out the distribution of posting times
#
# Reads the ctime-style strings stored as values of %pdates, tallies
# posts per hour of day, and prints the 24 totals vertically: one
# digit per row under each hour, then an AM/PM ruler. At least two
# digit rows are printed; more are added if any hour reaches 100+.
# All working state is local, so repeated calls give the same output.
#
sub gendist {
  my($i,$hour,$width,$row,$line,@dist,@cells);

  for $i (0..23) { $dist[$i]=0 }
  for $i (keys %pdates) {
    ($hour) = ($pdates{$i} =~ /\w\w\w\s+\w+\s+\d+\s+(\d+):\d+:\d+ /);
    # An unreadable value used to be counted under hour 0; skip it.
    next if (! defined($hour) || $hour > 23);
    $dist[$hour]++;
  }

  $width=2;
  for $i (@dist) {
    $width=length($i) if (length($i) > $width);
  }
  @cells=map(sprintf("%".$width."s",$_),@dist);

  for $row (0..$width-1) {
    $line=join("",map(" ".substr($_,$row,1),@cells));
    print "\t$line\n";
  }
  print "\t|---------- AM ---------|---------- PM ---------|\n\n";
}

#--------------------------------------------------------------------
# s u m m a r i z e - give em da woiks.
#
# Reads $logfile (one record per post, as written by snarfinput:
# postdate:recdate:lines:quotedlines:from:subject), keeps the posts
# dated on or after the cutoff derived from $period, and prints the
# traffic report: totals, per-day breakdown, top posters, top
# subjects and the hourly distribution.
#
# Subjects are normalised before counting (a "[xxx: ...]" wrapper, a
# leading "Re: " and trailing whitespace are removed), and the "unique
# subjects" total uses the normalised form so it agrees with the
# "Top subjects" table.
#
sub summarize {
  # %pdates is deliberately dynamically scoped: genhistory and gendist
  # read it by name while we are still on the call stack.
  local(%pdates);
  my($pdate,$rdate,$lines,$inclines,$poster,$subject);
  my($cutoff,$count,$posterc,$subjectc,$odate,$ndate,$key,$n);
  my($interval,$repinv);
  my(%posters,%sublist);

  $count=0;
  $cutoff=&calcmidnight(time - $period + 86400);
  print "Cutoff is $cutoff ($period seconds ago).\n" if ($testing);

  open(INP,"<$logfile") || die "summarize: error opening $logfile: $!\n";
  while (<INP>) {
    chomp;
    ($pdate,$rdate,$lines,$inclines,$poster,$subject)=split(":",$_,6);
    if ($pdate < $cutoff) {
      print "skipped $pdate by $poster\n" if ($testing);
      next;
    }
    print "processing $pdate...\n" if ($testing);
    $count++;

    # Oldest = first record kept, newest = last record kept. Skipped
    # records no longer influence the "most recent" line.
    $ndate=($pdate > $rdate) ? &ctime($pdate) : &ctime($rdate);
    $odate=$ndate if (! defined($odate));

    $posters{$poster}++;
    print "$posters{$poster} - $poster \n" if ($testing);

    $subject=~s/^\[[^:]*: (.*)]$/$1/;
    $subject=~s/^Re: (.*)$/$1/;
    $subject=~s/^(.*)\s+$/$1/;
    $sublist{$subject}++;

    # %pdates needs one key per post. Posts sharing a second get .1,
    # .2, ... appended so that none of them overwrites another.
    $key=$pdate;
    $n=0;
    while (defined($pdates{$key})) {
      print "exists! $poster, $subject\n" if ($testing);
      $n++;
      $key="$pdate.$n";
    }
    $pdates{$key}=&ctime($pdate);
  }
  close(INP);

  # ctime strings end in a newline; an empty log has no dates at all.
  if (defined($odate)) {
    chomp($odate);
    chomp($ndate);
  } else {
    $odate="n/a";
    $ndate="n/a";
  }
  $posterc=scalar(keys %posters);
  $subjectc=scalar(keys %sublist);

  $interval=sprintf("%2.2f",($period / 86400));
  chomp($repinv=&ctime($cutoff));

  print "Elbows Traffic Report ".&ctime(time);
  print "======================================================================\n";
  print "Report interval ------: Since $repinv ($interval days)\n";
  print "Oldest post ----------: $odate\n";
  print "Most recent post -----: $ndate \n";
  print "Total posts ----------: $count\n";
  print "Total unique posters -: $posterc\n";
  print "Total unique subjects : $subjectc\n";
  print "======================================================================\n";

  &genhistory();

  &_printtop("Top $plimit Posters",\%posters,$count);
  &_printtop("\nTop $plimit subjects",\%sublist,$count);

  print "\nHourly distribution of postings: \n";
  print "----------------------------------------------------------------------\n";
  &gendist();
  print "======================================================================\n";
  print "MailWatch v$version by Shayde. http://www.homeport.org/~shevett\n";
}

#--------------------------------------------------------------------
# _ p r i n t t o p - print one "top N" table.
#
# $heading is the title text up to the percentage note, $tally is a
# reference to a hash of name => number of posts, and $count is the
# total number of posts. Rows go from most to fewest posts (ties in
# name order) and stop after $plimit rows or on reaching a tally equal
# to a non-zero $threshold. Each call builds its own row list, so rows
# from one table can never leak into the next, and an empty log
# reports 0.0% instead of dividing by zero.
#
sub _printtop {
  my($heading,$tally,$count)=@_;
  my($name,$num,$perc,$ptext,@rows);
  my($shown,$topcount)=(0,0);

  for $name (sort { $tally->{$b} <=> $tally->{$a} || $a cmp $b } keys %$tally) {
    $num=$tally->{$name};
    last if (($num == $threshold) && ($threshold));
    last if ($shown == $plimit);
    $perc=sprintf("%3.1f",($num / $count * 100));
    push(@rows,"$num ($perc %)\t$name\n");
    $shown++;
    $topcount=$topcount+$num;
  }

  $ptext=sprintf("%3.1f",$count ? ($topcount / $count) * 100 : 0);
  print "$heading (Representing ${ptext}% of the total traffic.)\n";
  print "----------------------------------------------------------------------\n";
  for $name (@rows) {
    print "\t$name";
  }
}

#--------------------------------------------------------------------
# m a i n - the naughty bits...
#
# -s (or a bare "s", which the earlier code accepted) prints the
# report; -i or no option reads one message from stdin and logs it.
# Getopts removes the switches from @ARGV and sets $opt_s / $opt_i,
# so the switch has to be tested through $opt_s. An unknown switch
# shows the usage text.
#
&Getopts('si') || &usage();
if ($opt_s || "$ARGV[0]" eq "s") {
  &summarize();
} else {
  &snarfinput();
}