#!/usr/bin/perl -w
##########################################################################
# $Id: weblog.pl,v 0.03 2003/01/10 Lars Exp $
##########################################################################
# Version History
# 0.01 - Basic reporting of page hits, visitors, domains and 404 errors
# 0.02 - Addition of total hit counts and daily averages.
# 0.03 - Added referer stats
##########################################################################
#
# Reads every file found in the log directory, tallies the lines that start
# with a "YYYY-MM-DD HH:MM:SS" style stamp, and prints a plain-text report
# to STDOUT.
#
# Usage: weblog.pl [--logdir DIR]
#
#   --logdir DIR   directory holding the log files
#                  (default: c:\winnt\system32\logfiles\w3svc1)
#
# Each accepted line is split on single spaces.  The script treats the
# resulting fields as follows:
#
#    0  date     (key of the "by Date" table)
#    1  time     (text before the first ':' is the key of the "by Hour" table)
#    2  visiting host
#    4  requested page
#    5  status   ("200" = success, "404" = not found, anything else is
#                 only counted per status value)
#    6  bytes transferred
#   11  referer
#
# NOTE(review): these positions depend on which fields the web server is
# configured to log -- confirm against the server's logging setup.
##########################################################################

use strict;
use warnings;
use Getopt::Long;

my $Version = '0.03';
my $VDate   = '01/10/03';

# Horizontal rules used by the report tables.
my $RULE_SHORT  = '==========================================';
my $RULE_MEDIUM = '=========================================================================';
my $RULE_LONG   = '==================================================================================';

# Number of rows shown in the "top"/"least" tables.
my $TOP_ROWS = 50;

# Return $total / $days, or 0 when no log files were found, so the summary
# never divides by zero on an empty log directory.
sub daily_average {
    my ($total, $days) = @_;
    return $days ? $total / $days : 0;
}

# Return the keys of the hash referenced by $counts, highest count first.
sub keys_by_count_desc {
    my ($counts) = @_;
    my @ordered = sort { $counts->{$b} <=> $counts->{$a} } keys %$counts;
    return @ordered;
}

# Return the keys of the hash referenced by $counts, lowest count first.
sub keys_by_count_asc {
    my ($counts) = @_;
    my @ordered = sort { $counts->{$a} <=> $counts->{$b} } keys %$counts;
    return @ordered;
}

# Print one "key count" row per entry of @$keys using the printf $format.
#   $counts - hash reference mapping key => count
#   $keys   - array reference giving the keys in display order
#   $limit  - maximum number of rows to print (undef = no limit)
#   $filter - compiled regex a key must match to be printed (undef = all);
#             keys that do not match do not count against $limit
sub print_rows {
    my ($format, $counts, $keys, $limit, $filter) = @_;
    my $shown = 0;
    foreach my $key (@$keys) {
        last if defined $limit && $shown >= $limit;
        next if defined $filter && $key !~ $filter;
        $shown++;
        printf($format, $key, $counts->{$key});
    }
    return;
}

my $logdir = "c:\\winnt\\system32\\logfiles\\w3svc1";
GetOptions('logdir=s' => \$logdir)
    or die "usage: $0 [--logdir DIR]\n";

my $TotalBytes   = 0;
my $DayCounter   = 0;    # number of files found in the log directory
my $ViewCounter  = 0;    # successful hits whose page name contains html/HTML
my $HitCounter   = 0;    # every line that carries a date/time stamp
my $OKHitCounter = 0;    # hits with status 200

my (%DailyBytes, %HourlyBytes);      # bytes per date / per hour
my (%PageCount, %Visitor, %Domain);  # successful hits per page / host / domain
my %Referer;                         # successful hits per external referer
my (%e404Page, %e404Host);           # 404 hits per page / per host
my %MessageNum;                      # hits per status other than 200 and 404

my $StartTime = localtime;

# Need to parse all files in the log directory
opendir(my $dir_handle, $logdir)
    or die "unable to open log directory $logdir: $!";
my @logfiles = grep { !/^\.\.?$/ } readdir($dir_handle);
closedir($dir_handle);

# sort the list to get the logfiles in chronological order.
foreach my $file (sort @logfiles) {
    my $filename = $logdir . "/" . $file;

    # Every directory entry counts towards the daily averages, including
    # one that cannot be opened (unchanged from earlier versions).
    $DayCounter++;

    my $log;
    unless (open($log, '<', $filename)) {
        warn "unable to open $filename: $!\n";
        next;
    }

    while (defined(my $line = <$log>)) {
        # only grab the lines with dates and times.
        next unless $line =~ m/^....-..-.. ..:..:../;

        # if it's a line, it's a hit!
        $HitCounter++;

        # Treat the line as a space-delimited line, and split it up!
        my @entry  = split(/ /, $line);
        my $status = defined $entry[5] ? $entry[5] : '';

        if ($status eq '200') {
            # Regular OK message, collect all sorts of info!
            my ($host, $page) = @entry[2, 4];

            # Leading digits only, so a missing or non-numeric field
            # counts as 0 bytes instead of raising a warning.
            my $bytes = (defined $entry[6] && $entry[6] =~ /^(\d+)/) ? $1 : 0;

            my ($hour) = split(/:/, $entry[1]);
            $hour = '' unless defined $hour;

            # count up total number of bytes for this run
            $TotalBytes         += $bytes;
            $DailyBytes{$entry[0]} += $bytes;
            $HourlyBytes{$hour} += $bytes;

            # Page Count!
            $PageCount{$page}++;

            # Increase ViewCounter, if it's a web page
            $ViewCounter++ if $page =~ m/html|HTML/;
            $OKHitCounter++;

            # Collect who is visiting
            $Visitor{$host}++;

            # Collect Domain names: the last two dot-separated labels of
            # the host.  A host with fewer than two labels is used as-is
            # rather than being doubled up through negative indexing.
            my @labels = split(/\./, $host);
            my $domain = @labels >= 2 ? join('.', @labels[-2, -1]) : $host;
            $Domain{$domain}++;

            # Collect Referer info, skipping referers from our own site.
            # NOTE(review): chop drops the last character, presumably the
            # line's trailing newline -- confirm field 11 is the last field.
            my $referer = defined $entry[11] ? $entry[11] : '';
            chop($referer);
            $Referer{$referer}++ unless $referer =~ m/hansenonline\.net/;
        }
        elsif ($status eq '404') {
            # Attempt to load page that doesn't exist
            # Collect hosts and page attempted to load
            $e404Page{$entry[4]}++;
            $e404Host{$entry[2]}++;
        }
        else {
            $MessageNum{$status}++;
        }
    }

    close($log);
}

my $EndTime = localtime;

print "\n Complete Report\n";
print " ===============\n";
print " Report started : $StartTime\n";
print " Report ended : $EndTime\n";
printf(" Hits : %12d\n", $HitCounter);
printf(" Hits (successful) : %12d\n", $OKHitCounter);
printf(" Hits daily average : %12d\n", daily_average($HitCounter, $DayCounter));
printf(" Page views total : %12d\n", $ViewCounter);
printf(" Page views daily average : %12d\n", daily_average($ViewCounter, $DayCounter));
printf(" Total Bytes: : %12d\n", $TotalBytes);
printf(" Bytes daily avergage : %12d\n", daily_average($TotalBytes, $DayCounter));

if (keys %DailyBytes) {
    print "\n Bytes transferred by Date.\n";
    print "$RULE_SHORT\n";
    print " Date Bytes\n";
    print_rows(" %-30s %9d\n", \%DailyBytes, [ sort keys %DailyBytes ]);
    print "$RULE_SHORT\n";
}

if (keys %HourlyBytes) {
    print "\n Bytes transferred by Hour.\n";
    print "$RULE_SHORT\n";
    print " Hour Bytes\n";
    print_rows(" %-30s %9d\n", \%HourlyBytes, [ sort keys %HourlyBytes ]);
    print "$RULE_SHORT\n";
}

if (keys %PageCount) {
    # Only pages whose name ends in "html" are listed in the page tables.
    my $html_page = qr/html$/;

    print "\nMost popular pages\n";
    print "$RULE_MEDIUM\n";
    print " Page Hits\n";
    print_rows(" %-60s %9d\n", \%PageCount,
               [ keys_by_count_desc(\%PageCount) ], $TOP_ROWS, $html_page);
    print "$RULE_MEDIUM\n";

    print "\nLeast popular pages\n";
    print "$RULE_MEDIUM\n";
    print " Page Hits\n";
    print_rows(" %-60s %9d\n", \%PageCount,
               [ keys_by_count_asc(\%PageCount) ], $TOP_ROWS, $html_page);
    print "$RULE_MEDIUM\n";
}

if (keys %Visitor) {
    print "\n Top 50 Visiting hosts\n";
    print "$RULE_MEDIUM\n";
    print " Host Times\n";
    print_rows(" %-60s %9d\n", \%Visitor,
               [ keys_by_count_desc(\%Visitor) ], $TOP_ROWS);
    print "$RULE_MEDIUM\n";
}

if (keys %Domain) {
    print "\n Top 50 Visiting Domains\n";
    print "$RULE_SHORT\n";
    print " Domain Times\n";
    # Entries without any letter (e.g. the tail of a numeric address) are
    # left out of the domain table.
    print_rows(" %-30s %9d\n", \%Domain,
               [ keys_by_count_desc(\%Domain) ], $TOP_ROWS, qr/[a-zA-Z]/);
    print "$RULE_SHORT\n";
}

print "\n Page Statistics\n";

if (keys %PageCount) {
    print "\nHits by Page\n";
    print "$RULE_MEDIUM\n";
    print " Page Hits\n";
    # Alphabetical listing of every html page; no row limit here.
    print_rows(" %-60s %9d\n", \%PageCount,
               [ sort keys %PageCount ], undef, qr/html$/);
    print "$RULE_SHORT\n";
}

if (keys %Referer) {
    print "\n Referer information\n";
    my $shown = 0;
    foreach my $referer (keys_by_count_desc(\%Referer)) {
        last if $shown >= $TOP_ROWS;
        $shown++;

        # NOTE(review): the key was already chopped once when it was
        # collected, so this removes one more trailing character from the
        # displayed text.  Kept as-is; confirm whether it is intended.
        my $label = $referer;
        chop($label);
        printf(" %-70s %9d \n", $label, $Referer{$referer});
    }
}

print "\n Error reports\n";

if (keys %e404Page) {
    print "\n Top 50 pages not found\n";
    print "$RULE_LONG\n";
    print " Page Times\n";
    print_rows(" %-70s %9d\n", \%e404Page,
               [ keys_by_count_desc(\%e404Page) ], $TOP_ROWS);
    print "$RULE_LONG\n";
}

if (keys %e404Host) {
    print "\n Top 50 hosts generating 404 errors\n";
    print "$RULE_LONG\n";
    print " Host Times\n";
    print_rows(" %-70s %9d\n", \%e404Host,
               [ keys_by_count_desc(\%e404Host) ], $TOP_ROWS);
    print "$RULE_LONG\n";
}

if (keys %MessageNum) {
    print "\n Message Numbers not handled\n";
    print "$RULE_SHORT\n";
    print " Message number Times\n";
    print_rows(" %-30s %9d\n", \%MessageNum,
               [ keys_by_count_desc(\%MessageNum) ]);
    print "$RULE_SHORT\n";
}