#!/usr/bin/perl -w

###############################################################################
#
# cmst (Nagios collector plugin)
#
# description: see printUsage()
#
# operation: Run the cmst file system I/O test and report results
#
my $version = "0.1";
#
# author: Dale Talcott/NASA Advanced SuperComputing Facility
# email: Dale.R.Talcott@nasa.gov
#
# please report any bugs or other issues to the author, thanks!
#
# changes since 0.0
#
###############################################################################


use File::Basename;
use lib dirname ( $0 );

# Karen McCann's perl magic for getting the full path of the directory
# containing this file at runtime
$0 = $ENV{_} unless -e $0;
delete $INC{'FindBin.pm'};  # NOTE: this call necessary for resolving @INC dir
                            # paths at Perl compile time.
use FindBin;
use lib "$FindBin::Bin/../Shared";
use FindBin qw($RealBin);

use lib $RealBin . "/lib";

use strict;

use File::Temp "tempfile";
use Getopt::Std;

# Take the nagios version from the directory this script lives in: the
# first "nagios" that is directly followed by a digit contributes everything
# from that digit up to the next "/".  Any other location gives "unknown".
my $nagiosVersion = ($RealBin =~ /nagios (\d [^\/]*)/x) ? $1 : "unknown";

# Version tag that leads the result and error summary lines
my $verStr = "Ver=${version}_${nagiosVersion}";

# Boolean convenience values
use constant {
    TRUE  => 1,
    FALSE => 0,
};

# return codes
use constant {
    OK       => 0,
    WARN     => 1,
    CRITICAL => 2,
    UNKNOWN  => 3,
};


# Line written to stdout when the report itself goes to a temp file
my $stdoutStr = "";

# Temp file redirection; handle and name are filled in by Main()
my $useTmpFile = FALSE;
my $tmpFileNameTemplate = "/tmp/nagios-cmst.XXXXXXX";
my ($tmpfileHandle, $tmpfileName);

# Options given to every cmst run.
# Added later: -l len, -n count, -f file
our @cmstArgs = (
    '-B' => '1048576',                  # Buffer alignment
    '-b' => '1',                        # Stop after this many error records
    '-p' => '1',                        # Pass count
    '-P' => '==',                       # Prefix for performance msgs
    '-w' => '20',                       # Report only 20 errors per record
);

our $abortOnFirstReadError = 0;         # How to handle first read error
our $critThresh = 1.0;                  # Critical threshold
our $warnThresh = 10.0;                 # Warning threshold
our @extraOutput = ();                  # Verbose msgs
our $cmstPath = "$RealBin/lib/cmst";    # Path to cmst program
our $initialRun = 0;                    # First write flag
our $verbose = 0;                       # Verbosity level

our $outStream = *STDOUT;               # Where the report is written

Main();

exit(OK);




#--------------------------------------------------------------------------
#
# Main() - Parse and validate the command line, then run the test
# Entry:	@ARGV = command line options (see printUsage())
# Assumes:	$abortOnFirstReadError, $cmstPath, $critThresh, $warnThresh
#		$initialRun, $verbose
#		$useTmpFile, $tmpFileNameTemplate, $outStream
# Exit:		none: calls exit(), either directly or through
#		exitWithError() / runCmst()

sub Main
{
    my %commandArgs;

    my $testFile = '';
    my $rlen = 65536;

    # nagios sends SIGTERM (which can be handled) before sending SIGKILL (which
    # can't be handled). we handle SIGINT in case the user attempts to kill off
    # an execution of this program
    $SIG{'INT'}  = \&sigHandler;
    $SIG{'TERM'} = \&sigHandler;

    # getopts() returns false for an unknown option or a missing option
    # argument.  Refuse to run the test on a half-understood command line.
    if ( !getopts('ac:C:ef:hi:l:TvVw:', \%commandArgs) ) {
        exitWithError(UNKNOWN, "invalid command line options");
    }

    if ( exists($commandArgs{h}) ) {
        printUsage();
        exit(OK);
    }

    if ( exists($commandArgs{V}) ) {
        print("version: $version\n");
        exit(OK);
    }

    # simple flags and pass-through values
    $abortOnFirstReadError = 1     if exists($commandArgs{a});
    $cmstPath   = $commandArgs{C}  if exists($commandArgs{C});
    $testFile   = $commandArgs{f}  if exists($commandArgs{f});
    $initialRun = $commandArgs{i}  if exists($commandArgs{i});
    $useTmpFile = TRUE             if exists($commandArgs{T});

    # -e forces level 2 and -v adds one more.  Applying them in a fixed
    # order keeps the result independent of hash key order.
    $verbose = 2 if exists($commandArgs{e});
    $verbose++   if exists($commandArgs{v});

    # thresholds are compared numerically and formatted with %.1f, so
    # anything that is not a plain number is a usage error
    my $numberRe = qr/^[+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?$/;
    if ( exists($commandArgs{c}) ) {
        $critThresh = $commandArgs{c};
        if ( $critThresh !~ $numberRe ) {
            exitWithError(UNKNOWN, "critical threshold (-c) is not a number",
                $critThresh);
        }
    }
    if ( exists($commandArgs{w}) ) {
        $warnThresh = $commandArgs{w};
        if ( $warnThresh !~ $numberRe ) {
            exitWithError(UNKNOWN, "warning threshold (-w) is not a number",
                $warnThresh);
        }
    }

    # record length: digits, an optional k/K or m/M scale, an optional b/B
    if ( exists($commandArgs{l}) ) {
        my $spec = $commandArgs{l};
        if ( $spec !~ m/^(\d+)([kKmM]?)[bB]?$/ ) {
            exitWithError(UNKNOWN, "invalid record length (-l)", $spec);
        }
        my ($digits, $scale) = ($1, lc($2));
        $rlen = $digits;
        if ($scale eq 'k') {
            $rlen *= 1024;
        } elsif ($scale eq 'm') {
            $rlen *= 1024 * 1024;
        }
        # a zero length would later be used as a divisor
        if ($rlen <= 0) {
            exitWithError(UNKNOWN, "record length (-l) must be positive",
                $spec);
        }
    }

    if ($testFile eq '') {
        exitWithError(UNKNOWN, "no test file specified (-f)");
    }

    # open a temporary file
    if ( $useTmpFile ) {
        # eval is used to trap any exceptions that are raised
        eval {
            ($tmpfileHandle, $tmpfileName) = tempfile($tmpFileNameTemplate);
        };

        # exit if a temp file was requested but we weren't able to open one
        if ( !defined($tmpfileHandle) ) {
            print("$verStr|errorSummary=!could not create tempfile!\n");
            exit(UNKNOWN);
        }

        $outStream = $tmpfileHandle;

        # print filename to stdout so nagios/etc can find the file
        $stdoutStr .= "$verStr|$tmpfileName\n";
        print($stdoutStr);
    }

    # An initial run supplies the record count itself; otherwise it is
    # derived from the size of the existing test file.
    my $count;
    if ($initialRun) {
        $count = $initialRun;
    } else {
        $count = checkTestFile($testFile, $rlen);
    }
    runCmst($testFile, $rlen, $count);
    # Not reached
}


#--------------------------------------------------------------------------
#
# checkTestFile($file, $len) - Check that test file exists correctly
# Entry:	$file = path to test file
#		$len = desired record length (must be positive)
# Returns:	number of records in file
#		Exits on error conditions: bad record length, file cannot
#		be stat'ed, file smaller than one record, or file size not
#		a multiple of the record length
# Note:		A record length that is not a multiple of the file system
#		block size is only recorded through extraInfo()

sub checkTestFile {
    my ($file, $len) = @_[0..1];

    # $len is used as a divisor below; a zero value would make perl die
    # with "Illegal modulus zero" instead of producing a plugin report
    if (!defined($len) || $len <= 0) {
        exitWithError(UNKNOWN, "invalid record length",
            defined($len) ? "$len" : "undefined");
    }

    # stat() returns an empty list on failure, 13 fields on success
    my @sb = stat($file);
    if (@sb < 13) {
        exitWithError(UNKNOWN, "cannot stat test file $file", "$!");
    }
    my ($size, $blksize) = @sb[7,11];
    if ($size < $len) {
        exitWithError(UNKNOWN, "test file too small",
            sprintf("%d < %d", $size, $len) );
    }
    if ($size % $len) {
        exitWithError(UNKNOWN, "test file not multiple of record length",
            sprintf("%d %% %d = %d", $size, $len, $size % $len) );
    }
    # blksize is not available on every platform/file system (it can come
    # back as 0 or ''); skip the advisory check rather than divide by zero
    if ($blksize && $len % $blksize) {
        extraInfo("record length not multiple of file system block size",
            sprintf("%d %% %d != 0", $len, $blksize) );
    }
    return $size / $len;
}


#--------------------------------------------------------------------------
#
# runCmst($file, $len, $count) - Run cmst program
# Entry:	$file = path to test file
#		$len = desired record length
#		$count = desired record count
# Assumes:	@cmstArgs
#		$cmstPath
#		$critThresh, $warnThresh
#		$initialRun
#		$outStream
# Exit:		none: Calls exit() one way or the other
#		A cmst failure exits with cmst's own exit code, or UNKNOWN
#		when cmst was killed by a signal or could not be started,
#		so a failed run never exits with OK.

sub runCmst {
    my ($file, $len, $count) = @_[0..2];

    push @cmstArgs, '-l', $len;
    push @cmstArgs, '-n', $count;
    push @cmstArgs, '-f', $file;

    # Save away arg list without test selection
    my @fullArgs = @cmstArgs;
    # Now, add option to run only random read test
    push @cmstArgs, '-t', 'rr';
    my @rrArgs = @cmstArgs;

    my $rc;
    my $rc2 = 0;
    my @results;
    if (!$initialRun) {
        # Run the random read first.  This detects corruption since
        # the last time we ran.
        ($rc2, @results) = _cmstRun(@rrArgs);
        if ($rc2 != 0) {
            # Errors of some kind
            if ($abortOnFirstReadError) {
                exitWithError($rc2, "Errors from cmst first read", @results);
            }
            extraInfo(@results);
        }
    }
    # Now do normal test
    ($rc, @results) = _cmstRun(@fullArgs);
    # A clean normal run does not hide errors from the first read
    if ($rc == 0) { $rc = $rc2; }
    if ($rc != 0) {
        # Errors of some kind
        exitWithError($rc, "Errors from cmst", @results);
    }
    extraInfo(@results);

    # Normal run, extract performance numbers
    my ($rSpeed, $wSpeed);
    foreach my $line ( @results ) {
        if ($line =~ m/^==Pass.*sequential read:.* (\d+\.\d+)/) {
            $rSpeed = $1;
        }
        if ($line =~ m/^==Pass.*sequential write:.* (\d+\.\d+)/) {
            $wSpeed = $1;
        }
    }

    # Report each speed that was found and grade it against the thresholds
    my @msgs = ();
    $rc = OK;
    if (defined($rSpeed)) {
        push @msgs, sprintf("read=%.1fMB;%.1f;%.1f", $rSpeed, $warnThresh, $critThresh);
        if ($rSpeed < $critThresh) {
            $rc = CRITICAL;
        } elsif ($rSpeed < $warnThresh) {
            $rc = max($rc, WARN);
        }
    }
    if (defined($wSpeed)) {
        push @msgs, sprintf("write=%.1fMB;%.1f;%.1f", $wSpeed, $warnThresh, $critThresh);
        if ($wSpeed < $critThresh) {
            $rc = CRITICAL;
        } elsif ($wSpeed < $warnThresh) {
            $rc = max($rc, WARN);
        }
    }
    my $results = join(' ', @msgs);

    print $outStream "$verStr|$results\n";
    if ($verbose && $#extraOutput >= 0) {
        print $outStream join("\n", @extraOutput, "");
    }
    exit($rc);
}

# _cmstRun(@args) - run cmst once; return (exit code, chomped output lines).
# The exit code is 0 only when cmst exited with status 0.
sub _cmstRun {
    my @args = @_;

    if ($verbose > 1) {
        print $outStream "Running cmst with ", join(' ', @args), "\n";
    }

    # The command goes through the shell (needed for 2>&1), so quote every
    # argument: the test file path and counts come from the command line.
    # $cmstPath is deliberately left as given.
    my $quoted = join(' ', map { _shellQuote($_) } @args);
    my @lines = `$cmstPath $quoted 2>&1`;
    my $status = $?;
    my $errText = "$!";		# only meaningful when $status is -1
    chomp(@lines);

    my $code = 0;
    if ($status == -1) {
        # the command could not be started at all
        push @lines, "could not run $cmstPath: $errText";
        $code = UNKNOWN;
    } elsif ($status & 127) {
        # killed by a signal: $status >> 8 would be 0, which means OK
        push @lines, sprintf("cmst died with signal %d", $status & 127);
        $code = UNKNOWN;
    } elsif ($status != 0) {
        $code = ($status >> 8) || UNKNOWN;
    }
    return ($code, @lines);
}

# _shellQuote($word) - return $word as one single-quoted shell word.
sub _shellQuote {
    my ($word) = @_;
    $word =~ s/'/'\\''/g;
    return "'$word'";
}


#--------------------------------------------------------------------------
#
# max( n, ... ) - return maximum value from list
# Entry:	called with non-empty list of numeric values
# Returns:	the largest of the values, compared numerically

sub max {
    my ($largest, @others) = @_;
    # keep the current value unless a later one is strictly greater
    $largest = ($_ > $largest) ? $_ : $largest for @others;
    return $largest;
}


#--------------------------------------------------------------------------
#
# exitWithError(rc, msg, more) - Report an error in the standard form, then exit
# Entry:	rc = desired return code (OK, WARN, CRITICAL, UNKNOWN)
#		msg = short error message for the summary line
#		more = list of additional info about error
# Assumes:	$verStr, $outStream, @extraOutput
# Exit:		none, calls exit()

sub exitWithError {
    my ($code, $msg, @more) = @_;

    # summary line first, then saved verbose messages, then the details
    print $outStream "$verStr|errorSummary=!${msg}!\n";
    print $outStream $_, "\n" for ( @extraOutput, @more );
    exit($code);
}


#--------------------------------------------------------------------------
#
# extraInfo - keep messages for the verbose part of the report
# Entry:	list of messages to save
# Assumes:	$verbose
# Exit:		messages appended to @extraOutput when $verbose is set,
#		dropped otherwise

sub extraInfo {
    push @extraOutput, @_ if $verbose;
}


#--------------------------------------------------------------------------
#
# sigHandler(name) - handler for the signals registered in Main()
# Entry:	name = signal name without the SIG prefix
# Exit:		none: prints an error summary naming the signal, removes
#		the temp file if one was opened, then exits with CRITICAL
#
sub sigHandler
{
    my ($sigName) = @_;

    print("$verStr|errorSummary=!exiting with signal SIG${sigName}!\n");
    deleteTempFile();
    exit(CRITICAL);
}


#--------------------------------------------------------------------------
#
# deleteTempFile() - remove the temp file, but only if one was opened
# Assumes:	$tmpfileHandle, $tmpfileName
#
sub deleteTempFile
{
    unlink($tmpfileName) if defined($tmpfileHandle);
}


#--------------------------------------------------------------------------
#
# printUsage() - print the description and the option summary to stdout
# Entry:	$0 = program name shown in the usage line
# Exit:		help text printed; covers every option Main() accepts
#
sub printUsage
{
    print <<'EOF';

This script checks file system performance by running the cmst
program on a pre-defined file in the file system.

EOF

    print("Usage: $0 [-h] [-V] [-aevT] [-c crit] [-w warn] [-C cmst_path]\n"
        . "       [-i count] [-l blen] -f /path/to/test_file\n");
    print <<'EOF';

  -a    give up if the first (random read) pass reports errors

  -c    critical threshold: a read or write speed below this is
        CRITICAL (default 1.0)

  -C    path to the cmst program (default: lib/cmst next to this script)

  -e    echo the cmst command lines (sets verbosity to 2)

  -f    path to test file, which should already exist

  -h    this help text

  -i    initial run: skip the test file checks and the first (random
        read) pass, and use the given record count

  -l    block length for I/O in bytes; a k or m suffix scales it by
        1024 or 1024*1024 (default 65536)

  -T    output to a tempfile

  -v    verbose: add the cmst output to the report

  -V    version

  -w    warning threshold: a read or write speed below this is
        WARN (default 10.0)

author: Dale Talcott / NASA / CSC  Dale.R.Talcott@nasa.gov
EOF
}
