#!/usr/bin/perl

# xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
# Agent for monitoring crashes
# 
# Feb 2016, Jieming Wang
# 
# Copyright (c) 2016-2019 by Cisco Systems, Inc.
# All rights reserved.
# xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
#

use lib qw ( /pkg/opt/cisco/pam/ /opt/cisco/calvados/pam/ /opt/pam/ );

use pam;

use File::Basename;
use Getopt::Std;
use vars qw/ %opt /;
use File::Copy;
use strict;
use warnings;

#use inotify to improve efficiency:
use Linux::Inotify2;

# Write the one-line usage banner to STDERR and terminate the script with
# exit status 1. Takes no arguments and never returns.
sub usage {
    my $banner = "Usage: $0 <options>\n";
    print STDERR $banner;
    exit 1;
}
# Forward declaration: makes the five-scalar prototype known before the
# definition at the end of the file.
sub update_crash_log($$$$$);

##########################################
# parameters
##########################################
# System/platform facts filled in below from getOsType() and getWS().
my ($pam_root,
    $sys_info,
    $osType,
    $boardtype,
    $platform,
    $vf1_3073_ip,
    $version_info);

my ($localtime, $node_info);
# Location of the pid file that the memory checks further down remove
# before exiting.
my $pid_dir = "/opt/cisco/pam/run/";
my $pid_file = $pid_dir . "/pid";

# This agent handles a single bucket/event type: "crash".
my ($bucket, $event_type);
$bucket = "crash";
$event_type = $bucket;

# Command line: -d turns on debug printing; -I takes a value but is not
# read anywhere in this file.
getopts( "I:d", \%opt );

my $debug = $opt{d};
my $hostname = `uname -n`;
$hostname =~ s/[\r\n]//g;

# Own pid and script basename, used for the single-instance check below.
my $my_pid = $$;
my $this = $0;
my $bname = $this;
$bname =~ s/.*\///g;

# getOsType() is not defined in this file (presumably provided by the pam
# module - TODO confirm); it returns a hash ref read through the keys below.
$sys_info = &getOsType();
$osType = $sys_info->{hostType};
$boardtype = $sys_info->{boardtype};
$platform = $sys_info->{platform};
#local eth-vf1.3073 IP address:
$vf1_3073_ip = $sys_info->{vf1_3073_ip};

# Mode defaults to "exec" and becomes "sysadmin" on calvados/sysadmin hosts.
my $mode = "exec";
if (!defined($osType)) {
    print "Unknown osType\n";
    exit(1);
}
if ($osType =~ /calvados|sysadmin/i) {
    $mode = "sysadmin";
}

$version_info = &getWS($osType);
my $buildDate = $version_info->{buildDate};
my $build_ymd = &localtime_to_yearmonthday($buildDate);

# Refuse to run unless the board type matches RP, RSP or CC
# (case-insensitive).
if ($boardtype !~ /R[S]?P|CC/i) {
    print "$0 can only run on RP.\n";
    exit(1);
}
########################################
#make sure only one instance running
########################################
# check_process() returns an array ref; a non-empty list is treated as
# "another instance is already running" and its contents are printed as pids.
my $ret = &check_process($bname, $sys_info, $my_pid);
if ((scalar(@$ret)) > 0) {
    print "process $bname (pid=@$ret) is already running.\n";
    exit(1);
}
# The PAM log directory doubles as $pam_root; create it when missing.
my $log_dir = get_PamLogDir();
mkdir $log_dir if (! -d $log_dir);
$pam_root = $log_dir;
my $user_edcd_dir = "/opt/pam/etc/";
# Event database: the calvados copy by default, the /pkg copy when the OS
# type matches "xr", followed by any user-provided event files.
my @pam_event_db_files=('/opt/cisco/calvados/pam/pam_event.json');
if ($osType =~ /xr/i) {
    @pam_event_db_files=('/pkg/opt/cisco/pam/pam_event.json');
}
my $edcd_info = get_user_pattern_files($user_edcd_dir);
my @user_event_files = @{$edcd_info->{event_files}};
@pam_event_db_files=(@pam_event_db_files, @user_event_files);
my $show_tech_root = "/misc/disk1/showtech/";

#######################################################
#core watch folder: /misc/disk1
#######################################################
my $misc_disk1 = "/misc/disk1";
my $misc_disk1_dumper = $misc_disk1 . "/dumper";

# One inotify instance watches both directories. Direct method-call syntax
# is used instead of the indirect-object form ("new Class"), which is
# ambiguous to parse and disabled by newer feature bundles.
my $pam_inotify = Linux::Inotify2->new() or
                die "Unable to create new inotify object: $!";
foreach my $dir ($misc_disk1, $misc_disk1_dumper) {
    # Create the directory when it is missing so the watch can be installed.
    mkdir $dir if ( ! -d $dir );
    # IN_CLOSE_WRITE: events are delivered when a file opened for writing
    # is closed.
    if (! $pam_inotify->watch ($dir, IN_CLOSE_WRITE) ) {
        print "watch creation for '$dir' failed: $!\n";
        exit(1);
    }
}
my @coreFolders = ();
push @coreFolders, "/misc/disk1" if ( -d "/misc/disk1" );

# ctrace process information is collected only when the OS type matches
# calvados, sysadmin or xr.
my $ctrace_info;
if ( $osType =~ /calvados|sysadmin|xr/i ) {
    $ctrace_info = get_ctrace_processes();
}

#Set LD_LIBRARY_PATH to make sure it will not use non-system library
if ( $osType =~ /xr/i) {
    $ENV{LD_LIBRARY_PATH} = "/pkg/lib:/pkg/lib/cerrno:";
    $ENV{LD_LIBRARY_PATH} .= "/pkg/lib/mib:/pkg/lib/spp_plugins";
} elsif ( $osType =~ /calvados|sysadmin/i) {
    $ENV{LD_LIBRARY_PATH} = "";
}
my (@oldCrashes, @newCrashes);
# List of processed core files; one line is appended per core by the
# watch loop.
my $crashFile = $log_dir . "/" . "crashList.txt";

# check /opt/cisco/pam/etc/build_ip.txt exists,
# if yes, then try mount from build server directly
# NOTE(review): no code in this file performs the check described above;
# the comment looks stale - confirm before relying on it.

my ($ddts_smu_info, $show_install_info);
$show_install_info = &get_ddts_from_show_install($sys_info);
my @ddts_list = @{$show_install_info->{ddtsList}};

my $t1 = time(); 

# Files and directories created from here on get no permission bits masked.
umask 0000;
my $crashMsg = "";

my ($initial_crash_data, $wsfd);

#now update
# Open in append mode and close again: creates the file when missing and
# verifies it is writable before the daemon forks. Three-argument open keeps
# the mode separate from the path.
# NOTE(review): the failure path exits with status 0 - confirm that is
# intended.
if (!open($wsfd, '>>', $crashFile)) {
    print "Failed to write to $crashFile: $!\n";
    exit 0;
}
close($wsfd);

#use /opt/cisco/pam/ for logging which will be archived/rotated
my $pam_log_dir = "/opt/cisco/pam/";
mkdir "/opt/cisco/" if (! -d "/opt/cisco/");
mkdir $pam_log_dir if (! -d $pam_log_dir);
my $crash_log = $pam_log_dir . "/crash.log";
# Previously recorded crashes; each entry is later split on commas into
# node, process, signal and time.
@oldCrashes = getOldInstances($crash_log);
my ($crash_fd);

# Seed the list of "process:signal" pairs that were already reported, so the
# watch loop can ignore repeats. Each @oldCrashes entry is split on commas
# into node, process, signal and time - the same field order that
# update_crash_log() writes.
my $isGlobalSkippedProcs;
my @_skippedProcSignals = ();
my %isProcSeen;
foreach my $line (@oldCrashes) {
    my ($node, $proc, $signal, $time) = split(/,/, $line);
    # Skip blank or malformed lines instead of building a bogus ":" entry
    # from undefined fields.
    next if (!defined($proc) || !defined($signal));
    # Remove every whitespace run, not only the first one, so padded fields
    # compare cleanly.
    $proc =~ s/\s+//g;
    $signal =~ s/\s+//g;
    my $proc_signal = $proc . ":" . $signal;
    if (!$isProcSeen{$proc_signal}) {
        $isProcSeen{$proc_signal} = 1;
        push @_skippedProcSignals, $proc_signal;
    }
}
$isGlobalSkippedProcs->{skippedProcSignals} = \@_skippedProcSignals;

# SIGTERM is routed to INT_handler (named by string, resolved in main::).
$SIG{TERM} = 'INT_handler';

# Crash bookkeeping shared with the watch loop.
my ($crash_info, $crashCount, $crashTracking);
my $version_content = &getVersionContent("/");
my @pending_ctimes;
my $max_xr_block_time = 120;

# %isFound    - node/process pairs already handled
# %isTxtSeen  - core .txt companions already queued for archiving
my (%isFound, %isTxtSeen);

# The chassis id must be a plain non-negative integer; anything else aborts.
my $chassis_id = get_chassis_id($sys_info);
unless ($chassis_id =~ /^\d+$/) {
    print "Invalid chassis ID: $chassis_id (expect integer).\n";
    exit(1);
}

#in EDCD, there are 2 type of commands: commands, and shell_commands
#"commands" is selected on XR when /pkg/bin/xr_cli exists and on
#calvados/sysadmin; everything else uses "shell_commands".
my $cmd_type = ((($osType =~ /xr/i) && (-f "/pkg/bin/xr_cli")) ||
                ($osType =~ /calvados|sysadmin/i))
             ? "commands"
             : "shell_commands";
my $cmd_info = get_default_commands($event_type, $osType);
my @default_commands = @{ $cmd_info->{$cmd_type}->{allCommands} };
#
# Thresholds compared against get_memfree() and
# get_process_total_memory(); the inline notes suggest the unit is KB
# (TODO confirm against those helpers).
my $min_free_threshold = 307200; #300MB:
my $max_threshold = 30000; #will stop if memory>30MB
my $memfree = get_memfree();
#Only start when there is enough memory to prevent exhausting memory
if ($memfree < $min_free_threshold) {
    #free cache on calvados in case its too low:
    if ($osType =~ /calvados/) {
        `sysctl vm.drop_caches=3`;
    }
    # Measure again; give up when free memory is still below the minimum.
    $memfree = get_memfree();
    if ($memfree < $min_free_threshold) {
        sleep 60;
        unlink $pid_file if (-f $pid_file);
        exit;
    }
}

my $showtech_cnt = 0;

# Detach the watcher: the parent falls through to the end of the script
# right after the fork, the child runs the monitoring loop below.
# fork() returns undef on failure; that case is reported and aborted here
# instead of being mistaken for the child branch.
my $child_pid = fork();
if (!defined($child_pid)) {
    print "Fail to fork: $!\n";
    exit(1);
}
if ($child_pid == 0) {
    $my_pid = $$;
    while (1) {
        my @saved_log_files = ();
        #Only run on the RP
        my $uptime = &getUptime;

        # Memory guard: try to free cache first, then exit (after removing
        # the pid file) when free memory is still too low.
        $memfree = get_memfree();
        if ($memfree < $min_free_threshold) {
            #free cache on calvados in case its too low:
            if ($osType =~ /calvados/) {
                `sysctl vm.drop_caches=3`;
            }
            $memfree = get_memfree();
        }
        if ($memfree < $min_free_threshold) {
            #restart to free memory (as perl cannot free)
            sleep 60;
            unlink $pid_file if (-f $pid_file);
            exit;
        }
        # Exit as well when this process itself has grown past the limit.
        my $total_rss = get_process_total_memory($my_pid);
        if ($total_rss >= $max_threshold) {
            #restart to free memory (as perl cannot free)
            unlink $pid_file if (-f $pid_file);
            print "Memory exceeds limit. Restart to free memory.\n" if ($debug);
            exit;
        }
        #Skip if harddisk is missing
        # NOTE(review): $log_dir is not reassigned inside this loop, so when
        # the pattern does not match the loop only sleeps - confirm intent.
        if ($log_dir !~ /^\s*[\/]*(harddisk:|misc\/+disk1)/) {
            sleep 60;
            next;
        }

        print "starting watching....\n" if ($debug);
        my $crash_data;
        my (@newCrashList, @fullcores);

        my $ctime = time();
        #INOTIFY_EVENTS:
        # An empty event list is treated as a read error and ends the loop.
        my @events = $pam_inotify->read;
        unless (@events > 0) {
            print "read error: $!\n";
            last;
        }
        # Collect the new core files of this batch, one entry per basename.
        my %isSeen;
        foreach my $e (@events) {
            print "New file closes - processing ....\n";
            my $core = $e->fullname;

            #ignore user generated core
            next if ( $core =~ /\d+\.by[\._]user\.\d+/ );
            next if ($core !~ /core\.gz$/);
            print "Core name is $core\n" if ($debug);
            my $base_core_name = basename($core);

            if (!$isSeen{$base_core_name}) {
                push @newCrashList, $base_core_name;
                #not really but close enough
                $crash_data->{$base_core_name}->{ctime} = $ctime;
                $crash_data->{$base_core_name}->{fullname} = $core;
                push @fullcores, $core;
                $isSeen{$base_core_name} = 1;
            }
        } ;#foreach my $e (@events)
        print "Done with watch\n" if ($debug);

        next if ( scalar(@newCrashList) < 1 );
        # Three-argument open keeps the mode separate from the path.
        # NOTE(review): the failure path exits with status 0 - confirm.
        if (!open($wsfd, '>>', $crashFile)) {
            print "Failed to write to $crashFile: $!\n";
            exit 0;
        }

        my @executed_commands = ();
        my @uniqueCrashProcs = ();
        my $show_tech_executed = 0;
        # Snapshot of every core in this batch; @fullcores is consumed below.
        my @_fullcores = @fullcores;
        # proc:signal pairs first seen in this batch; merged into the global
        # skip list once the batch is done.
        my @_skippedProcSignals = ();

        while (@newCrashList) {
            # @newCrashList and @fullcores are parallel lists; pop both.
            my $crash = pop (@newCrashList);
            push @oldCrashes, $crash;
            my $fullCrashName = pop (@fullcores);

            if ( defined($crash_data->{$crash}->{ctime}) ) {
                print $wsfd $crash, " ", $crash_data->{$crash}->{ctime}, "\n";
            } else {
                print $wsfd $crash, "\n";
            }
            my $procName = &getCrashProcName($crash);
            my $signal = &getCrashSignal($crash);
            $crashTracking->{$procName}->{count}++;
            $crashTracking->{$procName}->{fullCrashNames} .= "$fullCrashName,";
            #$crash_data->{$base_core_name}->{fullname} = $core;
            #Ignore repeated crashes!!!

            #Ignore known crashes - for the same type/signal
            # \Q..\E keeps regex metacharacters in the process name or
            # signal from being interpreted as pattern syntax.
            my $proc_signal = $procName . ":" . $signal;
            if ((defined($isGlobalSkippedProcs->{skippedProcSignals})) &&
                (grep(/\b\Q$proc_signal\E\b/,
                      @{$isGlobalSkippedProcs->{skippedProcSignals}}))) {
                print "$procName core has been seen before .. ignore\n" if ($debug);
                next;
            }
            push @_skippedProcSignals, $proc_signal;

            my $node = get_node_name_from_core($crash);
            #TODO - HACK ..
            $node = "0_RP0_CPU0" if ($sys_info->{is_thinxr});
            if ($node !~ /\w+/) {
                 my $msg = "Unable to get node name for $crash";
                 &pam_logger($sys_info, $log_dir, $bucket, $msg);
                 next;
            }

            # Data is collected only once per node/process pair for the
            # lifetime of this process.
            if ( ! $isFound{$node}{$procName} ) {
                push @uniqueCrashProcs, $procName;
                $isFound{$node}{$procName} = 1;

                $showtech_cnt = 0;
                ##########################################
                #get default (generic) commands:
                ##########################################
                # %isCmdSeen de-duplicates across the default list and the
                # event-specific list; each command is marked when queued.
                my %isCmdSeen;
                my @allCommands = ();
                foreach my $cmd_mode (@default_commands) {
                    next if ($isCmdSeen{$cmd_mode});
                    $isCmdSeen{$cmd_mode} = 1;
                    push @allCommands, $cmd_mode;
                }
                my $cmd_info = get_edcd_event_commands($platform,
                                                       $osType,
                                                       $event_type,
                                                       $procName,
                                                       \@pam_event_db_files);
                my $_commandList = $cmd_info->{$cmd_type}->{commandList};
                foreach my $cmd_mode (@$_commandList) {
                    next if ($isCmdSeen{$cmd_mode});
                    $isCmdSeen{$cmd_mode} = 1;
                    push @allCommands, $cmd_mode;
                }

                foreach my $cmd_mode (@allCommands) {
                    # Only the part before the first comma is the command.
                    my ($cmd) = split(/,/, $cmd_mode);
                    #TODO -- check actually executed command:
                    # i.e., cli_info->{show_tech_executed}
                    if ($cmd =~ /show.*tech/i) {
                        $show_tech_executed++;
                        $showtech_cnt++;
                        sleep 1;
                    }
                    my $_pid = "";
                    #TODO - HACK ..
                    $node = "0_RP0_CPU0" if ($sys_info->{is_thinxr});
                    my $cli_info = cli_agent_shell($sys_info,
                                                   $event_type,
                                                   $procName,
                                                   $_pid,
                                                   $cmd,
                                                   $node);
                    if ($cli_info->{rc} ne 1) {
                        my $msg = $cli_info->{msg};
                        &pam_logger($sys_info, $log_dir, $bucket, $msg);
                    } else {
                        push @executed_commands, $cli_info->{cmd};
                        print "111 cmd=", $cli_info->{cmd}, "\n" if ($debug);
                        my $log = "";
                        foreach my $_log (@{$cli_info->{log_files}}) {
                            $log .= $_log . ",";
                        }
                        $log =~ s/,\s*$//;
                        push @saved_log_files, $log;
                        #track the txt file if existing:
                        # Anchored so that only the trailing ".gz" is
                        # replaced, matching the substitution further down.
                        my $core_txt = $fullCrashName;
                        $core_txt =~ s/\.gz$/\.txt/;
                        if ( (!$isTxtSeen{$core_txt})&& (-f $core_txt) ) {
                            $isTxtSeen{$core_txt} = 1;
                            push @saved_log_files, $core_txt;
                        }
                    }
                } ;# foreach my $cmd_mode (@allCommands)

                #1) show logging;
                #2) show trace;
                #3) user defined CLI (JSON format)
                ####################################################
                #create log to report
                ####################################################
                my $summary_log=&save_process_crash_summary($log_dir,
                                                            \@executed_commands,
                                                            \@saved_log_files,
                                                            $show_tech_executed,
                                                            $show_tech_root,
                                                            \@_fullcores);
                push @saved_log_files, $summary_log if ($summary_log);
                # Cores and their .txt companions are added to the archive
                # list and to the retained list. Paths are quoted with
                # \Q..\E so "." and other metacharacters match literally.
                my @retained_logs = ();
                foreach my $_core (@_fullcores) {
                    if (!grep(/\Q$_core\E/, @saved_log_files) ) {
                        push @saved_log_files, $_core;
                    }
                    if (!grep(/\Q$_core\E/, @retained_logs) ) {
                        push @retained_logs, $_core;
                    }
                    my $core_txt = $_core;
                    $core_txt =~ s/\.gz$/\.txt/;
                    if ((-f $core_txt) &&
                           (!grep(/\Q$core_txt\E/, @retained_logs))) {
                        push @retained_logs, $core_txt;
                    }
                }
                if ( @saved_log_files ) {
                    my $delete_original = 1;
                    my $pr_info;
                    $pr_info->{procName} = $procName;
                    $pr_info->{node} = $node;
                    $pr_info->{event_type} = $event_type;
                    $pr_info->{delete_original} = $delete_original;
                    $pr_info->{showtech_cnt} = $showtech_cnt;
                    my $tar_name = &create_log_archive($log_dir,
                                                       \@saved_log_files,
                                                       \@retained_logs,
                                                       $pr_info);
                    &update_crash_log($crash_log, $procName, $node, $signal, $crash_fd);
                }
            } ;# if ( ! $isFound{$node}{$procName} )
        } ;#while (@newCrashList)

        # Remember the pairs first seen in this batch for later batches.
        push @{$isGlobalSkippedProcs->{skippedProcSignals}},
             @_skippedProcSignals;
        close($wsfd);
    } ;# while (1)
    # Reached only after the inotify read failed; that error was already
    # reported above, so the child simply falls through to the end of the
    # script.
} ;# if ($child_pid == 0)

# Signal handler: prints a notice on STDOUT and terminates the process with
# exit status 0. The script installs it for SIGTERM.
# NOTE(review): the notice says "KILL" although the handler is registered
# for TERM - confirm the wording.
sub INT_handler {
    my $notice = "KILL signal received. Closing all files.\n";
    print $notice;
    #close($wsfd);
    #close($crash_fd);
    exit 0;
}

# Append one crash record to a log file.
#
# The record is a single line of the form
#   <node>,<proc>,<signal>,<epoch seconds>
# where the timestamp is taken from time() at the moment of the call.
#
# Parameters:
#   $logFile - path of the file to append to (created when missing)
#   $proc    - process name written as the second field
#   $node    - node name written as the first field
#   $signal  - signal written as the third field
#   $_fd     - accepted for backward compatibility and ignored; a private
#              lexical file handle is used instead
#
# Returns 1 when the record was written and the file closed cleanly,
# 0 otherwise (a message is printed on STDOUT in that case).
#
# The prototype is kept so the definition matches the forward declaration
# at the top of the file.
sub update_crash_log($$$$$) {
    my ($logFile, $proc, $node, $signal, $_fd) = @_;

    # Three-argument open with a lexical handle: the mode cannot be altered
    # by the file name and no caller-supplied scalar is reused as a handle.
    my $fh;
    if (!open($fh, '>>', $logFile)) {
        print "Failed to open $logFile: $!\n";
        return 0;
    }

    my $timestamp = time();
    my $output = $node . "," . $proc . "," . $signal . "," . $timestamp;

    # Buffered write errors may only surface at close, so check both calls.
    my $written = print {$fh} $output, "\n";
    my $closed  = close($fh);
    if (!$written || !$closed) {
        print "Failed to write to $logFile: $!\n";
        return 0;
    }
    return 1;
}

