#!/usr/bin/perl

# xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
# Agent for monitoring crashes
#
# Jan 2017, Jieming Wang
#
# Copyright (c) 2016-2019 by Cisco Systems, Inc.
# All rights reserved.
# xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx

use lib qw ( /pkg/opt/cisco/pam/ /opt/cisco/calvados/pam/ /opt/pam/ );
use pam;

use warnings;
use strict;
use Getopt::Std;
use vars qw/ %opt /;
use Date::Calc qw(:all);

sub get_total_timestamps($);

# Day-of-week and month abbreviations as they appear in the archived
# timestamps. $wday and $month are regex alternation groups (each one adds a
# capture group when interpolated into a pattern); $month_map translates a
# month abbreviation into its 1-based month number.
my @wday_names  = qw(Mon Tue Wed Thu Fri Sat Sun);
my @month_names = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);

my $wday  = '(' . join('|', @wday_names) . ')';
my $month = '(' . join('|', @month_names) . ')';

my $month_map = {};
@{$month_map}{@month_names} = (1 .. scalar(@month_names));

use Date::Calc qw(:all);

#convert spelled months to numerical
# convert_to_wall_time($timestamp)
#
# Convert a timestamp of the form "YYYY/Mon/DD/HH:MM:SS" (month given as one
# of the abbreviations in $month_map) into the numeric value returned by
# Date::Calc's Date_to_Time().
#
# Returns 0 when the timestamp is undefined, lacks a year or day, names an
# unknown month, has a year outside 2010..2037, or does not denote a valid
# calendar date and time of day. Missing or empty hour/minute/second fields
# are treated as 0.
sub convert_to_wall_time($) {
    my $timestamp = shift;

    return 0 if (!defined($timestamp));

    my ($year, $mon, $day, $time) = split(/\//, $timestamp);
    my ($hour, $min, $sec) = defined($time) ? split(/:/, $time) : ();

    # An omitted time component counts as 0 (split drops trailing empty
    # fields, so "01::" yields only the hour).
    foreach my $part ($hour, $min, $sec) {
        $part = 0 if (!defined($part) || $part eq '');
    }

    # Reduce every numeric field to its leading run of digits so that the
    # comparisons below never operate on undef or non-numeric text.
    foreach my $part ($year, $day, $hour, $min, $sec) {
        if (defined($part) && $part =~ /^\s*(\d+)/) {
            $part = int($1);
        } else {
            return 0;
        }
    }

    return 0 if (!defined($mon) || !exists($$month_map{$mon}));
    $mon = $$month_map{$mon};

    return 0 if (($year < 2010) || ($year > 2037));

    # Date_to_Time() raises a fatal error for impossible dates or times
    # (e.g. Feb 31, hour 24, minute 60), so reject those here and report
    # them with the same 0 result as every other invalid input.
    return 0 if (!check_date($year, $mon, $day));
    return 0 if (!check_time($hour, $min, $sec));

    return Date_to_Time($year, $mon, $day, $hour, $min, $sec);
} ;# sub convert_to_wall_time($)

# _top_size_to_kb($size)
#
# Normalize a memory column of a process line: a plain number is returned
# unchanged, a value with an 'm', 'g' or 't' suffix is scaled by 1024,
# 1024^2 or 1024^3 respectively.
sub _top_size_to_kb {
    my ($size) = @_;

    my %scale_of = (
        'm' => 1024,
        'g' => 1024 * 1024,
        't' => 1024 * 1024 * 1024,
    );
    if ($size =~ /^([\d.]+)([mgt])$/) {
        return $1 * $scale_of{$2};
    }
    return $size;
}

# get_process_mem_range($sys_info, $node, $start_time, $stop_time, $process)
#
# Extract the memory samples of one process from the archived log
# /misc/disk1/cisco_support/<node>.log.
#
#   $sys_info   - hash ref; {hostType} selects the XR or Calvados pid lookup
#                 and the log file name prefix ("xr-" or "calvados-")
#   $node       - node name, e.g. 0/RP0/CPU0
#   $start_time - optional "YYYY/Mon/DD/HH:MM:SS"; when false, sampling
#                 starts at the first timestamp in the log
#   $stop_time  - optional, same format; when false, the last timestamp in
#                 the log is used
#   $process    - process name passed to the pid lookup
#
# Returns a string of ';'-separated records, each
# "timestamp,pid,proc,res,delta_res,shr". res and shr are the normalized
# column values divided by 1024 and formatted with two decimals; delta_res
# is the change of res against the previous record ("-" for the first one).
# Returns "" when the pid, the log file or enough samples (at least three
# timestamps) cannot be found.
sub get_process_mem_range($$$$$) {
    my ($sys_info, $node, $start_time, $stop_time, $process) = @_;

    my $leak_data = "";

    #xr-vm_node0_RP0_CPU0
    #xr-0_RP0_CPU0.log
    $node =~ s/\//_/g;
    $node =~ s/_+/_/g;
    $node =~ s/.*vm_node//g;

    my $proc_info;
    if ($sys_info->{hostType} =~ /xr/i) {
        $proc_info = get_xr_pid_info($process, $node);
    } elsif ($sys_info->{hostType} =~ /calv/i) {
        $proc_info = get_calvados_pid_info($process, $node);
    }
    if (!defined($proc_info) || !defined($proc_info->{pid}) ||
        ($proc_info->{pid} !~ /\d+/)) {
        print "Unable to find pid for $process\n";
        return $leak_data;
    }
    my $pid = $proc_info->{pid};

    # Derive the log file base name from the node name.
    if ($sys_info->{hostType} =~ /xr/i) {
        $node =~ s/^\s*(xr\-)*/xr\-/;
    } elsif ($sys_info->{hostType} =~ /cal/i) {
        $node =~ s/^/calvados\-/;
        #default VM1
        if ($node !~ /_VM\d+$/) {
            $node =~ s/$/_VM1/;
        }
    }

    # Date header line; captures: 1 weekday, 2 month, 3 day, 4 hh:mm:ss,
    # 5 optional extra word (last repetition), 6 year.
    my $date_re = qr/^\s*$wday\s+$month\s+(\d+)\s+(\d{2}:\d{2}:\d{2})\s+(\S+\s+)*(\d{4})/;

    # Process line for $pid; captures: 1 virt, 2 res, 3 shr, 4 cpu percent,
    # 5 mem percent, 6 cpu time, 7 process name.
    my $pid_re = qr/^\s*\Q$pid\E\s+\w+\s+\d+\s+\d+
                    \s+(\S+)\s+(\S+)\s+(\S+)
                    \s+\S+\s+(\S+)\s+(\S+)
                    \s+(\d+:\d+\.\d+)\s+(\S+)/x;

    my $cisco_support_dir = "/misc/disk1/cisco_support/";
    my $dir_handle;
    if (!opendir($dir_handle, $cisco_support_dir)) {
        return $leak_data;
    }
    my @files = readdir($dir_handle);
    closedir($dir_handle);

    # \Q..\E: the node name comes from the command line and must be matched
    # literally, not compiled as a pattern.
    my $input = "";
    foreach my $file (@files) {
        if ($file =~ /\Q$node\E\.log$/) {
            $input = $cisco_support_dir . "/" . $node . ".log";
            last;
        }
    }
    if (! -f $input) {
        print "No memory data found.\n";
        print "i.e., no data has been collected, or, archived after reload.\n";
        print "or, wrong node name '$node'.\n" if ($node =~ /\w+/);
        return $leak_data;
    }

    # Check the number of samples before touching individual timestamps.
    my $time_info = get_total_timestamps($input);
    my @timestamps = @{$time_info->{time_lines}};
    if ($#timestamps < 2) {
        print "Warning - not enough memory samples\n";
        return $leak_data;
    }

    # Without a start time every sample from the top of the log is taken.
    my $is_started = $start_time ? 0 : 1;
    my $start_wtime = $start_time ? convert_to_wall_time($start_time) : 0;
    if (!$stop_time) {
        $stop_time = $timestamps[$#timestamps];
    }
    my $stop_wtime = convert_to_wall_time($stop_time);

    my $log_handle;
    if (!open($log_handle, '<', $input)) {
        return $leak_data;
    }

    my @records = ();
    my $timestamp;
    my $prev_res;
    my $is_selected = 0;
    my $is_stopped = 0;
    while (my $line = <$log_handle>) {
        $line =~ tr/\r\n//d;

        if ($line =~ $date_re) {
            $timestamp = $6 . "/" . $2 . "/" . $3 . "/" . $4;
            my $wtime = convert_to_wall_time($timestamp);

            if (!$is_started && ($wtime >= $start_wtime)) {
                $is_started = 1;
            }
            $is_selected = $is_started;

            # The sample that reaches the stop time is still reported; the
            # scan ends at the date line that follows it.
            last if ($is_stopped);
            if ($wtime >= $stop_wtime) {
                $is_stopped = 1;
            }
        }
        next if (!$is_selected);

        if ($line =~ $pid_re) {
            my ($raw_res, $raw_shr, $proc) = ($2, $3, $7);

            my $res = sprintf("%.2f", _top_size_to_kb($raw_res) / 1024.0);
            my $shr = sprintf("%.2f", _top_size_to_kb($raw_shr) / 1024.0);
            my $delta_res = defined($prev_res)
                          ? sprintf("%.2f", $res - $prev_res)
                          : "-";

            push @records, "$timestamp,$pid,$proc,$res,$delta_res,$shr";
            $prev_res = $res;
        }
    } ;#while (my $line = <$log_handle>)
    close($log_handle);

    $leak_data = join(";", @records);
    return $leak_data;
} ;# sub get_process_mem_range($$$$$)

# get_total_timestamps($input)
#
# Scan the log file $input for date header lines and "top - ... up ..."
# summary lines. grep does the filtering so that only the matching lines are
# read into this process.
#
# Returns a hash ref with two array refs:
#   time_lines   - one "YYYY/Mon/D/HH:MM:SS" string per date header line
#   uptime_lines - one entry per top summary line: the uptime in minutes, or
#                  the raw uptime text when it could not be parsed
# Both arrays are empty when grep cannot be started or nothing matches.
sub get_total_timestamps($) {
    my $input = shift;

    my $grep_date_pattern = "$wday *$month";
    #top - 20:53:35 up 2 days,  4:12,  1 user,  load average: 0.05, 0.03, 0.05
    my $grep_top_pattern = 'top +\- +([0-9]+:[0-9]+:[0-9]+) +up +(.*), +[0-9]+ +user';

    my @time_lines = ();
    my @uptime_lines = ();
    my $info = {
        time_lines   => \@time_lines,
        uptime_lines => \@uptime_lines,
    };

    # List-form pipe open: grep is executed directly, without a shell, so
    # the file name is never subject to shell interpretation.
    my $grep_handle;
    if (!open($grep_handle, '-|', 'grep', '-E',
              "^ *($grep_date_pattern|$grep_top_pattern)", '--', $input)) {
        return $info;
    }

    while (my $time_line = <$grep_handle>) {
        chomp($time_line);
        if ( $time_line =~ /^\s*$wday\s+$month\s+(\d+)\s+(\d{2}:\d{2}:\d{2})\s+(\S+\s+)*(\d{4})/ ) {
            # captures: 2 month, 3 day, 4 hh:mm:ss, 6 year
            push @time_lines, $6 . '/' . $2 . '/' . $3 . '/' . $4;
        } elsif ( $time_line =~ /^\s*$grep_top_pattern/) {
            my $uptime = $2;
            if ($uptime =~ /((\d+) +day[s]?, +)*(\d+) +min/) {
                #5 days, 16 min
                my $up_day = $2 || 0;
                my $up_min = $3 || 0;
                $uptime = 24 * 60 * $up_day + $up_min;
            } elsif ($uptime =~ /((\d+) +day[s]*, +)*(\d+:)*(\d+)/) {
                #2 days,  4:12
                my $up_day = $2 || 0;
                my $up_hour = $3 || 0;
                my $up_min = $4 || 0;
                $up_hour =~ s/://;
                $uptime = 24 * 60 * $up_day + 60 * $up_hour + $up_min;
            }
            push @uptime_lines, $uptime;
        }
    }
    close($grep_handle);

    return $info;
} ;# sub get_total_timestamps($)

# get_ltrace_snapshot($node, $start_time, $stop_time, $process)
#
# Read the archived ltrace file of $process on $node, i.e. the first file
# named "<pid>-*" in /misc/disk1/cisco_support/ltrace/<node>.
#
#   $start_time, $stop_time - optional "YYYY/Mon/DD/HH:MM:SS" bounds. When
#       at least one converts to a non-zero wall time, only lines whose first
#       comma-separated field lies within the bound(s), inclusive, are kept.
#       (The first field is assumed to be a timestamp that
#       convert_to_wall_time() understands -- confirm against the collector.)
#
# Returns the selected lines (line endings stripped, blank lines skipped) as
# a list. The list is empty when the pid, the directory or the file cannot
# be found; a diagnostic is printed for most of these cases.
sub get_ltrace_snapshot($$$$) {
    my ($node, $start_time, $stop_time, $process) = @_;

    my @mem_leaks = ();

    #xr-vm_node0_RP0_CPU0
    #xr-0_RP0_CPU0.log
    $node =~ s/\//_/g;
    $node =~ s/_+/_/g;
    $node =~ s/.*vm_node//g;
    my $proc_info = get_xr_pid_info($process, $node);
    if (!defined($proc_info) || !defined($proc_info->{pid}) ||
        ($proc_info->{pid} !~ /\d+/)) {
        print "Unable to find pid for $process\n";
        return @mem_leaks;
    }
    my $pid = $proc_info->{pid};

    my $cisco_support_dir = "/misc/disk1/cisco_support/ltrace/";
    $cisco_support_dir .= "/" . $node;
    my $dir_handle;
    if (!opendir($dir_handle, $cisco_support_dir)) {
        print "Unable to find ltrace data (i.e., no ltrace data has been collected).\n";
        print "or, no ltrace data has been archived (i.e. due to reload).\n";
        return @mem_leaks;
    }
    my @files = readdir($dir_handle);
    closedir($dir_handle);

    my $start_wtime = $start_time ? convert_to_wall_time($start_time) : 0;
    my $stop_wtime  = $stop_time  ? convert_to_wall_time($stop_time)  : 0;

    # Fewer than three directory entries: nothing to look at.
    if (scalar(@files) < 3) {
        return @mem_leaks;
    }

    my $input = "";
    foreach my $file (@files) {
        if ($file =~ /^\Q$pid\E\-/) {
            $input = $cisco_support_dir . "/" . $file;
            last;
        }
    }
    if (! -f $input) {
        # Report and return an empty list like every other failure path,
        # instead of terminating the whole script from inside a helper.
        print "Invalid input '$input' (possible wrong node name: '$node').\n";
        return @mem_leaks;
    }

    my $ltrace_handle;
    if (!open($ltrace_handle, '<', $input)) {
        print "Unable to open ltrace file: $!.\n";
        return @mem_leaks;
    }
    my $unbounded = ((!$start_wtime) && (!$stop_wtime));
    while (my $line = <$ltrace_handle>) {
        $line =~ s/[\r\n]//g;
        next if ($line !~ /\S/);

        if (!$unbounded) {
            my ($time) = split(/,/, $line);
            my $wtime = convert_to_wall_time($time);
            next if ($start_wtime && ($wtime < $start_wtime));
            next if ($stop_wtime  && ($wtime > $stop_wtime));
        }
        push @mem_leaks, $line;
    } ;#while (my $line = <$ltrace_handle>)
    close($ltrace_handle);

    return @mem_leaks;
} ;# sub get_ltrace_snapshot($$$$)

# usage()
#
# Print the command-line help to STDERR and terminate the script (bare
# exit, i.e. exit status 0).
sub usage {
    my @help_lines = (
        "Usage: $0 <options>",
        "\t-n <node name> - mandatory. e.g. -n 0/RP0/CPU0, 0/RP1/CPU0, 0/0/CPU0",
        "\t-p <process name> - mandatory. e.g. -p gsp, -p ipv4_rib, etc",
        "\t-s <start time> - optional, format 2016/Oct/01/01:00:00. If omitted, it will try to start from the earliest timestamp available.",
        "\t-e <end time> - optional, format 2016/Oct/05/10:00:00. If omitted, it will try to use the latest timestamp available.",
        "\t(Accepted month abbreviation: $month)",
        "\t-L <print data for last L hours>.",
        "\t(option -s/-e and option -L are mutually exclusive.)",
        "\t-h print this message",
        "\te.g. to get total memory snapshots for aib:",
        "\t$0 -n 0/RSP0/CPU0 -p aib",
    );
    foreach my $help_line (@help_lines) {
        print STDERR $help_line . "\n";
    }
    exit;
}

# Parse and validate the command line. Every failure prints a diagnostic
# (where one applies) and ends in usage(), which terminates the script.
getopts( "n:s:e:p:L:h", \%opt ) or usage();

# -h is accepted by getopts above, so honour it explicitly.
if ($opt{h}) {
    usage();
}
# Both -n and -p are mandatory; check each one so that a single missing
# option is reported instead of being matched against the patterns below
# as an undefined value.
if (!$opt{n}) {
    print "Missing mandatory option -n <node name>\n";
    usage();
}
if (!$opt{p}) {
    print "Missing mandatory option -p <process name>\n";
    usage();
}
if (($opt{n} !~ /\d+\/(R[S]?P)?\d+\/CPU\d|B\d+\/CB\d+/i) &&
    ($opt{n} !~ /\d+\/(R[S]?P|LC)?\d+|B\d+\/CB\d+/i)) {
    print "Invalid node name: '$opt{n}'\n";
    usage();
}
if ( $opt{p} !~ /\w+/i) {
    print "Invalid process name: '$opt{p}'\n";
    usage();
}
# e.g. 2016/Oct/01/01:00:00
my $timestamp_pat = '\d{4}\/' . ${month} . '\/\d+\/\d{0,2}:\d{0,2}:\d{0,2}';
if ((defined($opt{s})) && ($opt{s} !~ /^$timestamp_pat/)) {
    print "Invalid format for start time: '$opt{s}'\n";
    usage();
}
if ((defined($opt{e})) && ($opt{e} !~ /^$timestamp_pat/)) {
    print "Invalid format for end time: '$opt{e}'\n";
    usage();
}
if ( (defined($opt{L})) && ($opt{L} !~ /^\d+$/i)) {
    print "Invalid value: '$opt{L}'\n";
    print "Expect an integer (number of hours)\n";
    usage();
}
if ( (($opt{s}) || ($opt{e})) && ($opt{L}) ) {
    print "\toption -s/-e and option -L are mutually exclusive.\n";
    usage();
}

# Pick up the validated options. With -L the start time is computed as
# "now minus L hours" and the stop time is left open.
my ($node, $process, $start_time, $stop_time) = @opt{qw(n p s e)};

if ($opt{L}) {
    # scalar localtime() yields e.g. "Thu Sep  9 12:34:56 2017"
    my ($now_dow, $now_mon, $now_day, $now_hms, $now_year) =
                split(/\s+/, scalar(localtime()));
    my ($now_hour, $now_min, $now_sec) = split(/:/, $now_hms);
    $now_mon = $$month_map{$now_mon};

    # Go back $opt{L} hours (delta of 0 days, -L hours, 0 min, 0 sec).
    my @then = Add_Delta_DHMS($now_year, $now_mon, $now_day,
                              $now_hour, $now_min, $now_sec,
                              0, -$opt{L}, 0, 0);
    my ($then_year, $then_mon, $then_day,
        $then_hour, $then_min, $then_sec) = @then;

    # Translate the month number back into its abbreviation.
    foreach my $abbrev (keys %{$month_map}) {
        next if ($$month_map{$abbrev} ne $then_mon);
        $then_mon = $abbrev;
        last;
    }

    $start_time = join("/", $then_year, $then_mon, $then_day,
                       join(":", $then_hour, $then_min, $then_sec));
    $stop_time = "";
    #print "Starttime = $start_time\n";
}

# Collect the samples and print the report: one row per memory sample, and
# on XR hosts an additional column with the ltrace value recorded under the
# same timestamp ("-" when there is none).
my $sys_info = getOsType();
my $osType = $sys_info->{hostType};
my $is_xr = ($osType =~ /xr/i) ? 1 : 0;

my $mem_leaks = get_process_mem_range($sys_info, $node, $start_time, $stop_time, $process);
my @ltrace_mem;
my @total_mem = ();
my $ltrace_info;

# Without memory samples there is nothing to print, so stop before the
# ltrace lookup repeats the same pid search and its diagnostics.
if ($mem_leaks eq "") {
    print "Warning - not enough memory samples (please wait for one hour)\n";
    exit;
}

if ($is_xr) {
    @ltrace_mem = get_ltrace_snapshot($node, $start_time, $stop_time, $process);
    foreach my $line (@ltrace_mem) {
        # first field: timestamp, second field: ltrace value
        my ($time, $lt_rss) = split(/,/, $line);
        next if (!defined($time));
        $ltrace_info->{$time} = $lt_rss;
    }
    if (scalar(@ltrace_mem) < 1) {
        print "Warning - no ltrace data has been collected (please wait for one hour).\n";
    }
    printf("%22s%7s%15s%15s\n", "Timestamp", "PID", "Memory(kB)", "Ltrace(kB)");
} else {
    printf("%22s%7s%15s\n", "Timestamp", "PID", "Memory(kB)");
}

foreach my $line (split(/;/, $mem_leaks)) {
    # record layout: timestamp,pid,proc,res,delta_res,shr
    my ($time, $pid, $proc, $total, $delta, $shr) = split(/,/, $line);
    if (!$is_xr) {
        printf("%22s%7d%15.2f\n", $time, $pid, $total*1024);
    } elsif ($ltrace_info->{$time}) {
        printf("%22s%7d%15.2f%15.2f\n", $time, $pid, $total*1024, $ltrace_info->{$time});
    } else {
        printf("%22s%7d%15.2f%15s\n", $time, $pid, $total*1024, "-");
    }
}
