#!/usr/bin/perl

# xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
# Agent for moniroting cpu, memory, and disk usage
#
# Feb 2016, Jieming Wang
# 
# Copyright (c) 2016-2019 by Cisco Systems, Inc.
# All rights reserved.
# xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx

use lib qw ( /pkg/opt/cisco/pam/ /opt/cisco/calvados/pam/ /opt/pam/ );

# TODO:
# check fd leaking

use Cwd;
my $cwd = cwd;

use pam;
# don't include Cisco_pam_web which can increase memory
use pam_perf;
use pam_ltrace;

use strict;
use warnings;
use Time::HiRes qw( usleep ualarm gettimeofday tv_interval );
#POSIX will conflict with ZMQ
#use POSIX;
use File::Basename;
use File::Copy;
use Logfile::Rotate;
use Expect;

use Getopt::Std;
use vars qw/ %opt /;

use JSON::PP;
use Data::Dumper;

# ZMQ:
use ZMQ::LibZMQ3;
# Need to quote ":all" or it will complain ZMQ_ULL, ZMQ_RCVMORE
use ZMQ::Constants qw(:all);

sub collect_top($$$$);
sub mount_var_log($);
sub get_xr_command_history($$$);
sub get_clihistory_last_timestamp($);
sub getTopOpt($);
sub dump_memory_logs($$$$$$);
sub dump_cpu_logs($$$$);
sub save_disk_logs($$$$);
sub rotate_ltraces($$$);
sub rotate_clihistory($$$);
sub get_false_leak_proc_info($$);
sub do_perl_install($$);
sub check_perl_rpm();


##########################################
# parameters
##########################################
my ($msg, $ret);

my ($sys_info,
    $osType,
    $boardtype,
    $platform,
    $vf1_3073_ip,
    $version);

# post perl installaiton for panini:
my $do_panini_perl_install = 0;
if (check_perl_rpm) {
    $do_panini_perl_install = 1;
}

my ($cpu_arch_dir, $log_dir, $pam_root);
my $event_type;
my ($localtime, $node_info);
my $bucket = "resource_usage";

my $retention_period      = 14 * 24 * 3600; #delete old tgz file after 14 days
#Note (TODO): log_rotation_interval won't have any affect:
my $log_rotation_interval = 7 * 24 * 3600; #delete old logs after 7 days
my $stale_log_rotation_interval = 1 * 24 * 3600; #delete stale logs on stadbyRP
my $log_rotation_size_limit = 15000000;  #15MB translating to 18 days of data
my $log_rotation_count = 3;
my $cli_log_rotation_size_limit = 2048000;
my $cli_log_rotation_count = 1;

my $xr_default_cli_commands = "/misc/scratch/clihistory/commands";
my $pid_dir = "/opt/cisco/pam/run/";

$log_dir = get_PamLogDir();
$pam_root = $log_dir;
my $show_tech_root = "/misc/disk1/showtech/";
my $proc_map_file = $pam_root . "proc_map.txt";
mkdir $log_dir if (! -d $log_dir);

getopts( "I:m:cd", \%opt );

my $do_data_collect = $opt{c} ? 0 : 1;

my $refresh_interval = $opt{I} || 1800;
my $debug = $opt{d};
my $max_ltrace = 200 * 1024; #200MB
# on XR, need to wait for memory (ltrace, e.g.) to initilize:
my $minimum_memory_initialized_time = $opt{m} || 2;
if (defined($minimum_memory_initialized_time) &&
     ($minimum_memory_initialized_time !~ /^\d+/)) {
    print "Wrong input: '$opt{m}' - expect integer (# of days).\n";
    exit;
}
$minimum_memory_initialized_time *= 24 * 3600;
my $memory_tracking_time = 0;
my $ltrace_ratio = 0.9; # ignore if ltrace counts more than 90%

my $hostname = `uname -n`;
$hostname =~ s/[\r\n]//g;

my $my_pid = $$;
my $this = $0;
my $bname = $this;
$bname =~ s/.*\///g;

$sys_info = &getOsType();
$osType = $sys_info->{hostType};
$version = &getWS($osType);
$boardtype = $sys_info->{boardtype};
$platform = $sys_info->{platform};
#local eth-vf1.3073 IP address:
$vf1_3073_ip = $sys_info->{vf1_3073_ip};

#Folder to mount proc
my $proc_mnt_root_dir = "/opt/cisco/pam/mnt/";
#Folder to store ltrace data (mainly for XR)
my $ltrace_depot_dir =  "/misc/disk1/cisco_support/ltrace/";
my $ltrace_cmd = "/pkg/opt/cisco/pam/collect_ltrace.pl";
my %is_smaps_collected;

#pid to proc map:
my $pid_proc_map;
#shmwin and trace file extension:
my $trace_name_tpl = "-trace-shmwin.txt";

if ($boardtype !~ /R[S]?P|CC/i) {
    print "$0 can only run on RP.\n";
    exit(1);
}

my $mode = "exec";
if ($osType =~ /calvados|sysadmin/i) {
    $mode = "sysadmin";
}

########################################
#make sure only one instance running
########################################
$ret = &check_process($bname, $sys_info, $my_pid);
if ((scalar(@$ret)) > 0) {
    print "process $bname (pid=@$ret) is already running.\n";
    exit(1);
}
my $pid_file = $pid_dir . "/pid";

#only on RP
my $core_info; # used to hold core files info

$version = &getWS($osType);

# load balancing: each RP is responsible for one chassis
my $chassis_id = get_chassis_id($sys_info);
if ($chassis_id !~ /^\d+$/) {
    print "Invalid chassis ID $chassis_id (expect integer).\n";
    exit(1);
}
$node_info = get_active_rp_nodes_by_chassis_id($sys_info, $chassis_id);
#default thresholds:
my $process_cpu_threshold = 100;

my $dumpcore_pat = '^\s*(corehelper_gen\s+\-o|dumpcore)\s+run';

my $cli_data;
my $cli_last_timestamp = 0;
my $cli_last_mtime = 0;

######################################
#update the timestamp:
######################################
my $dst_clihistory_dir = $log_dir . "/clihistory";
my $dst_clihistory = $dst_clihistory_dir . "/" . $osType . "-clihistory";
&createFolder($dst_clihistory_dir) if (! -d $dst_clihistory_dir);
my $last_timestamp = get_clihistory_last_timestamp($dst_clihistory);
$cli_data->{last_timestamp} = $last_timestamp;

#Disable for now until we figure out how to check ltrace memory
my @default_skippedMemProcs = ('ipv4_rib',
                               'ipv6_rib',
                               #'pam_memory_leaker',
                               'ospf',
                               'gsp',
                               'bgp',
                               'te_control',
                               'mpls_lsd',
                               'mpls_ldp',
                               'netio',
                               'isis',
                               'mibd',
                               'mibd_interface',
                               'fib_mgr',
                               'parser_server');
@default_skippedMemProcs = ();

if ($osType =~ /xr/i) {
    my @xr_history_files = &mount_xr_cli_history($sys_info);
    $cli_data = &get_xr_command_history(\@xr_history_files,
                                        $cli_last_timestamp,
                                        $cli_last_mtime);
    &umount_xr_cli_history($sys_info);
}
if ($osType =~ /calv/i) {
    my @confd_audit_logs = &mount_var_log($node_info);
    $cli_data = &get_calv_command_history(\@confd_audit_logs,
                                          $cli_last_timestamp,
                                          $cli_last_mtime);
    &umount_var_log($node_info);
}

my $uptime = &getUptime;
#give some time for the system to stablize
my $minimum_soaking_time = 1 * $refresh_interval;
$minimum_soaking_time = $minimum_soaking_time<600?600 : $minimum_soaking_time;

my $version_content = &getVersionContent("/");

my $number_of_pstack = 3;
my $pstack_sleep = 30;
my (%isProcSeen, %isKnownLeaks, %isGlobalKnownLeaks);
my ($isSkippedMemProcs,
    $isGlobalSkippedMemProcs,
    $isSkippedCpuPids,
    $isGlobalSkippedCpuPids,
    $isSkippedDisk,
    $isGlobalSkippedDisk);

#Keep track memory leak info:
my ($dumpCoreInfo);
#Number of cores to collect for analyzing memory leaks:
my $no_dumpcores = 2;

my $user_edcd_dir = "/opt/pam/etc/";

#use /opt/cisco/pam/ for logging which will be archived/reotated
my $pam_log_dir = "/opt/cisco/pam/";
mkdir "/opt/cisco/" if (! -d "/opt/cisco/");
mkdir $pam_log_dir if (! -d $pam_log_dir);
my $memory_leak_log = $pam_log_dir . "/memory_leaks.log";
my @oldMemoryLeaks = getOldInstances($memory_leak_log);
my $cpu_hog_log = $pam_log_dir . "/cpu_hog.log";
my @oldCpuHogs = getOldInstances($cpu_hog_log);
my $disk_usage_log = $pam_log_dir . "/disk_usage.log";
my @oldDiskUsages = getOldInstances($disk_usage_log);
my ($mem_fd, $cpu_hog_fd, $disk_usage_fd);

my @_skippedMemProcs = ();
foreach my $line (@oldMemoryLeaks) {
    my ($node, $proc, $time) = split(/,/, $line);
    $proc =~ s/\s+//;
    push @_skippedMemProcs, $proc;
    $isGlobalKnownLeaks{$proc} = 1;
    print "Known process with memory leak: $proc\n" if ($debug);
}
$isGlobalSkippedMemProcs->{skippedMemProcs} = \@_skippedMemProcs;

my @pam_event_db_files=('/opt/cisco/pam/pam_event.json');
my $false_leak_process_file = '/opt/cisco/pam/false_leak_processes.json';
if ($osType =~ /xr/i) {
    @pam_event_db_files=('/pkg/opt/cisco/pam/pam_event.json');
    $false_leak_process_file = '/pkg/opt/cisco/pam/false_leak_processes.json';
}
my $false_leak_proc_info = get_false_leak_proc_info($sys_info,
                                                    $false_leak_process_file);
my $edcd_info = get_user_pattern_files($user_edcd_dir);
my @user_event_files = @{$edcd_info->{event_files}};
@pam_event_db_files=(@pam_event_db_files, @user_event_files);

$SIG{'TERM'} = 'INT_pid_handler';
my $min_free_threshold = 307200; #300MB:
my $memfree = get_memfree();
#Only start when there is enough memory to prevent exhausting memory
if ($memfree < $min_free_threshold) {
    #free cache on calvados in case its too low:
    if ($osType =~ /calvados/) {
        `sysctl vm.drop_caches=3`;
    }
    $memfree = get_memfree();
}
if ($memfree < $min_free_threshold) {
    sleep 60;
    unlink $pid_file if (-f $pid_file);
    exit;
}

#in EDCD, there are 2 type of commands: commands, and shell_commands
#take shell command for XR (for now)
my $cmd_type = "shell_commands";
if ((($osType =~ /xr/i) && (-f "/pkg/bin/xr_cli")) ||
     ($osType =~ /calvados|sysadmin/i)) {
    $cmd_type = "commands";
}

my $total_data_collection_count = 0;

unless (fork) {
    my $rt = create_pid_file($sys_info, $pid_dir, $$);
    exit(1) if ($rt ne 1);
    while (1) {
        print "Starting over again....\n" if ($debug);
        my $showtech_cnt = 0;
        $memfree = get_memfree();
        if ($memfree < $min_free_threshold) {
            #free cache on calvados in case its too low:
            if ($osType =~ /calvados/) {
                `sysctl vm.drop_caches=3`;
            }
            $memfree = get_memfree();
        }
        if ($memfree < $min_free_threshold) {
            #restart to free memory (as perl cannot free)
            sleep 60;
            unlink $pid_file if (-f $pid_file);
            exit;
        }
        #create the pid file in case it's not created (or deleted)
        if (! -f $pid_file) {
            $rt = create_pid_file($sys_info, $pid_dir, $$);
            exit(1) if ($rt ne 1);
        }
        #Skip if harddisk is missing
        if ($log_dir !~ /^\s*[\/]*(harddisk:|misc\/+disk1|\/var\/xr\/disk1)/) {
            sleep 60;
            next;
        }

        #TODO - in addition, also check process runtime
        $uptime = &getUptime;
        if ($uptime < $minimum_soaking_time) {
            sleep 60;
            next;
        }
        #Only run on the active RP
        #wait until it is active
        $node_info = get_active_rp_nodes_by_chassis_id($sys_info, $chassis_id);
        if ($osType =~ /calvados/i) {
            my $first_calv_ip = get_first_calv_ip($node_info);
            if ($vf1_3073_ip ne $first_calv_ip) {
                sleep 60;
                next
            }
        }

        if ((!$node_info->{isActive}) && ($do_data_collect)) {
            sleep 60;
            #$total_data_collection_count = 0;
            #cleanup stale log on standby RPs
            my @cleanedLogs = &clean_stale_logs($log_dir,
                                                $stale_log_rotation_interval,
                                                $log_rotation_count);
            next;
        }

        #Panini: check if perl package is installed on LC, and install if not
        if (!$do_panini_perl_install) {
            if (($platform =~ /ncs6k|panini/i) && ($osType =~ /xr/i)) {
                  my $rc = &do_perl_install($node_info, $sys_info);
                  if ($rc) {
                      $do_panini_perl_install = 1;
                  }
             }
        }

        #cleanup old tgz files:
        &cleanup_tgz($log_dir, $retention_period) if ($do_data_collect);
        if ($do_data_collect) {
            &cleanup_tgz($log_dir, $retention_period);
            if ($osType =~ /calvados/i) {
                &cleanup_tgz($show_tech_root, $retention_period);
            }
        }

        ##########################################
        #rotate logs:
        ##########################################
        #cli history
        my @rotatedLogs = ();
        if ($do_data_collect) {
            &rotate_clihistory($dst_clihistory_dir,
                               $cli_log_rotation_size_limit,
                               $cli_log_rotation_count);
    
            #generic cli history
            my @rotatedLogs = &do_log_rotation($log_dir,
                                               $log_rotation_interval,
                                               $log_rotation_size_limit,
                                               $log_rotation_count);
            foreach my $log (@rotatedLogs) {
                print "log rotated: $log\n" if ($debug);
            }
            if (@rotatedLogs) {
                ##########################################
                #For XR, also rotate ltrace files (smaps)
                ##########################################
                if ($osType =~ /xr/i) {
                   $ret = rotate_ltraces($ltrace_depot_dir,
                                         \@rotatedLogs,
                                         $log_rotation_count);
                }
                #drop cache
                system("sysctl vm.drop_caches=3 >/dev/null");
                unlink $pid_file if (-f $pid_file);
                exit;
            }
        }

        my $localtime = localtime();

        ######################################################
        #collect top - performance
        ######################################################
        # performance collection
        my $now = time();
        my $perf_stat = collect_top($sys_info,
                                    $node_info,
                                    $log_dir,
                                    $chassis_id);
        #should collect for each event: Memory, CPU, and disk?
        my $ret = $perf_stat->{rc};
        #-----
        #Memory START
        #-----
        my $incremental_threshold = 1; #1 MB/hour - minimum resolution from top
        my $minimum_samples       = 8; # 7 hours

        #############################################
        #step 1 - check memory
        #############################################
        #Track known leaks and skip reporting repeatedly
        my $proc_info;
        $event_type = "memory_leak";

        my @skippedMemProcs = ();
        foreach my $node (@{$perf_stat->{nodeList}}) {
            my @retained_logs = ();

            next if (!defined($perf_stat->{$node}->{log}));
            next if (! -f $perf_stat->{$node}->{log});
            my $top_log = $perf_stat->{$node}->{log};
            push @retained_logs, $top_log;
            if ($osType =~ /xr/i) {
                my $xr_uptime = $perf_stat->{$node}->{uptime};
                if ($xr_uptime < $minimum_memory_initialized_time) {
                    my $msg = "Uptime ($xr_uptime seconds) on $node is less";
                    $msg .= " than memory initialization time (";
                    $msg .= $minimum_memory_initialized_time . " seconds)\n";
                    print $msg if ($debug);
                    next;
                } else {
                    #Started to track memory leak...
                    #only use from data after $minimum_memory_initialized_time
                    if (!$memory_tracking_time) {
                        $memory_tracking_time = time() -
                                               $xr_uptime +
                                               $minimum_memory_initialized_time;
                    }
                }
            }
            my $ip = $perf_stat->{ip}->{$node};
            print "memory_tracking_time=$memory_tracking_time\n" if ($debug);
            my $time_info = get_total_timestamps($top_log,
                                                 $memory_tracking_time);
            my @_timestamps = @{$time_info->{time_lines}};

            #check if ltrace memory exceeds limit (max_ltrace):

            ###################################
            #collect ltrace data from smaps
            #for all XR processes at begining
            ###################################
            if (scalar(@_timestamps) < 2) {
                print "Too less memory samplings: ",
                            scalar(@_timestamps), "\n" if ($debug);
                next;
            }

            print "Number of samplings ($node): ",
                           scalar(@_timestamps), "\n" if ($debug);

            my @up_times = @{$time_info->{uptime_lines}};
            my $reload_info = get_blocks_by_reload($time_info);

            if ($debug) {
                print "reload_starttimestamps=",
                   scalar(@{$reload_info->{reload_starttimestamps}}), "\n";
            }
            my $last_reload_idx = 
                   scalar(@{$reload_info->{reload_starttimestamps}}) - 1;
            #next if ($last_reload_idx < 1);
#TODO - start_time taken from data after $minimum_memory_initialized_time
            my $start_time =
                 @{$reload_info->{reload_starttimestamps}}[$last_reload_idx];
            my $stop_time = 
                 @{$reload_info->{reload_stoptimestamps}}[$last_reload_idx];
            my $start_uptime = 
                 @{$reload_info->{reload_startuptimes}}[$last_reload_idx];
            my $stop_uptime = 
                 @{$reload_info->{reload_stopuptimes}}[$last_reload_idx];

            #TODO check pam.pm to avoid divide by zero!!!!
            next if ($start_time eq $stop_time);

            my $filtered_leak_info;
            if (defined($isGlobalSkippedMemProcs->{skippedMemProcs})) {
                @skippedMemProcs = 
                              @{$isGlobalSkippedMemProcs->{skippedMemProcs}};
            }
            my %memSeen;
            foreach my $p (@skippedMemProcs, @default_skippedMemProcs) {
                if (!$memSeen{$p}) {
                    $memSeen{$p}=1;
                    push @skippedMemProcs, $p;
                }
            }
            #track the IP - used to find remote process name
            $false_leak_proc_info->{_ip} = $ip,
            $pid_proc_map = pam::get_pid_proc_map($node, $ltrace_depot_dir);

            #TODO reduce CPU consumption - avoid overloading...
            $proc_info = get_dynamic_leaks($top_log,
                                           $start_time,
                                           $stop_time,
                                           $start_uptime,
                                           $stop_uptime,
                                           $filtered_leak_info,
                                           $minimum_samples,
                                           $incremental_threshold,
                                           $false_leak_proc_info,
                                           $pid_proc_map,
                                           \@skippedMemProcs);
            #TODO - need to track per node
            next if ( !defined($proc_info->{leaked_pids}) ||
                       !@{$proc_info->{leaked_pids}} );

            ##########################################
            #Leaks found ... collect core, save logs
            ##########################################
            if ($debug) {
                my $_msg = "DEBUG: Leaks found on $node";
                $_msg .= " (pid: @{$proc_info->{leaked_pids}})\n";
                print $_msg;
            }

            my $_msg = "Significant memory increases found on $node.";
            $_msg .= " Collect additional data.\n";
            print $_msg if ($debug);
            if (!$do_data_collect) {
                print "Verify mode - skip collecting data/generating reportsn";
                next;
            }

            foreach my $pid (@{$proc_info->{leaked_pids}}) {
                my @executed_commands = ();
                my @saved_log_files = ();
                my %isExecuted;
                my $show_tech_executed = 0;
                my @dumped_mem_logs = ();
                $showtech_cnt = 0;

                my $proc = $proc_info->{$pid}->{proc};
                next if ($proc =~ /monitor_(cp|sh|cr)/);
                #Check if the process is still running, and skip dumpcore etc.
                my $cmd_proc = "";
                if ( (!defined($proc_info->{$pid}->{terminated})) ||
                     (!$proc_info->{$pid}->{terminated}) ) {
                    $cmd_proc = verify_remote_proc_via_top($ip, $pid);
                    if ( ($cmd_proc eq "") || ($cmd_proc ne $proc) ) {
                        my $msg = "Unable to find pid for $proc on $node.";
                        $msg .= " process may have restarted.";
                        $msg .= " Skip collecting data.";
                        print $msg, "\n" if ($debug);
                        next;
                    }
                }

                #$cmd_proc = get_remote_cmd_proc_name($ip, $pid);
                if (defined($pid_proc_map->{$pid})) {
                    $cmd_proc = $pid_proc_map->{$pid};
                } else {
                    $cmd_proc = get_proc_name_from_sshfs($sys_info,
                                                         $ip,
                                                         $pid,
                                                         "/opt/cisco/pam/mnt/");
                }
                if (($cmd_proc =~ /\w+/) && ($cmd_proc ne $proc)) {
                    $proc = $cmd_proc;
                    $proc_info->{$pid}->{proc} = $cmd_proc;
                }

                #if ($isKnownLeaks{$node}{$pid}) {}
                if ($isGlobalKnownLeaks{$proc}) {
                    #my $msg = "$pid ($proc) leaks memory. ";
                    my $msg = "$proc leaks memory. ";
                    $msg .= "But it has been reported. Skip reporting.\n";
                    print $msg;
                    next;
                }

                if (defined($dumpCoreInfo->{$node}->{$pid}->{count}) &&
                    ($dumpCoreInfo->{$node}->{$pid}->{count} >=
                                                   ($no_dumpcores - 1))) {
                    if (!defined($isGlobalSkippedMemProcs->{skippedMemProcs})) {
                        #@skippedMemProcs = ($pid);
                        @skippedMemProcs = ($proc);
                    } else {
                        my @skippedMemProcs =
                           @{$isGlobalSkippedMemProcs->{skippedMemProcs}};
                    }
                    #if (!grep(/\b$pid\b/, @skippedMemProcs)) {}
                    if (!grep(/\b$proc\b/, @skippedMemProcs)) {
                        #push @skippedMemProcs, $pid;
                        push @skippedMemProcs, $proc;
                    }
                    $isGlobalSkippedMemProcs->{skippedMemProcs}
                                                   = \@skippedMemProcs;
                    $isKnownLeaks{$node}{$pid} = 1;
                    $isKnownLeaks{$node}{$proc} = 1;
                    $isGlobalKnownLeaks{$proc} = 1;
                }

                $start_time = $proc_info->{$pid}->{start_time};
                $stop_time = $proc_info->{$pid}->{stop_time};
                my $interval = $proc_info->{$pid}->{interval};
                my $total_start = $proc_info->{$pid}->{total_start};
                my $total_stop = $proc_info->{$pid}->{total_stop};

                ##########################################
                #For XR, check if usage is due to ltrace
                ##########################################
                my @mem_leaks   = ();
                my @ltrace_mems = ();
                @mem_leaks = get_mem_info_by_pid($top_log,
                                                 $start_time,
                                                 $stop_time,
                                                 $interval,
                                                 $pid);
                if ($osType =~ /xr/i) {
                    #get ltrace memory for the process:
                    @ltrace_mems = get_proc_ltrace_memory($node,
                                                          $pid,
                                                          $proc,
                                                          $start_time,
                                                          $stop_time,
                                                          $interval,
                                                          $ltrace_depot_dir);
                    $ret = verify_leak_by_ltrace(\@mem_leaks,
                                                 \@ltrace_mems,
                                                 $ltrace_ratio);
                    if ($ret eq 0) {
                        if ($debug) {
                            $_msg = "Memory increase due to ltrace for";
                            $_msg .= " $proc ($pid) - ignore.\n";
                            print $_msg;
                        }
                        next;
                    }
                    my $lt_file = $ltrace_depot_dir . "/" . $node;
                    $lt_file .= "/" . $pid . "-" . $proc . $trace_name_tpl;
                    push @saved_log_files, $lt_file;
                    push @retained_logs, $lt_file;
                }

                if (!scalar(@mem_leaks)) {
                    if ($debug) {
                        print "No leaks memory detected for $proc ($pid).\n";
                    }
                    next;
                }
                #Ignore if memory fluctuates
                if ($interval > 1) {
                    my @mem_leaks_verified = 
                             verify_mem_info_by_pid($top_log,
                                                    $start_time,
                                                    $stop_time,
                                                    $pid);

                    if (!scalar(@mem_leaks_verified)) {
                        print "No leaks memory detected for $proc ($pid).\n";
                        next;
                    }
                }

                ##########################################
                #collect memory snapshots on last dumpcore.
                ##########################################
                if (defined($dumpCoreInfo->{$node}->{$pid}->{count}) &&
                    ($dumpCoreInfo->{$node}->{$pid}->{count} >= 
                                                    ($no_dumpcores - 1))) {
                    $uptime = $stop_uptime; #in mins
                    my @_dumped_mem_logs = dump_memory_logs(\@mem_leaks,
                                                            $node,
                                                            $log_dir,
                                                            $uptime,
                                                            $sys_info,
                                                            $version_content);
                    if (!@_dumped_mem_logs) {
                        $_msg = "Failed to dump_memory_logs\n";
                        &pam_logger($sys_info, $log_dir, "memory_monitoring", $_msg);
                    } else {
                        foreach my $d_log (@_dumped_mem_logs) {
                            push @dumped_mem_logs, $d_log;
                        }
                    }
                }
                #TODO - check memory size, and skip dumpcore if size is too big
                ##########################################
                #get default (generic) commands:
                ##########################################
                my %isCmdSeen;
                my $_event_type = "memory_leak";
                my $cmd_info1 = get_default_commands($_event_type, $osType);
                my @allCommands = @{$cmd_info1->{$cmd_type}->{allCommands}};
                my $cmd_info = get_edcd_event_commands($platform,
                                                       $osType,
                                                       $_event_type,
                                                       $proc,
                                                       \@pam_event_db_files);
                my $_commandList = $cmd_info->{$cmd_type}->{commandList};
                foreach my $cmd_mode (@$_commandList) {
                    if (!$isCmdSeen{$cmd_mode}) {
                        push @allCommands, $cmd_mode;
                        $isCmdSeen{$cmd_mode} = 1;
                    }
                }
                foreach my $cmd_mode (@allCommands) {
                    #Skip until CSCuz77931 is fixed (Panini/Scapa)
                    my ($cmd, $junk) = split(/,/, $cmd_mode);
                    next if ($isExecuted{$cmd_mode});
                    if ((!defined($dumpCoreInfo->{$node}->{$pid}->{count})) ||
                         ($dumpCoreInfo->{$node}->{$pid}->{count} < 
                                                       ($no_dumpcores - 1))) {
                        if ($cmd !~ /$dumpcore_pat/i) {
                            next;
                        }
                    }
                    if ($cmd =~ /show.*tech/i) {
                        $show_tech_executed++;
                        $showtech_cnt++;
                        sleep 1;
                    }
                    my $_node = $node;
                    if ($mode =~ /sysadmin/) {
                       #calvados node naming: 0/RP0, 0/RP1, 1/RP0 etc
                       $_node =~ s/.*://;
                       $_node =~ s/_VM\d+//;
                    }
                    #TOOO - HACK ..
                    $_node = "0_RP0_CPU0" if ($sys_info->{is_thinxr});
                    my $cli_info = cli_agent_shell($sys_info,
                                                   $_event_type,
                                                   $proc,
                                                   $pid,
                                                   $cmd,
                                                   $_node);
                    $isExecuted{$cmd_mode} = 1;
                    if ($cli_info->{rc} ne 1) {
                        my $msg = $cli_info->{msg};
                        &pam_logger($sys_info, $log_dir, $bucket, $msg);
                    } else {
                        push @executed_commands, $cli_info->{cmd};
                        foreach my $_log (@{$cli_info->{log_files}}) {
                            if ((!defined($dumpCoreInfo->{$node}->{$pid}->{count})) ||
                                 ($dumpCoreInfo->{$node}->{$pid}->{count} <
                                                         ($no_dumpcores - 1))) {
                                if (($_log =~ /\.core\./) &&
                                    ($cmd =~ /$dumpcore_pat/i)) {
                                    #only track dumpcore:
                                    if (defined($dumpCoreInfo->{$node}->{$pid}->{cores})) {
                                        $dumpCoreInfo->{$node}->{$pid}->{cores} .=
                                                                    "," . $_log; 
                                    } else {
                                        $dumpCoreInfo->{$node}->{$pid}->{cores} =
                                                                          $_log; 
                                    }
                                }
                            } else {
                                #track all:
                                push @saved_log_files, $_log;
                            }
                        }
                    }
                    # keep track the top_log files:
                    if (!grep(/$top_log/, @saved_log_files)) {
                        push @saved_log_files, $top_log;
                        #don't delete
                    }
                } ;# foreach my $cmd_mode (@allCommands)

                ####################################################
                #create summary report
                ####################################################
                if ( (defined($dumpCoreInfo->{$node}->{$pid}->{count})) &&
                     ($dumpCoreInfo->{$node}->{$pid}->{count} >= 
                                                       ($no_dumpcores - 1))) {
                    foreach my $d_log (@dumped_mem_logs) {
                        push @saved_log_files, $d_log;
                    }
                    if ( defined($dumpCoreInfo->{$node}->{$pid}->{cores}) ) {
                        foreach my $_log (split(/,/,
                                    $dumpCoreInfo->{$node}->{$pid}->{cores})) {
                            if ($_log =~ /\.core\./) {
                                push @saved_log_files, $_log;
                            }
                        }
                    }
                    if ( @saved_log_files ) {
                        my $summary_log =
                          &save_process_event_summary($log_dir,
                                                      \@executed_commands,
                                                      \@saved_log_files,
                                                      $show_tech_executed,
                                                      $show_tech_root,
                                                      $bucket);
                        push @saved_log_files, $summary_log if ($summary_log);
                        my $delete_original = 1;
                        my $pr_info;
                        $pr_info->{procName} = $proc;
                        $pr_info->{node} = $node;
                        $pr_info->{event_type} = $_event_type;
                        $pr_info->{delete_original} = $delete_original;
                        $pr_info->{start_time} = $start_time;
                        $pr_info->{stop_time} = $stop_time;
                        $pr_info->{total_start} = $total_start;
                        $pr_info->{total_stop} = $total_stop;
                        $pr_info->{showtech_cnt} = $showtech_cnt;
                        my $tar_name = &create_log_archive($log_dir,
                                                           \@saved_log_files,
                                                           \@retained_logs,
                                                           $pr_info);
                        &update_log($memory_leak_log, $node, $proc, $mem_fd);
                    } ;# if ( @saved_log_files )
                } ;# if ($dumpCoreInfo->{$node}->{$pid}->{count}

                if (!defined($dumpCoreInfo->{$node}->{$pid}->{count})) {
                    $dumpCoreInfo->{$node}->{$pid}->{count} = 1;
                } else {
                    $dumpCoreInfo->{$node}->{$pid}->{count}++;
                }

            } ;# foreach my $pid (@{$proc_info->{leaked_pids}})
        } ;#foreach my $node (@{$perf_stat->{nodeList}})
        $isGlobalSkippedMemProcs->{skippedMemProcs} = \@skippedMemProcs;
        #-----
        #Memory END
        #-----

        #############################################
        #step 2 - check CPU
        #############################################
        $event_type = "cpu_hog";
        my $min_cpu_samplings = 4;
        foreach my $node (@{$perf_stat->{nodeList}}) {
            my $show_tech_executed = 0;
            my @retained_logs = ();
            my @dumped_cpu_logs = ();

            next if (!defined($perf_stat->{$node}->{log}));
            next if (! -f $perf_stat->{$node}->{log});
            my $top_log = $perf_stat->{$node}->{log};
            push @retained_logs, $top_log;
            my $ip = $perf_stat->{ip}->{$node};

            my @cpu_info_list = get_memory_last_samplings($top_log,
                                                          $min_cpu_samplings);
            my $last_cpu_info = $cpu_info_list[$#cpu_info_list];
            my $last_timestamp = $last_cpu_info->{timestamp};
            if (scalar(@cpu_info_list) < $min_cpu_samplings) {
                print "Too less cpu samplings: ", scalar(@cpu_info_list),
                                                            "\n" if ($debug);
                next;
            }
            my @cpu_logs = get_cpu_hogs(\@cpu_info_list,
                                        $process_cpu_threshold,
                                        $osType);
            next if (!@cpu_logs);

            my @skippedCpuPids = ();
            my @skippedCpuProcs = (); 
            my %isCpuPidSeen;
            foreach my $cpu_log (@cpu_logs) {
                $showtech_cnt = 0;
                my @executed_commands = ();
                my @saved_log_files = ();
                my %isExecuted;
                my ($_pid,
                    $_proc,
                    $_timestamp1,
                    $_cpup1,
                    $_cput1,
                    $_timestamp2,
                    $_cpup2,
                    $_cput2) = split(/,/, $cpu_log);

                if (defined($isSkippedCpuPids->{$node}->{skippedCpuPids})) {
                    @skippedCpuPids = 
                              @{$isSkippedCpuPids->{$node}->{skippedCpuPids}};
                }
                if (!$isSkippedCpuPids->{$node}->{$_pid}) {
                    $isSkippedCpuPids->{$node}->{$_pid} = 1;
                    push @skippedCpuPids, $_pid;
                } else {
                    my $severity = "alert";
                    $msg = "PAM detected $_proc is hogging CPU on $node!";
                    create_syslog($sys_info, $msg, $severity);
                    next;
                }
                if (defined($isSkippedCpuPids->{$node}->{skippedCpuProcs})) {
                    @skippedCpuProcs = 
                             @{$isSkippedCpuPids->{$node}->{skippedCpuProcs}};
                }
                if (!$isSkippedCpuPids->{$node}->{$_proc}) {
                    $isSkippedCpuPids->{$node}->{$_proc} = 1;
                    push @skippedCpuProcs, $_proc;
                } else {
                    next;
                }
                my $pid_proc_map = get_pid_proc_map($node, $ltrace_depot_dir);
                my $cmd_proc = "";
                if (defined($pid_proc_map->{$_pid})) {
                    $cmd_proc = $pid_proc_map->{$_pid};
                } else {
                    $cmd_proc = get_proc_name_from_sshfs($sys_info,
                                                         $ip,
                                                         $_pid,
                                                         "/opt/cisco/pam/mnt/");
                }
                if (($cmd_proc =~ /\w+/) && ($cmd_proc ne $_proc)) {
                    $_proc = $cmd_proc;
                    $_proc =~ s/\s+/_/g;
                }

                ##########################################
                #get default (generic) commands:
                ##########################################
                my %isCmdSeen;
                my $_event_type = "cpu_hog";
                my $cmd_info1 = get_default_commands($_event_type,
                                                     $osType);
                my @allCommands = @{$cmd_info1->{$cmd_type}->{allCommands}};
                my $cmd_info = get_edcd_event_commands($platform,
                                                       $osType,
                                                       $_event_type,
                                                       $_proc,
                                                       \@pam_event_db_files);
                my $_commandList = $cmd_info->{$cmd_type}->{commandList};
                foreach my $cmd_mode (@$_commandList) {
                    if (!$isCmdSeen{$cmd_mode}) {
                        push @allCommands, $cmd_mode;
                        $isCmdSeen{$cmd_mode} = 1;
                    }
                }
                foreach my $cmd_mode (@allCommands) {
                    my ($cmd, $junk) = split(/,/, $cmd_mode);
                    next if ($isExecuted{$cmd_mode});
                    if (!$isExecuted{$cmd_mode}) {
                        $isExecuted{$cmd_mode} = 1;
                    }
                    if ($cmd =~ /show.*tech/i) {
                        $show_tech_executed++;
                        $showtech_cnt++;
                        sleep 1;
                    }
                    #TOOO - HACK ..
                    $node = "0_RP0_CPU0" if ($sys_info->{is_thinxr});
                    my $cli_info = cli_agent_shell($sys_info,
                                                   $event_type,
                                                   $_proc,
                                                   $_pid,
                                                   $cmd,
                                                   $node);
                    if ($cli_info->{rc} ne 1) {
                        my $msg = $cli_info->{msg};
                        &pam_logger($sys_info, $log_dir, $bucket, $msg);
                    } else {
                        push @executed_commands, $cli_info->{cmd};
                        print "cmd=", $cli_info->{cmd}, "\n" if ($debug);
                        foreach my $_log (@{$cli_info->{log_files}}) {
                            push @saved_log_files, $_log;
                        }
                    }

                    # keep track the top_log files:
                    if (!grep(/$top_log/, @saved_log_files)) {
                        push @saved_log_files, $top_log;
                        #don't delete
                    }
                } ;# foreach my $cmd_mode (@allCommands)

                #add the header of last top output 
                #my $top_header = get_top_header($top_log, $last_timestamp);
                ##########################################
                #CPU hogs found on the node ... collect more data, save logs
                ##########################################
                my $_msg = "CPU hogs found on $node. Collect additional data.";
                print $_msg, "\n" if ($debug);

                my @_dumped_cpu_logs = dump_cpu_logs(\@cpu_logs,
                                                     $node,
                                                     $log_dir,
                                                     $uptime);
                if (!@_dumped_cpu_logs) {
                    $_msg = "Failed to dump_cpu_usage_logs\n";
                    &pam_logger($sys_info, $log_dir, $bucket, $_msg);
                } else {
                    foreach my $d_log (@_dumped_cpu_logs) {
                        push @dumped_cpu_logs, $d_log;
                    }
                }
                ####################################################
                #create log to report
                ####################################################
                foreach my $d_log (@dumped_cpu_logs) {
                    if (!grep(/$d_log/, @saved_log_files)) {
                        push @saved_log_files, $d_log;
                    }
                }
                my $summary_log = 
                          &save_process_event_summary($log_dir,
                                                      \@executed_commands,
                                                      \@saved_log_files,
                                                      $show_tech_executed,
                                                      $show_tech_root,
                                                      $event_type);
                push @saved_log_files, $summary_log if ($summary_log);
                #adding delays to ensure file is saved
                if ( @saved_log_files ) {
                    my $delete_original = 1;
                    my $_junk_proc = "";
                    my $pr_info;
                    $pr_info->{procName} = $_proc;
                    $pr_info->{node} = $node;
                    $pr_info->{event_type} = $event_type;
                    $pr_info->{delete_original} = $delete_original;
                    $pr_info->{showtech_cnt} = $showtech_cnt;
                    my $tar_name = &create_log_archive($log_dir,
                                                       \@saved_log_files,
                                                       \@retained_logs,
                                                       $pr_info);
                    &update_log($cpu_hog_log, $node, $_proc, $cpu_hog_fd);

                }
            } ;# foreach my $cpu_log (@cpu_logs)
            $isSkippedCpuPids->{$node}->{skippedCpuPids} = \@skippedCpuPids;
            $isSkippedCpuPids->{$node}->{skippedCpuProcs} = \@skippedCpuProcs;
        } ;#foreach my $node (@{$perf_stat->{nodeList}})

        #############################################
        #step 3 - check disk
        #############################################
        my %isSkippedMount;
        $event_type = "disk_usage";
        foreach my $node (@{$perf_stat->{nodeList}}) {
            my @saved_log_files = ();
            my @executed_commands = ();
            my $show_tech_executed = 0;
            my @retained_logs = ();
            $showtech_cnt = 0;

            next if (!defined($perf_stat->{$node}->{log}));
            next if (! -f $perf_stat->{$node}->{log});
            my $top_log = $perf_stat->{$node}->{log};
            push @retained_logs, $top_log;

            #only collect last sampling

            my $no_disk_samples = 2; #don't set to 1 or it can lead to memory hog
            my @mem_info_list = get_disk_last_samplings($top_log,
                                                        $no_disk_samples);
            if (scalar(@mem_info_list) < 2) {
                print "Too less disk samplings: ", scalar(@mem_info_list),
                                                              "\n" if ($debug);
                next;
            }
            my $last_mem_info = $mem_info_list[$#mem_info_list];
            $uptime = $last_mem_info->{top}->{up};

            my $fullCount = 0;
            my @FullDisks = ();
            my @skippedDisks = ();
            my %isDiskSeen;
            foreach my $mount (@{$isSkippedDisk->{$node}->{skippedDisks}}) {
                next if ($mount =~ /\/var\/volatile\/|tmp.*iso/);
                next if ($mount =~ /^\/mnt\/pacific/);
                if (!$isDiskSeen{$mount}) {
                    $isDiskSeen{$mount} = 1;
                    push @skippedDisks, $mount;
                }
            }
            foreach my $mount (@{$last_mem_info->{mountList}}) {
                next if ($mount =~ /\/var\/volatile\/|tmp.*iso/);
                if ($last_mem_info->{df}->{$mount}->{usepct} >= 99) {
                    next if ($mount =~ /\/var\/volatile\/|tmp.*iso/);
                    next if ($mount =~ /^\/mnt\/pacific/);
                    if (!$isDiskSeen{$mount}) {
                        push @FullDisks, $mount;
                        push @skippedDisks, $mount;
                        $fullCount++;
                    } else {
                        my $severity = "alert";
                        $msg = "PAM detected disk ($mount) on $node is full! Please cleanup ASAP to";
                        $msg .= " avoid any fault caused by this.";
                        create_syslog($sys_info, $msg, $severity);
                    }
                }
            }
            my %isExecuted;
            if ($fullCount) {
                my $cmd_info = get_default_commands($event_type, $osType);
                my @allCommands = @{$cmd_info->{$cmd_type}->{allCommands}};
                foreach my $cmd_mode (@allCommands) {
                    my ($cmd, $junk) = split(/,/, $cmd_mode);
                    next if ($isExecuted{$cmd_mode});
                    if (!$isExecuted{$cmd_mode}) {
                        $isExecuted{$cmd_mode} = 1;
                    }
                    if ($cmd =~ /show.*tech/i) {
                        $show_tech_executed++;
                        $showtech_cnt++;
                        sleep 1;
                    }
                    #TOOO - HACK ..
                    $node = "0_RP0_CPU0" if ($sys_info->{is_thinxr});
                    my $cli_info = cli_agent_shell($sys_info,
                                                   $event_type,
                                                   "",
                                                   "",
                                                   $cmd,
                                                   $node);
                    if ($cli_info->{rc} ne 1) {
                        my $msg = $cli_info->{msg};
                        &pam_logger($sys_info, $log_dir, $bucket, $msg);
                    } else {
                        push @executed_commands, $cli_info->{cmd};
                        foreach my $_log (@{$cli_info->{log_files}}) {
                            push @saved_log_files, $_log;
                        }
                    }
                } ;# foreach my $cmd_mode (@allCommands)

                print "Disk full on $node\n" if ($debug);
                my $log = save_disk_logs($last_mem_info,
                                         $node,
                                         $log_dir,
                                         $uptime);
                push @saved_log_files, $log;
            }
            if ( @saved_log_files ) {
                my $delete_original = 1;
                my $_junk_proc = "";
                my $pr_info;
                $pr_info->{procName} = $_junk_proc;
                $pr_info->{fullDiskList} = \@FullDisks;
                $pr_info->{node} = $node;
                $pr_info->{event_type} = $event_type;
                $pr_info->{delete_original} = $delete_original;
                $pr_info->{showtech_cnt} = $showtech_cnt;
                my $tar_name = &create_log_archive($log_dir,
                                                   \@saved_log_files,
                                                   \@retained_logs,
                                                   $pr_info);
                my $disk_list = "";
                foreach my $_disk (@FullDisks) {
                    $disk_list .= ":" . $_disk,
                }
                $disk_list =~ s/^://;
                &update_log($disk_usage_log, $node, $disk_list, $disk_usage_fd);
            }
            $isSkippedDisk->{$node}->{skippedDisks} = \@skippedDisks;
        } ;#foreach my $node (@{$perf_stat->{nodeList}})
        sleep $refresh_interval;
    } ;# while (1)
    print "Fail to fork\n";
} ;# unless (fork)

sub INT_pid_handler {
    print "KILL signal received. Closing all files.\n";
    unlink $pid_file if (-f $pid_file);
    foreach my $_fd ($mem_fd, $cpu_hog_fd, $disk_usage_fd) {
        close($_fd) if (defined($_fd));
    }
    exit(0);
}

######################################
#collect top data
######################################
sub collect_top ($$$$) {
    my $sys_info = shift;
    my $node_info = shift;
    my $root_folder = shift || "/misc/disk1/cisco/cpu_usage/";
    my $chassis_id = shift || 0;

    my $vf1_3073_ip = $sys_info->{vf1_3073_ip};

    ##########################
    #rebooted
    ##########################
    my $min_uptime = 3600; #if already up more than 1 hour, should not archive

    my $perf_stat;
    $perf_stat->{rc} = 0;
    my @nodeList = ();
    my @logList = ();
    $perf_stat->{nodeList} = \@nodeList;
    $perf_stat->{logList} = \@logList;
    my $osType = $sys_info->{hostType};
    my $platform = $sys_info->{platform};
    my $chvrf = "";
    if ( ($osType =~ /calv/i) &&
         ($platform =~ /(ncs(4|6)k|panini|scapa)/i) ) {
        $chvrf = "/sbin/chvrf 0 ";
    }

    ###################################
    #update top/df output
    ###################################
    my $ssh_o = "-q -o UserKnownHostsFile=/dev/null ";
    $ssh_o .= "-o StrictHostKeyChecking=no ";
    my $localtime = localtime();
    my $top_tracker;
    my %isSeen;

    #date pattern:
    my $wday = '(Mon|Tue|Wed|Thu|Fri|Sat|Sun)';
    my $month = '(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)';
    my $date_pattern = "\\s*$wday\\s+$month\\s+(\\d+)\\s+";
    $date_pattern .= "(\\d{2}:\\d{2}:\\d{2})\\s+(\\S+\\s+)*(\\d{4})";

    my $top_bin = "/usr/bin/top";
    if ((! -f "/usr/bin/top") && (-f "/bin/top")) {
        $top_bin = "/bin/top";
    }

    foreach my $node (@{$node_info->{nodeList}}) {
        my $ip = $node_info->{$node}->{IP};
        #next if ($node =~ /R[S]?P\d+\/VM2/);
        next if (($platform =~ /nsc4k|scapa/i) && ($node =~ /VM2/));
        next if (($osType =~ /calv|sysadm/i) && (!defined($ip) || ($ip !~ /\.1$/)));

        $node =~ s/\//_/g;
        my $log = $log_dir . "/" . $osType . "-" . $node . ".log";

        my ($output1, $output2);
        #####################################
        #TODO - lindt doesn't allow ssh as sshd only runs on global-vrf
        #i.e., how to handle distributed ???
        if ($sys_info->{is_thinxr}) {
            $output1 = `$top_bin -b -n1`;
            $output2 = `/bin/df -kl`;
        } else {
            $output1 = `$chvrf /usr/bin/ssh $ssh_o $ip "$top_bin -b -n1"`;
            $output2 = `$chvrf /usr/bin/ssh $ssh_o $ip "/bin/df -kl"`;
        }
        $top_tracker->{$node}->{output} = $output1 . $output2;

        my $_uptime = 0;

        my $up = (split(/\n/, $output1))[0];
        if ( $up =~ /up\s+((\d+)\s+day[s]?,\s+)*(\d+):(\d+),\s+.*load\s+average/) {
             my $day = $2 || 0;
             my $hour = $3;
             my $min = $4;
             $_uptime = $day * 24 * 60 + $hour * 60 + $min;
        } elsif ($up =~ /up\s+((\d+) +day[s]?, +)*(\d+) +min.*load\s+average/) {
            my $day = $2 || 0;
            my $min = $3 || 0;
            $_uptime = 24 * 60 * $day + $min;
        }
        #convert into seconds:
        $_uptime *= 60;
        $perf_stat->{$node}->{uptime} = $_uptime;

        if (!$isSeen{$node}) {
            push @nodeList, $node;
            $isSeen{$node} = 1;
            $perf_stat->{ip}->{$node} = $ip;
        }
        my $last_uptime = 0;
        my $uptime_file=$log_dir . "/" . $osType . "-" . $node . "-uptime.txt";
        if ( -f $uptime_file) {
            if (open(FD,$uptime_file)) {
                $last_uptime = <FD>;
                $last_uptime =~ s/[\r\n]//g;
            }
            close(FD);
        }
        $perf_stat->{$node}->{last_uptime} = $last_uptime;

        ############################################
        #Archive old log if node has been reloaded
        ############################################
        if (($do_data_collect) &&
            ($last_uptime >= $perf_stat->{$node}->{uptime})) {
            if (-f $log) {
                my $new_log = new Logfile::Rotate(File => $log,
                                               Count   => $log_rotation_count,
                                               Gzip    => 'lib',
                                               Flock   => 'yes',
                                               Persist => 'yes',
                                               );
                #unlink $log;
                $new_log->rotate();
                undef $new_log;

                my $baselog = basename($log);
                my @_rotatedLogs = ($baselog);
                if ($osType =~ /xr/i) {
                    $ret = rotate_ltraces($ltrace_depot_dir,
                                          \@_rotatedLogs,
                                          $log_rotation_count);
                }
            }
        }
        ############################################

        #update uptime again
        if ($do_data_collect) {
            if (open(WD,">$uptime_file")) {
                print WD $perf_stat->{$node}->{uptime};
                close(WD);
            }
        }

        ##########################
        #Collect ltrace:
        ##########################
        if (($osType =~ /xr/i) && ($do_data_collect)) {
            my $ltrace_cmd = "/pkg/opt/cisco/pam/collect_ltrace.pl";
            if (!$sys_info->{is_thinxr}) {
                $ltrace_cmd = "$chvrf /usr/bin/ssh $ssh_o $ip $ltrace_cmd";
            }
            my $ltrace_output = `$ltrace_cmd`;
            my $timestamp = $localtime;
            if ($timestamp =~ /^$date_pattern/) {
                my $_month = $2;
                my $_day = $3;
                my $_time = $4;
                my $_year = $6;
                $timestamp = $_year . "/" . $_month . "/" . $_day . "/" . $_time;
                #Sat Aug 13 21:05:51 2016
                #2016/Aug/13/21:05:51
            }
            #TOOO - HACK ..
            $node = "0_RP0_CPU0" if ($sys_info->{is_thinxr});
            my $ret = update_ltrace($ltrace_output,
                                    $node,
                                    $ltrace_depot_dir,
                                    $timestamp);
            if (!$ret) {
                my $msg = "Unable to update ltrace data on $node.";
                my $severity = "warning";
                create_syslog($sys_info, $msg, $severity);
            }
        }

    } ;# foreach my $node (@{$node_info->{nodeList}})

    foreach my $node (@nodeList) {
        $node =~ s/\//_/g;
        my $log = $log_dir . "/" . $osType . "-" . $node . ".log";
        if ($do_data_collect) {
            my $output = $top_tracker->{$node}->{output};
            if (!open(TOP_LOG, ">>$log")) {
                print "Failed to open $log: $!\n";
                close (TOP_LOG);
                next;
            }
            if (!flock(TOP_LOG, 2)) {
                print "Failed to lock $log: $!\n";
                close (TOP_LOG);
                next;
            }
            print TOP_LOG $localtime, "\n";
            if (defined($perf_stat->{$node}->{uptime})) {
                print TOP_LOG "Uptime: ", $perf_stat->{$node}->{uptime}, "\n";
            } else {
                print TOP_LOG "Uptime:\n";
            }
            print TOP_LOG $output;
            flock(TOP_LOG, 8);
            close(TOP_LOG);
            chmod (0777, $log);
        }

        my $master_xr_ip = $sys_info->{master_xr_ip};
        $perf_stat->{$node}->{log} = $log;
    } ;# foreach my $node (@nodeList)

    ###################################
    #update cli history
    ###################################
    if ($do_data_collect) {
        my $cli_last_timestamp = $cli_data->{last_timestamp};
        my $cli_last_mtime = $cli_data->{last_mtime};
        my $new_cli_data;
        if ($osType =~ /xr/i) {
            my @xr_history_files = &mount_xr_cli_history($sys_info);
            $new_cli_data = &get_xr_command_history(\@xr_history_files,
                                                     $cli_last_timestamp,
                                                     $cli_last_mtime);
            &umount_xr_cli_history($sys_info);
        } elsif ( ($osType =~ /calv/i) && 
                       ($platform !~ /fretta|asr9k|skywarp|ncs5/i) ) {
            my @confd_audit_logs = &mount_var_log($node_info);
            $new_cli_data = &get_calv_command_history(\@confd_audit_logs,
                                                       $cli_last_timestamp,
                                                       $cli_last_mtime);
            &umount_var_log($node_info);
        }
    
        my $newEvent = 0;
        if (!$cli_last_timestamp) {
            $newEvent = 1;
        }
        my $cli_list = "";
        my $new_last_timestamp = "";
        foreach my $timestamp (@{$new_cli_data->{timestamps}}) {
            if ($cli_last_timestamp ne "") {
                if ( $cli_last_timestamp eq $timestamp ) {
                    $newEvent = 1;
                    next;
                }
            }
            if ($newEvent) {
                my $mode = $new_cli_data->{$timestamp}->{mode};
                $cli_list .= $timestamp . ":" . $mode;
                $cli_list .= ":" . $new_cli_data->{$timestamp}->{cli} . "\n";
            }
        }
        #still nothing - all new (e.g., there is a GAP):
        if (!$newEvent) {
            foreach my $timestamp (@{$new_cli_data->{timestamps}}) {
                my $mode = $new_cli_data->{$timestamp}->{mode};
                $cli_list .= $timestamp;
                my $cli_option = "";
                if ($osType =~ /xr/i) {
                    my $user = $new_cli_data->{$timestamp}->{user};
                    my $tty = $new_cli_data->{$timestamp}->{tty};
                    my $tty_source = $new_cli_data->{$timestamp}->{tty_source};
                    $cli_option = "(" . $user . "," . $tty . ",";
                    $cli_option .= $tty_source . "," . $mode . "):";
                } else {
                    $cli_option = ":" . $mode . ":";
                }
                $cli_list .= $cli_option . $new_cli_data->{$timestamp}->{cli} ."\n";
            }
        }
    
        if ( $cli_list ne "" ) {
            $cli_last_timestamp = $new_cli_data->{last_timestamp};
            my $dst_clihistory_dir = $log_dir . "/clihistory";
            &createFolder($dst_clihistory_dir) if (! -d $dst_clihistory_dir);
            my $dst_clihistory = $dst_clihistory_dir ."/". $osType ."-clihistory";
            if (open(HD, ">>$dst_clihistory")) {
                print HD "PAM_PAM_PAM\n";
                print HD $localtime, "\n";
                print HD $cli_list;
                print HD "last_timestamp: $cli_last_timestamp\n";
            }
            close(HD);
            chmod (0777, $dst_clihistory) if (-f $dst_clihistory);
            $cli_data = $new_cli_data;
        }
    } ;# if ($do_data_collect)

    $perf_stat->{nodeList} = \@nodeList;
    $perf_stat->{rc} = 1;
    return $perf_stat;
} ;#sub collect_top ($$$$)

sub mount_var_log($) {
    my $node_info = shift;

    my $vf1_3073_ip = $sys_info->{vf1_3073_ip};
    my @confd_audit_logs = ();
    return @confd_audit_logs if ($sys_info->{boardtype} !~ /R[S]?P|SC|FC|CC/i);
    if ( -f "/var/log/confd_audit.log" ) {
        @confd_audit_logs = ("/var/log/confd_audit.log");
    }
    my $remote_mount_dir = "/var/log";
    if (-f "/opt/cisco/calvados/bin/pam_show_node") {
        $node_info = get_calvados_pam_show_sdr($sys_info);
    } else {
        $node_info = get_calvados_sdr(0);
    }
    foreach my $node (@{$node_info->{nodeList}}) {
        #TODO - support FC/SC
        next if ($node !~ /R[S]?P|SC|FC|B\d+\/CB\d+/i);
        next if (($platform =~ /nsc4k|scapa/i) && ($node =~ /VM2/));
        #this is calvados IP already:
        my $ip = $node_info->{$node}->{IP};
        $ip =~ s/(.*).\d+$/$1\.1/;
        next if ($ip eq $vf1_3073_ip);

        # need to mount remote /var/log
        my $local_mount_dir = "/opt/cisco/pam/${ip}/var/log";
        my $confd_audit_log = $local_mount_dir . "/" . "confd_audit.log";
        $ret = &createFolder($local_mount_dir);
        if (!$ret ) {
            print "Failed to create folder: $local_mount_dir.\n";
            next;
        }
        $ret = &check_stale_sshfs_mount($local_mount_dir, $remote_mount_dir);
        if (!$ret) {
            $ret = &do_sshfs_mount($remote_mount_dir,
                                   $ip,
                                   $local_mount_dir,
                                   "root");
        }
        if ( ($ret) && (-f $confd_audit_log) ) {
            push @confd_audit_logs, $confd_audit_log;
        }
    }
    return @confd_audit_logs;
} ;#sub mount_var_log($)

sub umount_var_log($) {
    my $node_info = shift;
    my $vf1_3073_ip = $sys_info->{vf1_3073_ip};
    my $remote_mount_dir = "/var/log";
    if (-f "/opt/cisco/calvados/bin/pam_show_node") {
        $node_info = get_calvados_pam_show_sdr($sys_info);
    } else {
        $node_info = get_calvados_sdr(0);
    }
    foreach my $node (@{$node_info->{nodeList}}) {
        next if ($node !~ /R[S]?P|SC|FC|B\d+\/CB\d+/i);
        #this is calvados IP already:
        my $ip = $node_info->{$node}->{IP};
        $ip =~ s/(.*).\d+$/$1\.1/;
        next if ($ip eq $vf1_3073_ip);
        my $local_mount_dir = "/opt/cisco/pam/${ip}/var/log";
        $ret = &umount_sshfs($local_mount_dir);
    }
    return 1;
} ;#sub umount_var_log($)

sub mount_xr_cli_history() {
    my $sys_info = shift;

    my $vf1_3073_ip = $sys_info->{vf1_3073_ip};
    my @xr_history_files = ();
    my $remote_mount_dir = "/misc/scratch/clihistory/";
    my $command_file = $remote_mount_dir . "/commands";

    return @xr_history_files if ($sys_info->{boardtype} !~ /R[S]?P|CC/i);
    push @xr_history_files, $command_file if ( -f $command_file );
    if ($sys_info->{is_thinxr}) {
        return @xr_history_files;
    }

    my $base_dir = "/opt/cisco/pam/";
    my $sysdb_info = get_rp_ip_from_sysdb();
    foreach my $node (@{$sysdb_info->{nodeList}}) {
        next if ($node !~ /R[S]?P|B\d+\/CB\d+/i);
        my $ip = $sysdb_info->{$node}->{IP};
        next if ($ip eq $vf1_3073_ip);
        my $local_mount_dir = $base_dir . $ip . "/clihistory/";
        $ret = &createFolder($local_mount_dir);
        if (!$ret ) {
            print "Failed to create folder: $local_mount_dir.\n";
            next;
        }
        my $remote_command_file = $local_mount_dir . "/commands";
        $ret = &check_stale_sshfs_mount($local_mount_dir, $remote_mount_dir);
        if (!$ret) {
            $ret = &do_sshfs_mount($remote_mount_dir,
                                   $ip,
                                   $local_mount_dir,
                                   "root");
        }
        if ( ($ret) && (-f $remote_command_file) ) {
            push @xr_history_files, $remote_command_file;
        }
    }
    return @xr_history_files;
} ;#sub mount_xr_cli_history($)

sub umount_xr_cli_history() {
    my $sys_info = shift;
    if ($sys_info->{is_thinxr}) {
        return 1;
    }
    my $vf1_3073_ip = $sys_info->{vf1_3073_ip};
    my $base_dir = "/opt/cisco/pam/";
    my $sysdb_info = get_rp_ip_from_sysdb();
    foreach my $node (@{$sysdb_info->{nodeList}}) {
        next if ($node !~ /R[S]?P|B\d+\/CB\d+/i);
        my $ip = $sysdb_info->{$node}->{IP};
        next if ($ip eq $vf1_3073_ip);
        my $local_mount_dir = $base_dir . $ip . "/clihistory/";
        $ret = &umount_sshfs($local_mount_dir);
    }
    return 1;
} ;#sub umount_xr_cli_history($)

sub get_clihistory_last_timestamp($) {
    my $dst_clihistory = shift;
    my $last_timestamp = "";
    if ( open(HD, "$dst_clihistory") ) {
        while(my $line = <HD>) {
            $line =~ s/\r//g;
            $line =~ s/\n//g;
            if ( $line =~ /last_timestamp:\s+(\S+)/ ) {
                $last_timestamp = $1;
            }
        }
        close(HD);
    }
    return $last_timestamp;
}

sub get_xr_command_history($$$) {
    my $_command_files = shift;
    my $last_timestamp = shift;
    my $last_mtime = shift;

    my $cli_data;

    my $wday = '(Mon|Tue|Wed|Thu|Fri|Sat|Sun)';
    my $month = '(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)';
    my $date_template = "($wday\\s+$month";
    $date_template .= "\\s+\\d+\\s+\\d{2}:\\d{2}:\\d{2}\\.\\d{3}\\s+";
    $date_template .= "\\d{4}\\s+\\w{3})";

    my $month_map = {
        'Jan' => '01',
        'Feb' => '02',
        'Mar' => '03',
        'Apr' => '04',
        'May' => '05',
        'Jun' => '06',
        'Jul' => '07',
        'Aug' => '08',
        'Sep' => '09',
        'Oct' => '10',
        'Nov' => '11',
        'Dec' => '12',
    };

    #my $cli_data;
    my @timestamps;
    my ($max_count);
    my $_timestamp = $last_timestamp;
    my $_last_timestamp = $_timestamp;
    $cli_data->{timestamps} = \@timestamps;
    $cli_data->{last_timestamp} = $last_timestamp;
    $cli_data->{last_mtime} = $last_mtime;

    my $pattern = '([\-\w]+),(vty\d+|con\d_RS*P\d+_CPU\d+),';
    $pattern .= '(\-|\d+\.\d+\.\d+\.\d+),(exec|config|adminexec|adminconfig)';
    $pattern .= ',' . ${date_template} . ',(.*)';

    my ($user,$tty,$tty_source,$mode,$timestamp,$cli);
    foreach my $history_commands (@$_command_files) {
        if ( (! -f $history_commands) && (-s $history_commands < 10) ) {
            next;
        }
        my ($dev,$ino,$_mode,$nlink,$uid,$gid,
            $rdev,$size,$atime,$mtime,$ctime,$blksize,$blocks)
                                            = stat($history_commands);
        next if ($mtime < $last_mtime);
        $last_mtime = $mtime;
        my $command = "/usr/bin/strings $history_commands";
        if (!open(fd1, "$command |") ) {
            next;
        }
        while (<fd1>) {
            chomp $_;
            if ( /\s*^(\d+),(\d+),(\d+)/ ) {
                $max_count = $1;
                $cli_data->{max_count} = $max_count;
                next;
            }
            if ( /\s*^$pattern/) {
                $user = $1;
                $tty = $2;
                $tty_source = $3;
                $mode = $4;
                $timestamp = $5;
                $cli = $8;
                $timestamp =~ s/\s+/\-/g;

                my ($_wday, $_mon, $_day, $_hmsms, $_year, $_tz) =
                                               (split(/\-/, $timestamp));
                $_mon = $$month_map{$_mon};
                my ($hour, $min, $sec) = (split(/:/, $_hmsms));
                $_day = ($_day =~ /\d{2}/) ? $_day : "0${_day}";
                $_timestamp = $_year . $_mon . $_day . $hour . $min . $sec;

                next if ($_timestamp <= $last_timestamp);
                if ($_timestamp >= $_last_timestamp) {
                    $_last_timestamp = $_timestamp;
                }

                push @timestamps, $timestamp;
                $cli_data->{$timestamp}->{user} = $user;
                $cli_data->{$timestamp}->{tty} = $tty;
                $cli_data->{$timestamp}->{tty_source} = $tty_source;
                $cli_data->{$timestamp}->{mode} = $mode;
                $cli_data->{$timestamp}->{cli} = $cli;
            }
        }
        close(fd1);
    }
    $cli_data->{last_timestamp} = $_last_timestamp;
    $cli_data->{timestamps} = \@timestamps;
    $cli_data->{last_mtime} = $last_mtime;
    return $cli_data;
} ;#sub get_xr_command_history($$$)

sub getTopOpt($) {
    my $top_opt = shift;
    my $_output = `/usr/bin/top -$top_opt -b -n 1`;
    my $rCount = 0;
    my $max_count = 30;
    my $output = "";
    foreach my $line (split (/\n/, $_output)) {
        if ( $rCount <= $max_count ) {
            $output .= $line . "\n";
            $rCount++;
        } else {
            return $output;
        }
    }
    return $output;
}

# TODO !!!!!!!!!!
# collect show memory on that node:
# free -m, etc (and from top output)
sub dump_memory_logs($$$$$$) {
    my $_mem_leaks = shift;
    my $node       = shift;
    my $log_dir    = shift;
    my $uptime     = shift;
    my $sys_info    = shift;
    my $version_content     = shift;

    my $osType = $sys_info->{hostType};
    $node =~ s/\//_/g;
    my $msg = "";
    my @files = ();
    foreach my $mem_leak (@$_mem_leaks) {
        my ($timestamp, $pid, $proc, $total, $delta, $shared);
        $msg = "Process memory usage snapshots on $node.\n";
        $msg .=<<END;
---------------------------- --------------- ------- ------------ ------------
Timestamp                    Process         PID       Total (MB)  Shared (MB)
---------------------------- --------------- ------- ------------ ------------
END
        foreach my $sample (split(/;/, $mem_leak)) {
            ($timestamp, $pid, $proc, $total, $delta, $shared) = 
                                                  split(/,/, $sample);
            $msg .= sprintf("%-29s", $timestamp);
            my $cmd_proc = $proc;
            if (defined($pid_proc_map->{$pid})) {
                $cmd_proc = $pid_proc_map->{$pid};
            }
            $msg .= sprintf("%-16s", $cmd_proc);
            $msg .= sprintf("%-7s", $pid);
            $msg .= sprintf("%13.1f", $total);
            $msg .= sprintf("%13.1f", $shared);
            $msg .= "\n";
        }
        my ($aday, $mon, $day, $hms, $year) = split(/\s+/, localtime());
        $hms =~ s/://g;
        $day = length($day) < 2 ? "0${day}" : $day;
        my $file = $log_dir . "/memory_snapshot-";
        $file .= $proc . "-" . $node . "-" . $year . $mon . $day . $hms .".txt";

        my $show_platform = "";
        if ($osType =~ /xr/i) {
            $show_platform = &show_platform();
        } elsif ($osType =~ /calv|sysadmin/i) {
            $show_platform = &show_platform_calvados();
        }
        if ($show_platform ne "") {
            $msg .= "\n" . "=" x 10 . " show platform " . "=" x 10;
            $msg .= "\n" . $show_platform . "\n";
        }
        $msg .= "\n" . $version_content;
        my $show_install = get_show_install($sys_info);
        if ($show_install =~ /\w+/) {
           $msg .= "\n========== show install active ==========.\n";
           $msg .= $show_install;
           $msg .= "\n";
        }
        $msg .= "\n\n--- router system uptime: $uptime minutes ---\n";
        my $con_msg = "";
        if (open(WD, ">$file")) {
            print WD $msg;
            push @files, $file;
            $con_msg .= "Snapshot saved under $file.\n";
        }
        close(WD);
    } ;# foreach my $mem_leak (@$_mem_leaks)
    return @files;
} ;# sub dump_memory_logs$($$$)

sub dump_cpu_logs($$$$) {
    my $_cpu_logs  = shift;
    my $node       = shift;
    my $log_dir    = shift;
    my $uptime     = shift;

    $node =~ s/\//_/g;
    my $msg = "";
    my @files = ();

    my ($aday, $mon, $day, $hms, $year) = split(/\s+/, localtime());
    $hms =~ s/://g;
    $day = length($day) < 2 ? "0${day}" : $day;
    my $log_timestamp = $year . $mon . $day . $hms;

    foreach my $cpu_log (@$_cpu_logs) {
        $msg = "CPU snapshots on $node.\n";
    $msg .=<<END;
---------------------------- --------------- ------- ------------ ------------
Timestamp                    Process         PID      CPU Use (%)    CPU Time
---------------------------- --------------- ------- ------------ ------------
END
        my ($pid, $proc, $timestamp1, $cpup1, $cput1,
                         $timestamp2, $cpup2, $cput2,
                         $timestamp3, $cpup3, $cput3) = split(/,/, $cpu_log);
        $msg .= sprintf("%-29s", $timestamp1);
        $msg .= sprintf("%-16s", $proc);
        $msg .= sprintf("%-7s", $pid);
        $msg .= sprintf("%13.1f", $cpup1);
        $msg .= sprintf("%13.1f", $cput1);
        $msg .= "\n";
        $msg .= sprintf("%-29s", $timestamp2);
        $msg .= sprintf("%-16s", $proc);
        $msg .= sprintf("%-7s", $pid);
        $msg .= sprintf("%13.1f", $cpup2);
        $msg .= sprintf("%13.1f", $cput2);
        $msg .= "\n";
        $msg .= sprintf("%-29s", $timestamp3);
        $msg .= sprintf("%-16s", $proc);
        $msg .= sprintf("%-7s", $pid);
        $msg .= sprintf("%13.1f", $cpup3);
        $msg .= sprintf("%13.1f", $cput3);
        $msg .= "\n";

        my $file = $log_dir . "/cpu_snapshot-";
        $file .= $proc . "-" . $node . "-" . $log_timestamp . ".txt";

        my $show_platform = "";
        if ($osType =~ /xr/i) {
            $show_platform = &show_platform();
        } elsif ($osType =~ /calv|sysadmin/i) {
            $show_platform = &show_platform_calvados();
        }
        if ($show_platform ne "") {
            $msg .= "\n" . "=" x 10 . " show platform " . "=" x 10;
            $msg .= "\n" . $show_platform . "\n";
        }
        $msg .= "\n" . $version_content;
        my $show_install = get_show_install($sys_info);
        if ($show_install =~ /\w+/) {
           $msg .= "\n========== show install active ==========.\n";
           $msg .= $show_install;
           $msg .= "\n";
        }
        $msg .= "\n\n--- router system uptime: $uptime ---\n";
        my $con_msg = "";
        if (open(WD, ">$file")) {
            print WD $msg;
            push @files, $file;
            $con_msg .= "Snapshot saved under $file.\n";
        }
        close(WD);
    } ;#foreach my $cpu_hog (@$_cpu_logs)
    return @files;
} ;# sub dump_cpu_logs$($$$)

sub save_disk_logs($$$$) {
    my $mem_info   = shift;
    my $node       = shift;
    my $log_dir    = shift;
    my $uptime     = shift;

    $node =~ s/\//_/g;
    my $msg = "Disk is full on $node.\n";
    foreach my $mount (@{$mem_info->{mountList}}) {
        if ($mem_info->{df}->{$mount}->{usepct} >= 99) {
            $msg .= $mount . " is " . $mem_info->{df}->{$mount}->{usepct};
            $msg .= "% full\n";
        }
    }
    my ($aday, $mon, $day, $hms, $year) = split(/\s+/, localtime());
    $hms =~ s/://g;
    $day = length($day) < 2 ? "0${day}" : $day;
    my $file = $log_dir . "/disk_snapshot-" . $node . "-";
    $file .= $year . $mon . $day . $hms . ".txt";

    my $show_platform = "";
    if ($osType =~ /xr/i) {
        $show_platform = &show_platform();
    } elsif ($osType =~ /calv|sysadmin/i) {
        $show_platform = &show_platform_calvados();
    }
    if ($show_platform ne "") {
        $msg .= "\n" . "=" x 10 . " show platform " . "=" x 10;
        $msg .= "\n" . $show_platform . "\n";
    }
    $msg .= "\n" . $version_content;
    my $show_install = get_show_install($sys_info);
    if ($show_install =~ /\w+/) {
           $msg .= "\n========== show install active ==========.\n";
           $msg .= $show_install;
           $msg .= "\n";
    }
    $msg .= "\n\n--- router system uptime: $uptime ---\n";
    my $con_msg = "";
    if (open(WD, ">$file")) {
        print WD $msg;
        $con_msg .= "Snapshot saved under $file.\n";
    }
    close(WD);
    return $file;
} ;# sub save_disk_logs($$$)

#################################################
#this should only be executed on standby RP
#################################################
sub clean_stale_logs($$$) {
    my $log_dir = shift;
    my $log_rotation_interval = shift;    #1 days: 1*24*3600
    my $log_rotation_count = shift || 3;  #3 rotations

    my $log_size = 4096;

    if (!opendir(DIR, $log_dir)) {
        closedir(DIR);
        return 1;
    }
    my @logs = readdir(DIR);
    closedir(DIR);
    my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
                                  $atime,$mtime,$ctime,$blksize,$blocks);
    my @logList = ();
    foreach my $log_file (@logs) {
        next if ($log_file !~ /\.log$/);
        my $fullname = $log_dir . "/" . $log_file;
        ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
                 $atime,$mtime,$ctime,$blksize,$blocks) = stat($fullname);

        my $now = time();
        my $duration = $now - $mtime;
        if (($duration > $log_rotation_interval) && ($size > $log_size)) {
            my $log = new Logfile::Rotate(File    => $fullname,
                                          Count   => $log_rotation_count,
                                          Gzip    => 'lib',
                                          Flock   => 'yes',
                                          Persist => 'yes',
                                 );
            $log->rotate();
            undef $log;
            if (open(WD, ">$fullname")) {
                print WD "";
            }
            close(WD);
            push @logList, $log_file;
            my $uptime_file = $fullname;
            $uptime_file =~ s/\.log/\-uptime\.txt/;
            unlink $uptime_file if (-f $uptime_file);
        }
    } ;# foreach my $log_file (@logs)
    return @logList;
} ;# sub clean_stale_logs($$$)

#################################################
# cleanup existing ltrace (smaps) statistics
# it will be re-collected to keep synced with top data
#################################################
sub rotate_ltraces($$$) {
    my $ltrace_depot_dir   = shift;
    my $_rotatedLogs       = shift;
    my $log_rotation_count = shift || 3;
    my @rotatedLogs = @$_rotatedLogs;

    foreach my $log (@rotatedLogs) {
        next if ($log !~ /\.log$/);
        $log =~ s/\.log//;
        $log =~ s/^(xr|sysadmin|calvados)-//;
        my $node_dir = $ltrace_depot_dir . "/" . $log;
        my $tar_name = $log . "-ltrace.tar";
        my $full_tar_name = $ltrace_depot_dir . $tar_name;
        if (-d $node_dir) {
            my $tar_cmd = "cd $ltrace_depot_dir && /bin/tar czf $tar_name $log";
            if (system("$tar_cmd")) {
                my $msg = "PAM: Unable to create tar file ($tar_name): $!";
                $msg .= "\ntar command: " . $tar_cmd;
                my $_sys_info = &getOsType();
                my $severity = "warning";
                create_syslog($_sys_info, $msg, $severity);
            } else {
                my $_log = new Logfile::Rotate(File    => $full_tar_name,
                                               Count   => $log_rotation_count,
                                               Gzip    => 'lib',
                                               Flock   => 'yes',
                                               Persist => 'yes',
                                               );
                $_log->rotate();
            }
            #cleanup existing files
            while(my $nextname = <$node_dir/*>) {
                unlink $nextname;
            }
        }
    }
    return 1;
} ;# sub rotate_ltraces($$)

#################################################
# rotate clihistory file
#################################################
sub rotate_clihistory($$$) {
    my $clihistory_dir          = shift;
    my $log_rotation_size_limit = shift || 2048000; #2MB
    my $log_rotation_count      = shift || 1;

    return 1 if (! -d $clihistory_dir);
    return 1 if (!opendir(DIR, $clihistory_dir));
    my @files = readdir(DIR);
    foreach my $file (@files) {
        next if ($file !~ /history$/);
        my $full_filename = $clihistory_dir . "/" . $file;
        my $size = -s $full_filename;
        next if ($size < $log_rotation_size_limit);
        my $_log = new Logfile::Rotate(File    => $full_filename,
                                       Count   => $log_rotation_count,
                                       Gzip    => 'lib',
                                       Flock   => 'yes',
                                       Persist => 'yes',
                                       );
           $_log->rotate();
    }
    return 1;
} ;# sub rotate_clihistory($$$)


sub get_false_leak_proc_info($$) {
    my $sys_info   = shift;
    my $input_file =shift;
    my $platform = $sys_info->{platform},
    my $osType = $sys_info->{hostType};
    my $dec;
    if(!open(FD, $input_file)) {
        return $dec;
    }
    my $output = "";
    while (my $line = <FD>) {
        $line =~ s/[\r\n]//g;
        $output .= $line;
    }
    close(FD);
    $dec = decode_json $output;
    if ($dec->{$osType}) {
        return $dec->{$osType};
    } else {
        return $dec;
    }
}


######################################
#perform perl install on LC on panini
######################################
sub do_perl_install ($$) {
    my $node_info = shift;
    my $sys_info = shift;

    my $vf1_3073_ip = $sys_info->{vf1_3073_ip};

    my $min_uptime = 3600; #if already up more than 1 hour, should not archive

    my @nodeList = ();
    my $osType = $sys_info->{hostType};
    my $platform = $sys_info->{platform};
    my $chvrf = "";
    if ( ($osType =~ /calv/i) &&
         ($platform =~ /(ncs(4|6)k|panini|scapa)/i) ) {
        $chvrf = "/sbin/chvrf 0 ";
    }

    my $ssh_o = "-q -o UserKnownHostsFile=/dev/null ";
    $ssh_o .= "-o StrictHostKeyChecking=no ";
    my  $start_pam = "/pkg/opt/cisco/pam/start_pam.sh";
    if ( $osType =~ /calv|admin/i) {
        $start_pam = "/opt/cisco/calvados/pam/start_pam.sh";
    }

    my $ok_cnt = 0;
    foreach my $node (@{$node_info->{nodeList}}) {
        my $ip = $node_info->{$node}->{IP};
        next if ($node =~ /R[S]?P\d+/);
        next if (($platform =~ /nsc4k|scapa/i) && ($node =~ /VM2/));
        my $output = `$chvrf /usr/bin/ssh $ssh_o $ip "ls -l /usr/bin/perl"`;
        if ($output !~ /\/usr\/bin\/perl/) {
            `$chvrf /usr/bin/ssh $ssh_o $ip "$start_pam &"`;
            sleep 10;
            $output = `$chvrf /usr/bin/ssh $ssh_o $ip "ls -l /usr/bin/perl"`;
            if ($output =~ /\/usr\/bin\/perl/) {
                $ok_cnt++;
            }
        } else {
            $ok_cnt++;
        }
        push @nodeList, $node;
    } ;# foreach my $node (@{$node_info->{nodeList}})
    if ($ok_cnt eq scalar(@nodeList)) {
        return 1;
    }
    return 0;
} ;#sub do_perl_install ($$)


######################################
#check if perl is installed as rpm
######################################
sub check_perl_rpm() {
    my $rpm_cmd = "" ;
    if ( -f "/bin/rpm" ) {
        $rpm_cmd = "/bin/rpm" ;
    } elsif ( -f "/usr/bin/rpm" ) {
        $rpm_cmd = "/usr/bin/rpm" ;
    } else {
       return 0;
    }
    my $output = `$rpm_cmd -q perl`;
    if ($output =~ /^perl/) {
        return 1;
    }
    return 0;
}
