#!/bin/bash
#
# collect_mem_usage.sh - Simple script to collect a snapshot of the 
#                        memory usage.
#
# Check usage() below for available options
#
# Copyright (c) 2015 by cisco Systems, Inc.
# All rights reserved.
#


# Per-run staging directory; $$ (this script's PID) keeps runs apart.
MYDIR="/misc/scratch/working_dir.$$"

# Platform settings file. PLATFORM (used below) and VIRT_METHOD (used by
# start_snapshot) are presumably defined by it -- confirm against the
# bootstrap file, which is not part of this script.
BOOTSTRAP_FILE="/etc/init.d/calvados_bootstrap.cfg"
if [[ ! -f ${BOOTSTRAP_FILE} ]]; then
    echo "File: ${BOOTSTRAP_FILE} does not exist. Exiting..."
    exit 1
fi
# Source the very path that was just checked instead of a second,
# hard-coded copy of it.
source "${BOOTSTRAP_FILE}"

#
# CGROUP_PATHS[i] is a cgroup "tasks" file and REPO_PATH[i] (same index) is
# the snapshot directory that receives the /proc data of those tasks.
#

#
# HostOS
CGROUP_PATHS[0]="/dev/cgroup/memory/tasks"
if [[ "$PLATFORM" = "xrv9k" ]]; then
    CGROUP_PATHS[1]="/dev/cgroup/memory/top_spirit/tasks"
fi

#
# All LXC's. At present we support only sysadmin & XR.
# TODO: Make this dynamic to support UVF and other third-party apps.
# Execute a virsh list to get all active LXC's; only active LXC's have a
# cgroup directory created.
CGROUP_PATHS[2]="/dev/cgroup/memory/machine/sysadmin.libvirt-lxc/tasks"
CGROUP_PATHS[3]="/dev/cgroup/memory/machine/default-sdr--1.libvirt-lxc/tasks"

# Indexes 0 and 1 are both host cgroups, so they share one directory.
REPO_PATH[0]="${MYDIR}/proc/hostOS"
REPO_PATH[1]="${MYDIR}/proc/hostOS"
REPO_PATH[2]="${MYDIR}/proc/sysadmin"
REPO_PATH[3]="${MYDIR}/proc/xr"

# Counters filled in by create_snapshot and reported by analyze.
KERNEL_PIDS=0
USER_PIDS=0

# Non-zero: peekinto_process reads the snapshot files under REPO_PATH.
# Zero: it reads the live /proc entries (set for the -p option).
DYNAMIC=1

# 1: report only private/shared totals per process (set for the -s option).
BASIC_ANALYSIS=0

LOGFILE="${MYDIR}/proc/logfile"

#
# peekinto_process
#
# Given a PID, parse its smaps data, total the memory used per memory
# section and pass the totals back to the caller. All values are in KB, as
# reported by smaps.
#
# The smaps data is read in a single pass. Every mapping starts with a header
# line in the maps format ("start-end perms offset dev inode [path]") and the
# counters that follow it are credited to the sections that header belongs to:
#   heap            - path is exactly [heap]
#   stack           - path starts with [stack
#   anonymous       - the header has no path at all
#   /dev/shm        - header contains /dev/shm
#   ltrace          - header contains /dev/shm ... ltrace
#   heap in ltrace  - header contains /dev/shm ... ltrace ... heap
#
# Globals:
#   DYNAMIC          non-zero: use the <pid>_smaps, <pid>_maps and
#                    <pid>_status files in ${REPO_PATH[$index]};
#                    zero: use the live /proc/<pid> entries
#   REPO_PATH, index snapshot directory to read when DYNAMIC is non-zero
#                    (index is whatever the caller has set)
#   BASIC_ANALYSIS   when 1 only the four clean/dirty totals are computed and
#                    every per-section value is returned as 0
# Arguments:
#   $1         PID to look into
#   $2..${18}  names of the caller's variables that receive the results
# Returns:
#   0 on success, 1 if the smaps/maps data is missing or cannot be read
#
# Look at the footer of this function, for more info on the returned values.
#
function peekinto_process()
{
    local pid_number=$1

    local __pid_name

    local __priv_clean=0
    local __priv_dirty=0
    local __shared_clean=0
    local __shared_dirty=0

    local __priv_stack=0
    local __shared_stack=0
    local __priv_heap=0
    local __shared_heap=0

    local __anon_priv_data=0
    local __anon_shared_data=0

    local __shmem_priv=0
    local __shmem_shared=0

    local __shmem_ltrace_priv=0
    local __shmem_ltrace_shared=0
    local __shmem_ltrace_heap_priv=0
    local __shmem_ltrace_heap_shared=0

    local __smaps __maps __status __totals
    local __detail=1

    if [[ $DYNAMIC -ne 0 ]]; then
        __smaps="${REPO_PATH[$index]}/${pid_number}_smaps"
        __maps="${REPO_PATH[$index]}/${pid_number}_maps"
        __status="${REPO_PATH[$index]}/${pid_number}_status"
    else
        # smaps is read exactly once below, so the live /proc entry is used
        # directly and no temporary copy is needed.
        __smaps="/proc/${pid_number}/smaps"
        __maps="/proc/${pid_number}/maps"
        __status="/proc/${pid_number}/status"
    fi

    if [[ ! -f "$__smaps" || ! -f "$__maps" ]]; then
        return 1
    fi

    # Get the Process Name
    __pid_name=$(awk '/^Name:/ { print $2; exit }' "$__status" 2>/dev/null)
    if [[ -z $__pid_name ]]; then
        __pid_name="-"
    fi

    if [[ $BASIC_ANALYSIS -eq 1 ]]; then
        __detail=0
    fi

    # One pass over smaps. "+ 0" in END forces a number even when nothing
    # matched, so callers never see an empty string in their arithmetic.
    __totals=$(awk -v detail="$__detail" '
        # Header of a mapping: decide which sections it belongs to.
        /^[0-9a-f]+-[0-9a-f]+ / {
            heap = stack = anon = shm = lt = lth = 0
            if (detail + 0) {
                anon  = (NF < 6)
                heap  = ($6 == "[heap]")
                stack = ($6 ~ /^\[stack/)
                shm   = ($0 ~ "/dev/shm")
                lt    = ($0 ~ "/dev/shm.*ltrace")
                lth   = ($0 ~ "/dev/shm.*ltrace.*heap")
            }
            next
        }
        /^Private_Clean:/ { pc += $2 }
        /^Private_Dirty:/ { pd += $2 }
        /^Shared_Clean:/  { sc += $2 }
        /^Shared_Dirty:/  { sd += $2 }
        /^Private/ {
            if (heap)  p_heap  += $2
            if (stack) p_stack += $2
            if (anon)  p_anon  += $2
            if (shm)   p_shm   += $2
            if (lt)    p_lt    += $2
            if (lth)   p_lth   += $2
        }
        /^Shared/ {
            if (heap)  s_heap  += $2
            if (stack) s_stack += $2
            if (anon)  s_anon  += $2
            if (shm)   s_shm   += $2
            if (lt)    s_lt    += $2
            if (lth)   s_lth   += $2
        }
        END {
            print pc + 0, pd + 0, sc + 0, sd + 0, \
                  p_stack + 0, s_stack + 0, p_heap + 0, s_heap + 0, \
                  p_anon + 0, s_anon + 0, p_shm + 0, s_shm + 0, \
                  p_lt + 0, s_lt + 0, p_lth + 0, s_lth + 0
        }
    ' "$__smaps" 2>/dev/null) || return 1

    IFS=' ' read -r __priv_clean __priv_dirty \
                    __shared_clean __shared_dirty \
                    __priv_stack __shared_stack \
                    __priv_heap __shared_heap \
                    __anon_priv_data __anon_shared_data \
                    __shmem_priv __shmem_shared \
                    __shmem_ltrace_priv __shmem_ltrace_shared \
                    __shmem_ltrace_heap_priv \
                    __shmem_ltrace_heap_shared <<< "$__totals"

    # printf -v assigns to the variable named by the caller without
    # re-parsing the value (a quote in the process name is harmless).
    printf -v "$2" '%s' "$__pid_name"                     # Process Name
    printf -v "$3" '%s' "$__priv_clean"                   # Total Private pages clean
    printf -v "$4" '%s' "$__priv_dirty"                   # Total Private pages dirty
    printf -v "$5" '%s' "$__shared_clean"                 # Total Shared pages clean
    printf -v "$6" '%s' "$__shared_dirty"                 # Total Shared pages dirty
    printf -v "$7" '%s' "$__priv_stack"                   # Stack Private pages
    printf -v "$8" '%s' "$__priv_heap"                    # Heap Private pages
    printf -v "$9" '%s' "$__shared_stack"                 # Stack Shared pages
    printf -v "${10}" '%s' "$__shared_heap"               # Heap Shared pages
    printf -v "${11}" '%s' "$__anon_priv_data"            # Anonymous Private pages
    printf -v "${12}" '%s' "$__anon_shared_data"          # Anonymous Shared pages
    printf -v "${13}" '%s' "$__shmem_priv"                # /dev/shm Private pages
    printf -v "${14}" '%s' "$__shmem_shared"              # /dev/shm Shared pages
    printf -v "${15}" '%s' "$__shmem_ltrace_priv"         # ltrace in shmem Private pages
    printf -v "${16}" '%s' "$__shmem_ltrace_shared"       # ltrace in shmem Shared pages
    printf -v "${17}" '%s' "$__shmem_ltrace_heap_priv"    # Heap in ltrace Private pages
    printf -v "${18}" '%s' "$__shmem_ltrace_heap_shared"  # Heap in ltrace Shared pages
}

#
# munch_snapshot
#
# Summarise every process captured in one snapshot directory: one line per
# process is appended to LOGFILE, largest private total first, followed by
# the sum of all private totals.
#
# Globals:
#   BASIC_ANALYSIS  1: name, private total, shared total
#                   otherwise: name, private total, stack+heap+anonymous
#                   private, anonymous total, /dev/shm private and shared
#   LOGFILE         report file that is appended to
# Arguments:
#   $1  snapshot directory holding the <pid>_smaps/_maps/_status files
# Returns:
#   0 on success, 1 if the scratch file cannot be created
#
function munch_snapshot()
{
    local proc_dir=$1
    local total_uss=0

    # All working variables are local; peekinto_process fills the named
    # ones in through bash's dynamic scoping.
    local sort_file smaps_file pid pid_name pid_name_str
    local priv_clean priv_dirty shared_clean shared_dirty
    local priv_stack priv_heap shared_stack shared_heap
    local anon_priv_data anon_shared_data
    local shmem_priv shmem_shared
    local shmem_ltrace_priv shmem_ltrace_shared
    local shmem_ltrace_heap_priv shmem_ltrace_heap_shared
    local total_priv total_shared total_data total_anon

    # Private scratch file: never shared with another run, and it exists
    # even when no process gets written, so sort below cannot fail on it.
    sort_file=$(mktemp /tmp/sort_file.XXXXXX) || return 1

    # Every captured process has a <pid>_smaps file; peekinto_process skips
    # any PID whose smaps/maps pair is incomplete.
    for smaps_file in "$proc_dir"/*_smaps
    do
        [[ -e $smaps_file ]] || continue    # glob matched nothing
        pid=${smaps_file##*/}
        pid=${pid%_smaps}

        if ! peekinto_process "$pid" pid_name \
                              priv_clean \
                              priv_dirty \
                              shared_clean \
                              shared_dirty \
                              priv_stack \
                              priv_heap \
                              shared_stack \
                              shared_heap \
                              anon_priv_data \
                              anon_shared_data \
                              shmem_priv \
                              shmem_shared \
                              shmem_ltrace_priv \
                              shmem_ltrace_shared \
                              shmem_ltrace_heap_priv \
                              shmem_ltrace_heap_shared
        then
            continue
        fi

        pid_name_str="${pid_name}(${pid})"

        # ":-0" keeps the arithmetic valid should a value come back empty.
        total_priv=$(( ${priv_clean:-0} + ${priv_dirty:-0} ))
        total_shared=$(( ${shared_clean:-0} + ${shared_dirty:-0} ))

        # Dump contents for sorting.
        if [[ $BASIC_ANALYSIS -eq 1 ]]; then
            printf "%30s %10d %10d \n"  "$pid_name_str" \
                                        "$total_priv" \
                                        "$total_shared" >> "$sort_file"
        else
            total_data=$(( ${priv_stack:-0} + ${priv_heap:-0} \
                                            + ${anon_priv_data:-0} ))
            total_anon=$(( ${anon_priv_data:-0} + ${anon_shared_data:-0} ))

            printf "%23s %10d %10d %10d %10d %10d\n" \
                                        "$pid_name_str" \
                                        "$total_priv" \
                                        "$total_data" \
                                        "$total_anon" \
                                        "${shmem_priv:-0}" \
                                        "${shmem_shared:-0}" >> "$sort_file"
        fi

        total_uss=$(( total_uss + total_priv ))
    done

    # sort (by the private total, descending) and dump it in the logfile
    sort -k 2 -nr "$sort_file" >> "${LOGFILE}"
    rm -f "$sort_file"

    echo "USS of all User pids: $total_uss KB" >> "${LOGFILE}"
}
readonly -f munch_snapshot

#
# process_snapshot_basic
#
# Write the basic (private/shared totals) report for every snapshot
# directory in REPO_PATH to LOGFILE. A directory is skipped when it does not
# exist or when it already holds a "dir.done" marker; the marker is created
# once a directory has been reported, so directories listed twice in
# REPO_PATH are only processed once.
#
# Globals:
#   REPO_PATH  snapshot directories
#   LOGFILE    report file that is appended to
# Returns:
#   0 always
#
function process_snapshot_basic()
{
    echo -n "Processing snapshot..."

    # Must be called "index": peekinto_process (reached through
    # munch_snapshot) reads this variable to find the snapshot directory.
    local index=0
    local done_marker

    # Horizontal rule, kept as the two literal pieces it was built from.
    local rule="===========================================""=========="

    for index in "${!REPO_PATH[@]}"
    do
        [[ -d ${REPO_PATH[$index]} ]] || continue

        done_marker="${REPO_PATH[$index]}/dir.done"
        if [[ -f $done_marker ]]; then
            continue
        fi

        # Table header
        {
            echo "Processing directory... "
            echo "    ${REPO_PATH[$index]}"
            echo "$rule"
            printf "%30s %10s %10s \n" "PID_NAME(PID)" "PRIVATE" "SHARED"
            echo "$rule"
        } >> "${LOGFILE}"

        munch_snapshot "${REPO_PATH[$index]}"
        touch "$done_marker"

        # Table footer
        {
            echo "$rule"
            echo ""
        } >> "${LOGFILE}"
    done

    echo "DONE"
    return 0
}
readonly -f process_snapshot_basic

#
# process_snapshot_detailed
#
# Write the detailed (per memory section) report for every snapshot
# directory in REPO_PATH to LOGFILE, preceded by a legend that explains the
# columns. A directory is skipped when it does not exist or when it already
# holds a "dir.done" marker; the marker is created once a directory has been
# reported.
#
# Globals:
#   REPO_PATH  snapshot directories
#   LOGFILE    report file that is appended to
# Returns:
#   0 always
#
function process_snapshot_detailed()
{
    echo -n "Processing snapshot..."

    # Must be called "index": peekinto_process (reached through
    # munch_snapshot) reads this variable to find the snapshot directory.
    local index=0
    local done_marker

    # Horizontal rule, kept as the two literal pieces it was built from.
    local rule="===============================================""==================================="

    for index in "${!REPO_PATH[@]}"
    do
        [[ -d ${REPO_PATH[$index]} ]] || continue

        done_marker="${REPO_PATH[$index]}/dir.done"
        if [[ -f $done_marker ]]; then
            continue
        fi

        # Legend and table header
        {
            echo "Processing directory... ${REPO_PATH[$index]}"
            echo "$rule"
            echo " TOTAL     : Total private memory consumed by this process"
            echo "             This includes the private memory used in the shared libs"
            echo ""
            echo "####"
            echo "# HEAP/STACK: Heap & Stack of the process "
            echo "# ANON MAP  : Malloc's that are big and can't be allocated"
            echo "#             from HEAP"
            echo "# SHMEM(P)  : Shared memory(/dev/shm), that is private to this process"
            echo "####"
            echo ""
            echo "####"
            echo "# SHMEM(S)  : Shared memory(/dev/shm), that is shared with this process"
            echo "####"
            echo ""
            echo "$rule"
            printf "%23s %10s %10s %10s %10s %10s\n" \
                   "PID_NAME(PID)" "TOTAL" "HEAP/STACK" "ANON MAP" \
                   "SHMEM(P)" "SHMEM(S)"
            echo "$rule"
        } >> "${LOGFILE}"

        munch_snapshot "${REPO_PATH[$index]}"
        touch "$done_marker"

        # Table footer
        {
            echo "$rule"
            echo ""
        } >> "${LOGFILE}"
    done

    echo "DONE"
    return 0
}
readonly -f process_snapshot_detailed

#
# create_snapshot
#
# For every cgroup task list in CGROUP_PATHS, copy the /proc status, maps
# and smaps of each userspace process into the snapshot directory with the
# same index in REPO_PATH, as <pid>_status, <pid>_maps and <pid>_smaps.
#
# Globals:
#   CGROUP_PATHS  cgroup "tasks" files to walk (read)
#   REPO_PATH     snapshot directories, created on demand (read)
#   KERNEL_PIDS   incremented for every task whose smaps yields no data
#   USER_PIDS     incremented for every task whose smaps yields data
# Returns:
#   0 on success, 1 if a snapshot directory cannot be created
#
function create_snapshot()
{
    echo -n "Creating snapshot..."

    local index thread pid user_pid tgid first_line
    local -a thread_array pid_array

    for index in "${!CGROUP_PATHS[@]}"
    do
        if [[ ! -f ${CGROUP_PATHS[$index]} ]]; then
            echo "${CGROUP_PATHS[$index]} does not exist, skipping..."
            continue
        fi

        if ! mkdir -p "${REPO_PATH[$index]}"; then
            echo "Failed to create snapshot directory"
            return 1
        fi

        thread_array=()
        pid_array=()

        #
        # Differentiate between kernel threads and userspace processes:
        # a task whose smaps cannot be read or is empty is counted as a
        # kernel thread. Reading the first line is enough to tell.
        #
        while IFS= read -r thread || [[ -n $thread ]]
        do
            [[ $thread =~ ^[0-9]+$ ]] || continue

            first_line=""
            { IFS= read -r first_line < "/proc/${thread}/smaps"; } 2>/dev/null
            if [[ -z $first_line ]]; then
                KERNEL_PIDS=$(( KERNEL_PIDS + 1 ))
            else
                thread_array[$thread]=1
            fi
        done < "${CGROUP_PATHS[$index]}"

        #
        # Get only the Thread group ID's
        # Threads share the process address space
        #
        for pid in "${!thread_array[@]}"
        do
            tgid=$(awk '/^Tgid:/ { print $2; exit }' \
                                    "/proc/${pid}/status" 2>/dev/null)
            # The task may have exited since it was listed.
            [[ -n $tgid ]] || continue
            pid_array[$tgid]=1
        done

        for user_pid in "${!pid_array[@]}"
        do
            #
            # Ignore transient processes.
            #
            if [[ ! -f /proc/$user_pid/status ]]; then
                continue
            fi

            cat "/proc/${user_pid}/status" > \
                    "${REPO_PATH[$index]}/${user_pid}_status" 2>/dev/null
            cat "/proc/${user_pid}/maps" > \
                    "${REPO_PATH[$index]}/${user_pid}_maps" 2>/dev/null
            cat "/proc/${user_pid}/smaps" > \
                    "${REPO_PATH[$index]}/${user_pid}_smaps" 2>/dev/null
        done

        USER_PIDS=$(( USER_PIDS + ${#thread_array[@]} ))
    done

    echo "DONE"
    return 0
}
readonly -f create_snapshot

#
# pack_snapshot
#
# Creates a gzip'ed tar file (${MYDIR}/proc.tar.gz) of the collected snapshot
# for offbox analysis.
#
# Globals:
#   MYDIR  staging directory that holds the "proc" snapshot tree
# Returns:
#   0 on success, 1 if the staging directory cannot be entered or the
#   archive cannot be created
#
function pack_snapshot()
{
    echo -n "Packing snapshot..."

    # Run in a subshell: the caller's working directory is never touched,
    # and tar is not run at all when the cd fails.
    if ! ( cd "${MYDIR}" && tar cf proc.tar proc && gzip proc.tar ); then
        echo "FAILED"
        return 1
    fi

    echo "DONE"
    echo "Use snapshot ${MYDIR}/proc.tar.gz for offline analysis"

    return 0
}

#
# start_snapshot
#
# Takes a snapshot of the processes on the system by copying the contents of
# /proc/*/*maps into the staging directory (create_snapshot) and packing the
# result into an archive (pack_snapshot).
#
# Globals:
#   VIRT_METHOD  must be "lxc"
#   MYDIR        staging directory, created if needed
# Exits the script with status 1 when the platform is not LXC based or when
# any of the steps fails.
#
function start_snapshot()
{
    if [[ "${VIRT_METHOD}" != "lxc" ]]; then
        echo "The option is supported only on LXC platforms. Exiting..."
        exit 1
    fi

    echo "Trying to gather memory usage of all processes"
    echo "Using directory for staging: ${MYDIR}"

    if ! mkdir -p "${MYDIR}"; then
        echo "Failed to create directory '${MYDIR}'"
        exit 1
    fi

    if ! create_snapshot; then
        echo "Failed to create a snapshot for analysis"
        exit 1
    fi

    if ! pack_snapshot; then
        echo "Failed to pack snapshot!!"
        exit 1
    fi
}

#
# analyze
#
# Write the report header (date, hostname, PID counters) to LOGFILE and then
# run the basic or the detailed snapshot report, depending on BASIC_ANALYSIS.
#
# Globals:
#   LOGFILE                 report file that is appended to
#   KERNEL_PIDS, USER_PIDS  counters shown in the header
#   BASIC_ANALYSIS          1 selects process_snapshot_basic, anything else
#                           process_snapshot_detailed
# Returns:
#   0 always; a failing report only produces a warning
#
function analyze()
{
    local rc=0

    ## HEADER INFO ##
    {
        date
        echo -n "Hostname: "
        hostname
        echo "Cnt of Kernel PIDS: $KERNEL_PIDS, User PIDS: $USER_PIDS"
        echo "All values are in KB"
        echo ""
    } >> "${LOGFILE}"

    if [[ "$BASIC_ANALYSIS" -eq 1 ]]; then
        process_snapshot_basic || rc=$?
    else
        process_snapshot_detailed || rc=$?
    fi

    if [[ $rc -ne 0 ]]; then
        echo "Failed to process snapshot, continuing..."
    fi

    echo "Look at logfile '${LOGFILE}' for initial analysis"
    return 0
}
readonly -f analyze

#
# usage
#
# Print the list of supported command line options on stdout.
#
function usage()
{
    # One argument per output line; the padding inside each string is part
    # of the text that gets printed.
    printf '%s\n' \
        "Usage:" \
        " [-s]                         Collect snapshot and provide        " \
        "                              basic memory usage                  " \
        "" \
        " [-p PID]                     Given a PID, it will dump out       " \
        "                              detailed information on that process"

    # Help text of options that are currently not offered:
#    echo ""
#    echo " [-d]                         Generates a detailed memory use     "
#    echo "                              per process.                        "
#    echo " [-w WORKING_DIR]             Use proc info from the given        "
#    echo "                              directory                           "
}
readonly -f usage

#
# process_pid
#
function process_pid()
{
    local given_pid=$1

    # Get the TGID
    group_leader=`cat /proc/"$given_pid"/status | grep Tgid | \
                                                awk ' { print $2 }'`
    printf "Collecting info for PID: %d\n" "$given_pid"
   peekinto_process $group_leader pid_name \
                                  priv_clean \
                                  priv_dirty \
                                  shared_clean \
                                  shared_dirty \
                                  priv_stack \
                                  priv_heap \
                                  shared_stack \
                                  shared_heap \
                                  anon_priv_data \
                                  anon_shared_data \
                                  shmem_priv \
                                  shmem_shared \
                                  shmem_ltrace_priv \
                                  shmem_ltrace_shared \
                                  shmem_ltrace_heap_priv \
                                  shmem_ltrace_heap_shared 
    concat_str="${pid_name}/${given_pid}/${group_leader}"
    printf "Name/PID/GP Leader        : %20s\n" "$concat_str"
    printf "===================================================\n"

    concat_str="${priv_clean}/${priv_dirty}"
    printf "Private(Code/Data)        : %20s KB\n" "$concat_str"

    concat_str="${shared_clean}/${shared_dirty}"
    printf "Shared(Code/Data)         : %20s KB\n" "$concat_str"
    printf "\n"
    printf "===================================================\n"
    printf "Individual Memory Sections\n"
    printf "===================================================\n"

    printf "  PRIVATE:\n"
    printf "    Stack                 : %20d KB\n" "${priv_stack}"

    concat_data=$((${priv_heap} + ${anon_priv_data}))
    printf "    Heap                  : %20d KB\n" "$concat_data"
    printf "    /dev/shm              : %20d KB\n" "${shmem_priv}"
    printf "    ltrace(/dev/shm)      : %20d KB\n" "${shmem_ltrace_priv}"
    printf "\n"

    printf "  SHARED:\n"
    printf "    Stack                 : %20d KB\n" "${shared_stack}"

    concat_data=$((${shared_heap} + ${anon_shared_data}))
    printf "    Heap                  : %20d KB\n" "$concat_data"
    printf "    /dev/shm              : %20d KB\n" "${shmem_shared}"
    printf "    ltrace(/dev/shm)      : %20d KB\n" "${shmem_ltrace_shared}"
    printf "\n"

    printf "Parse /proc/${group_leader}/smaps for detailed info\n"
    printf "===================================================\n"
}

#
# Main starts here
#
# Parses the command line, then either collects and analyzes a snapshot of
# all processes (-s) or reports on a single live process (-p PID).
#
if [[ $# -eq 0 ]]; then
    usage
    exit 1
fi

SNAPSHOT=0
DETAILED_LOG=0
GIVEN_DIR=""
PID=0

#
# Below code is copied from 
# http://www.bahmanm.com/blogs/command-line-options-how-to-parse-in-bash-using-getopt
#
# read the options
# getopt reports a bad option itself and returns non-zero; without this
# check the script would silently do nothing and still exit 0.
if ! TEMP=$(getopt -o hsp:d -n 'Collect Memory:' -- "$@"); then
    usage
    exit 1
fi
# getopt emits a shell-quoted argument list; eval is the documented way to
# load it back into the positional parameters.
eval set -- "$TEMP"

# extract options and their arguments into variables.
while true ; do
    case "$1" in
        -s) SNAPSHOT=1 ; shift ;;
        # Recorded only; the code that acts on it is disabled further down.
        -d) DETAILED_LOG=1; shift ;;
        -p)
            PID=$2
            # Digits only: names such as "self" also exist under /proc and
            # would otherwise pass the check below.
            if [[ ! $PID =~ ^[0-9]+$ || ! -f /proc/$PID/status ]]; then
                echo "Invalid PID"
                exit 1
            fi

            # A kernel thread has an empty smaps; the first line is enough
            # to tell.
            ksmaps=""
            IFS= read -r ksmaps < "/proc/$PID/smaps"
            if [[ -z $ksmaps ]]; then
                echo "Can't get info for Kernel threads"
                exit 1
            fi
            shift 2 ;;
#        -w)
#	    GIVEN_DIR=$2
#            if [[ ! -d GIVEN_DIR ]]; then
#   		echo "Invalid working directory"
#                exit 1
#	    fi
#            shift 2 ;;
        --) shift ; break ;;
        # Also reached for -h, which getopt accepts.
        *) usage; exit 1 ;;
    esac
done

if [[ $SNAPSHOT -eq 1 && $GIVEN_DIR != "" ]]; then
    echo "Invalid args"
    exit 1
fi

if [[ $SNAPSHOT -eq 1 ]]; then
    BASIC_ANALYSIS=1
    if ! start_snapshot; then
       exit 1
    fi
    analyze
fi

#if [[ $DETAILED_LOG -eq 1 ]]; then
#    echo "****** Takes about 30 minutes to process all logs *******"    
#    start_snapshot
#    if [[ $? -ne 0 ]]; then
#       exit 1
#    fi
#    analyze
#fi

if [[ $PID -ne 0 ]]; then
    # Read the live /proc entries instead of a snapshot directory.
    DYNAMIC=0
    process_pid "$PID"
    exit 0
fi

exit 0
