#!/bin/bash
# Copyright (c) 2015, 2020 by cisco Systems, Inc.
# January 2015, Wilson Talaugon
#
#--------------------------------------------------------------------------
# dcache_limit:
#     This script is responsible for monitoring and limiting the disk
#     cache usage during heavy file/disk operations. The disk cache limit
#     can be set in MAX_CACHE_MB. Only one instance of the script is
#     allowed to run. A session name can optionally be set using
#     the DCL_SESSION environment variable. The stop command must use
#     the same session name as the currently running script for it
#     to terminate the operation. The script terminates after 60 minutes
#     if it is not stopped. The timeout can be changed through MAX_RUNTIME.
#
# Arguments
# $1:
#     -start - Starts the limiter in the background. Nothing is started
#              if an earlier instance of the script is already running.
#     -stop  - Stops the currently running script and drops the disk cache.
#
# Environment:
#     DCL_SESSION=<name>
#       Session name can be set in this environment variable for the start 
#       and stop operations. This is to ensure that no other session can 
#       inadvertently interrupt the current running operation. This setting
#       is optional to support generic use of the script. This must be set
#       outside of the script.
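#     MAX_CACHE_MB=<size>
#       This sets the max amount of disk cache that will be used for all
#       file/disk operations. Default size is 256MB. Note that the script
#       assigns this value itself (256, or the platform value reported by
#       xr_sysctl when there is one), so a value inherited from the
#       environment is overwritten.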

# Action requested on the command line.
arg="${1}"

# Cache limit in MB: begin with the built-in default, then let the value
# reported by xr_sysctl for cisco.ios.xr.platform.max_cache replace it
# whenever that lookup prints something.
MAX_CACHE_MB=256
PD_MAX_CACHE_MB="$(/usr/bin/xr_sysctl -e -n cisco.ios.xr.platform.max_cache)"
[[ -z ${PD_MAX_CACHE_MB} ]] || MAX_CACHE_MB="${PD_MAX_CACHE_MB}"

# Run-time limit of the monitoring loop, counted in one-second
# iterations (3600 = 60 minutes).
MAX_RUNTIME=3600

# Maximum number of log lines.
# Each line records the time-stamp of a cache clearance.
MAX_LOG_LINES=10000
LOG_FILE='/var/log/dcache_limit.log'

# Return codes used by the predicate functions (shell convention: 0 = yes).
true=0 false=1

# PID of this invocation of the script.
MYPID="$$"

# Print the one-line command synopsis on stdout.
usage()
{
   printf '%s\n' '[DCL_SESSION=<session-name>] dcache_limit -start/stop'
}

#
# Keep the log file bounded.
#
# - When the log file does not exist it is created with a
#   "logging started" line.
# - When it holds MAX_LOG_LINES lines or more, its first half is deleted
#   and a "log truncated" line is appended.  If that in-place edit fails,
#   the file is removed and restarted with a "log reinitialized" line.
#
# Globals:   LOG_FILE (read; the file is rewritten), MAX_LOG_LINES (read)
# Arguments: none
#
check_n_truncate()
{
    local stamp_fmt="%Y-%m-%d %T.%N %Z"
    local line_count stamp

    # No log yet: start a fresh one and be done.
    if [[ ! -f "${LOG_FILE}" ]]; then
        stamp=$(date +"${stamp_fmt}")
        echo "$stamp : logging started" > "${LOG_FILE}"
        return
    fi

    # Still under the cap: leave the file untouched.
    line_count=$(wc -l < "${LOG_FILE}")
    [[ ${line_count} -ge ${MAX_LOG_LINES} ]] || return 0

    if sed -i -e "1,$((line_count / 2))d" "${LOG_FILE}"; then
        stamp=$(date +"${stamp_fmt}")
        echo "$stamp : log truncated" >> "${LOG_FILE}"
    else
        # Could be due to space crunch
        rm -f "${LOG_FILE}"
        stamp=$(date +"${stamp_fmt}")
        echo "$stamp : log reinitialized" > "${LOG_FILE}"
    fi
}

#
# Append one time-stamped line to the log file.
#
# Globals:   LOG_FILE (read; the file is appended to)
# Arguments: $* - message text; the arguments are joined into one line
# Outputs:   "<time-stamp> : <message>" appended to LOG_FILE
#
log_message()
{
    local stamp

    # Take the time-stamp first so that it reflects the moment of the
    # call, not the time spent trimming the log.
    stamp=$(date +"%Y-%m-%d %T.%N %Z")

    check_n_truncate

    # The target is quoted: an unquoted path containing whitespace makes
    # bash reject the redirection as ambiguous and the line is lost.
    printf '%s : %s\n' "$stamp" "$*" >> "${LOG_FILE}"
}

# Look up the oldest process matched by "pgrep -o dcache_limit" and store
# its PID in the caller's variable.
#
# Arguments: $1 - name of the variable that receives the PID
# Outputs:   the named variable is set to the PID, or to the empty string
#            when pgrep prints nothing
get_pid()
{
   # printf -v assigns the text verbatim; unlike eval it never re-parses
   # the pgrep output as shell code.
   printf -v "$1" '%s' "$(pgrep -o dcache_limit)"
}

# Report whether no earlier instance of the script is running.
#
# The PID obtained from get_pid is compared with the PID of the
# invocation that asked for the start or stop (MYPID): when both are the
# same, this invocation is the only one.  An empty lookup result compares
# as 0 and is therefore reported as "running".
#
# Globals:   MYPID, true, false (read)
# Returns:   $true when the looked-up PID equals MYPID, $false otherwise
is_not_running()
{
   local oldest_pid

   get_pid oldest_pid
   if ! [[ $oldest_pid -eq $MYPID ]]; then
      return $false
   fi
   return $true
}

# Check whether the running script was started under the same session
# name as the invocation doing the stop.
#
# The session of the running script is read from the DCL_SESSION entry of
# /proc/<pid>/environ, where <pid> comes from get_pid.  A missing entry,
# or an unreadable environ file, counts as an empty session name.
#
# Globals:   DCL_SESSION, true, false (read)
# Returns:   $true when both session names are equal, $false otherwise
is_same_session()
{
   local _pid entry environ_file
   local running_session=""

   get_pid _pid
   environ_file="/proc/${_pid}/environ"

   if [[ -r "$environ_file" ]]; then
      # environ is a list of NUL-terminated NAME=value entries.  Only the
      # entry whose name is exactly DCL_SESSION is used, and everything
      # after the first '=' is kept, so session names containing '='
      # compare correctly.
      while IFS= read -r -d '' entry || [[ -n "$entry" ]]; do
         if [[ "$entry" == DCL_SESSION=* ]]; then
            running_session=${entry#DCL_SESSION=}
            break
         fi
      done < "$environ_file"
   fi

   if [[ "$running_session" == "${DCL_SESSION:-}" ]]; then
      return $true
   fi
   return $false
}

# Monitoring loop: once per second, add up the buffer and page-cache
# usage and drop the caches when the sum reaches MAX_CACHE_MB.  The loop
# ends by itself once its iteration counter exceeds MAX_RUNTIME.
#
# Globals:   MAX_RUNTIME, MAX_CACHE_MB, DCL_SESSION (read)
# Arguments: none
# Outputs:   one log_message line per cache drop; writes 3 to
#            /proc/sys/vm/drop_caches
start_limit()
{
   local _runcount=0
   local cache buffer totalmem

   while [[ $_runcount -le $MAX_RUNTIME ]]; do
      # Take Buffers and Cached straight from /proc/meminfo (kB -> MB).
      # The columns printed by "free -m" are not the same in every procps
      # release (newer ones show "buff/cache" and "available" in columns
      # 6 and 7), so picking them by position is not reliable.
      buffer=""
      cache=""
      read -r buffer cache < <(awk '
         /^Buffers:/ { b = int($2 / 1024) }
         /^Cached:/  { c = int($2 / 1024) }
         END         { print b + 0, c + 0 }' /proc/meminfo 2>/dev/null)
      # A failed read must not break the arithmetic below.
      buffer=${buffer:-0}
      cache=${cache:-0}
      totalmem=$(( cache + buffer ))

      # drop the disk cache if it exceeded the limit
      if [[ $totalmem -ge $MAX_CACHE_MB ]]; then
         log_message "$DCL_SESSION cache=$cache M  buffer=$buffer M"
         echo 3 > /proc/sys/vm/drop_caches
      fi
      sleep 1
      _runcount=$(( _runcount + 1 ))
   done
}

# Launch the monitoring loop in the background, unless an earlier
# instance of the script is already running.
#
# Arguments: any arguments are handed on to start_limit unchanged
start()
{
   if is_not_running; then
      # "$@" keeps each argument as one word; an unquoted $* would split
      # arguments on whitespace and expand glob characters.
      start_limit "$@" &
   fi
}

# Stop the running script and drop the disk cache.
#
# Acts only when an earlier instance exists (is_not_running fails) and
# is_same_session accepts it; in every other case nothing is touched.
#
# Arguments: none
# Returns:   0 when there is nothing to stop, otherwise the status of the
#            write to /proc/sys/vm/drop_caches
stop()
{
   local _pid

   if is_not_running || ! is_same_session; then
      return 0
   fi

   get_pid _pid
   # The lookup may come back empty (for example when the instance has
   # just exited).  kill without a PID only prints a usage error, and
   # there is then nothing whose cache usage needs to be cleaned up.
   if [[ ! "$_pid" =~ ^[0-9]+$ ]]; then
      return 0
   fi

   # kill the old running script
   kill -9 "$_pid"
   echo 3 > /proc/sys/vm/drop_caches
}

# Dispatch on the requested action; anything else prints the synopsis.
if [[ "$arg" == "-start" ]]; then
   start
elif [[ "$arg" == "-stop" ]]; then
   stop
else
   usage
fi
