#!/bin/bash
#
# Warn when the system is out of memory using an errmsg
# Do sync and drop_cache
#
# Jan 2012, Wei Xu
#
# Copyright (c) 2012-2015 by cisco Systems, Inc.
# All rights reserved

# Shell settings:
#   -m          enable job control (monitor mode)
#   -b          report the status of terminated background jobs immediately
#   nounset     treat the expansion of an unset variable as a fatal error

set -m
set -b
set -o nounset

OOM_LOG_FILE=oom

MY_NAME=$(basename "$0")

# Values written to /proc/<pid>/oom_adj: OOM_NEVER for processes that must
# not be picked by the kernel OOM killer, OOM_BAIT for the bait process.
OOM_NEVER="-17"
OOM_BAIT="15"

BAIT_PID_FILE="/var/run/oom-bait.pid"

# we should not get killed.
echo -n "$OOM_NEVER" > "/proc/$$/oom_adj"

#
# Only if free memory falls below this value (in MB), the offending
# process would be killed.
#
REQ_MIN_MB=80
REQ_MIN_BYTES=$((REQ_MIN_MB * 1024 * 1024))

# Platform configuration; VIRT_METHOD is expected to come from here.
. /etc/init.d/calvados_bootstrap.cfg

# VMTYPE is the value of the vmtype= kernel command line argument,
# or empty when the argument is absent.
VMTYPE=""
if grep -q 'vmtype=' /proc/cmdline; then
    VMTYPE=$(sed 's/^.*vmtype=//' /proc/cmdline | cut -d" " -f1)
fi

# An LXC guest is anything virtualized with lxc that is not the host OS.
# VIRT_METHOD is expanded with an empty default: under nounset a config
# file that does not define it would otherwise abort the script here.
IS_LXC_GUEST=0
if [[ "${VIRT_METHOD:-}" == "lxc" && "$VMTYPE" != "hostos" ]]; then
    IS_LXC_GUEST=1
    TOTAL_BYTES=$(free -b | awk '/^Mem:/{print $2}')
    THRESHOLD_BYTES=$((TOTAL_BYTES - REQ_MIN_BYTES))
fi

#
# Start the cgroup_oom.sh, only if there isn't an already existing process
# (LXC guests only; THRESHOLD_BYTES is not set otherwise).
#
if [[ "$IS_LXC_GUEST" -eq 1 ]] && ! pgrep cgroup_oom.sh >/dev/null 2>&1; then

    #
    # Start the process and set OOM priority.
    #
    /usr/bin/cgroup_oom.sh "$THRESHOLD_BYTES" > /dev/null 2>&1 &
    CGROUP_OOM_SH_PID=$!
    echo -n "$OOM_NEVER" > "/proc/$CGROUP_OOM_SH_PID/oom_adj"
fi

# FIFO of the timestamps (seconds since the epoch) of handled OOM events.
declare -a OOMFIFO=()
# Number of timestamps kept in the FIFO.
OOMFIFO_SIZE=4
# When the FIFO is full and its oldest entry is younger than this many
# seconds, there were too many OOM events: reboot the system.
OOMTIME_FATAL=3600      # One hour
# Minimum number of seconds between two OOM triggered reboots.
OOMTIME_DAY_FATAL=86400 # 24 hours

#
# add_to_oom_fifo <value>
#
# Append <value> to the end of the global OOMFIFO array. If the array then
# holds more than OOMFIFO_SIZE entries, the oldest (first) entry is dropped.
#
# Globals: OOMFIFO (read/written), OOMFIFO_SIZE (read)
#
function add_to_oom_fifo () {
    local val=$1

    OOMFIFO+=( "$val" )

    if [[ ${#OOMFIFO[@]} -gt $OOMFIFO_SIZE ]]; then
        # The expansion is quoted so that the surviving entries are copied
        # verbatim, without word splitting or pathname expansion.
        OOMFIFO=( "${OOMFIFO[@]:1:$OOMFIFO_SIZE}" )
    fi
}

#
# peek_oom_fifo
#
# Print the first entry of the global OOMFIFO array (the one that was added
# earliest), without a trailing newline. Nothing is printed unless the array
# holds exactly OOMFIFO_SIZE entries.
#
# Globals: OOMFIFO (read), OOMFIFO_SIZE (read)
#
function peek_oom_fifo () {
    local depth=${#OOMFIFO[@]}

    # Not full (yet): stay silent.
    [[ $depth -eq $OOMFIFO_SIZE ]] || return 0

    echo -n "${OOMFIFO[@]:0:1}"
}

#
# handle_oom
#
# React to a low memory condition:
#   1. report it on stdout and through logger,
#   2. send the default kill signal to the process with the largest RSS,
#   3. sync and drop the caches,
#   4. reboot when OOM events keep recurring (at most once per
#      OOMTIME_DAY_FATAL seconds).
#
# Globals read:
#   phymem              free memory in MB, used in the messages only
#   oom_ts              timestamp of the current OOM event
#   old_oom_ts          timestamp the main loop obtained from peek_oom_fifo;
#                       empty while the OOM FIFO is not full
#   OOMTIME_FATAL, OOMTIME_DAY_FATAL
# Globals written:
#   LAST_REBOOT (and whatever /misc/config/oom-reboot.txt assigns)
# Files:
#   /misc/config/oom-reboot.txt   holds LAST_OOM_REBOOT=<timestamp>
#
function handle_oom () {
    echo ""
    echo " -----------------------------------------------------"
    echo " System is running low on memory"
    echo " Free memory $phymem Mb"
    echo " -----------------------------------------------------"
    echo " Process consuming largest memory"
    echo " Memory in KBytes"
    echo " -----------------------------------------------------"
    logger -p 3 " Out of memory (oom.sh): Free memory $phymem Mb"
    logger -p 3 " Out of memory (oom.sh): Process consuming largest memory"
    logger -p 3 " Out of memory (oom.sh): Memory in KBytes"

    # The pid column is requested before the command name: a command name
    # may contain blanks, so it has to be the last column. 'read' assigns
    # the whole remainder of the line to victim_comm.
    ps ax -orss=,pid=,comm= | sort -b -nr | head -1 |
        while read -r victim_rss victim_pid victim_comm; do
            logger -p 0 "Out of memory (oom.sh) killing $victim_comm (Rss $victim_rss KB)"
            kill "$victim_pid"
        done

    # drop caches again or we might kill more processes

    # drop_cache is not NS aware. In LXC env dropping cache from
    # within a container (say sysadmin) affects all LXCs and host,
    # temporarily affecting performance.
    sync
    echo 3 > /proc/sys/vm/drop_caches
    logger -p 3 "Just dropped the cache"

    LAST_REBOOT=0
    if [ -f /misc/config/oom-reboot.txt ]; then
        . /misc/config/oom-reboot.txt
        # The file may be empty or damaged; do not trip over nounset and
        # do not feed garbage into the arithmetic below.
        LAST_REBOOT=${LAST_OOM_REBOOT:-0}
        if ! [[ $LAST_REBOOT =~ ^[0-9]+$ ]]; then
            LAST_REBOOT=0
        fi
    fi

    # Too many oom in OOMTIME_FATAL time & Memory is below threshold
    # Reboot the system
    if [[ -n $old_oom_ts ]]; then
      # The FIFO is full and its oldest entry is less than OOMTIME_FATAL
      # seconds old
      if (( (oom_ts - old_oom_ts) < OOMTIME_FATAL )); then
        # Only one reboot is allowed per OOMTIME_DAY_FATAL seconds
        if (( (oom_ts - LAST_REBOOT) > OOMTIME_DAY_FATAL )); then
          logger -p 0 "Too many Out of memory (oom.sh) in $OOMTIME_FATAL seconds"
          logger -p 0 "Out of memory (oom.sh) reboot"
          echo "LAST_OOM_REBOOT=$oom_ts" >/misc/config/oom-reboot.txt
          sync
          # Ask for a regular reboot first, force it 5 seconds later.
          /sbin/reboot &
          disown $!
          sleep 5
          /sbin/reboot  -f &
          disown $!
        fi
      fi
    fi
}

# Main loop. It never terminates on its own; should the script die,
# init restarts it.

while true; do

    # Launch the bait for the oom killer: a background subshell that
    # only sleeps.
    ( while true; do sleep 100; done ) &
    BAIT_PID=$!

    # Publish the pid of the bait.
    echo "$BAIT_PID" > "$BAIT_PID_FILE"
    sync

    # Make the bait very attractive to the oom killer.
    echo -n "$OOM_BAIT" > "/proc/$BAIT_PID/oom_adj"

    # Block until the bait is gone, i.e. until the trap was sprung and
    # the oom killer took our bait.
    wait "$BAIT_PID"

    # Drop the caches to get proper free memory accounting.
    # drop_cache is not NS aware. In LXC env dropping cache from
    # within a container (say sysadmin) affects all LXCs and host,
    # temporarily affecting performance.
    echo 3 > /proc/sys/vm/drop_caches

    # Still short on memory: sync and drop once more.
    if [[ $(free -m |awk '/^Mem:/{print $4}') -lt $REQ_MIN_MB ]]; then
        sync
        echo 3 > /proc/sys/vm/drop_caches
        logger -p 3 "Synced and dropped the cache"
    else
        logger -p 3 "Just dropped the cache"
    fi

    # Oldest entry of the fifo (empty unless the fifo is full), the
    # timestamp of this event, and a flag telling whether handle_oom ran.
    old_oom_ts=$(peek_oom_fifo)
    oom_ts=$(date +%s)
    oom_handled=0

    case "$IS_LXC_GUEST" in
        0)
            # Not an LXC guest: check free memory once.
            phymem=$(free -m |awk '/^Mem:/{print $4}')
            if [[ $phymem -lt $REQ_MIN_MB ]]; then
                handle_oom
                oom_handled=1
            fi
            ;;
        *)
            # LXC guest: keep handling until free memory is back above
            # the minimum.
            while phymem=$(free -m |awk '/^Mem:/{print $4}')
                  [[ $phymem -lt $REQ_MIN_MB ]]
            do
                handle_oom
                oom_handled=1

                #
                # Sleep for a second to unschedule ourselves.
                # This gives a chance, for the affected process to be
                # scheduled and handle the kill signal.
                sleep 1
            done
            ;;
    esac

    # Record this event in the fifo only if it was actually handled.
    if (( oom_handled == 1 )); then
        add_to_oom_fifo "$oom_ts"
    fi
done
