#!/usr/bin/env python
#
# A small tool to help implement exponential backoff delays as part of
# upstart tasks.
#
# Copyright (c) 2017 by cisco Systems, Inc.
# All rights reserved.
#
# Neil McGill
#
import getopt
import json
import logging
import os
import os.path
import sys
import time
import imp


#
# Print the command-line usage text.
#
def usage_and_exit():
    """Print the command-line help text to stdout.

    Despite its name, this function only prints and then returns; it does
    not terminate the process. Callers that want to stop must call
    sys.exit() themselves after calling it.

    The option list mirrors the getopt specification used by
    UpstartHelper.__init__ ("hi:m:H:j:" plus the matching long options).
    """
    print("""
    A small tool to help implement exponential backoff delays as part of
    upstart tasks.

    Usage (server side):

        -h, --help
            Show this help

        -i, --init_sleep_time <secs>
            How long to sleep for initially

        -m, --max_sleep_time <secs>
            How long to ever sleep for, max

        -H, --healthy_time <secs>
            If called within this time, reset the delay to the init time

        -j, --job_name <name>
            Upstart process name
""")


class UpstartHelper(object):
    """Exponential-backoff sleeper whose state persists between runs.

    Each run loads the previous state for the job from
    /tmp/<job_name>.cfg (if present), decides how long to sleep, saves the
    new state back to that file and then sleeps:

    * first run (restart_count == 0), or the previous run was more than
      healthy_time seconds ago: sleep for init_sleep_time;
    * otherwise: double the previous sleep time;
    * in all cases the sleep time is capped at max_sleep_time.
    """

    # Attributes persisted in the state file, in the order they are saved.
    _STATE_KEYS = (
        "restart_count",
        "restart_time",
        "current_sleep_time",
        "init_sleep_time",
        "max_sleep_time",
        "healthy_time",
    )

    #
    # Pre main, parse arguments.
    #
    def __init__(self):
        """Set defaults, configure logging and parse sys.argv.

        Values are applied in this order: built-in defaults, then the saved
        state file for the job, then command-line options.

        Exits the process with status 1 on an invalid command line, a
        non-integer option value or a missing job name, and with status 0
        after printing the help for -h/--help.
        """
        self.restart_count = 0
        self.restart_time = time.time()

        self.init_sleep_time = 1
        self.current_sleep_time = self.init_sleep_time
        self.max_sleep_time = 60

        self.healthy_time = 60
        self.job_name = None

        self.handler = logging.StreamHandler()
        self.handler.setLevel(logging.INFO)

        self.logger = logging.getLogger(__name__)
        self.logger.addHandler(self.handler)
        self.logger.setLevel(logging.INFO)

        # Wrap the level names in ANSI colour escapes. The handler above has
        # no formatter set, so this only shows up if some formatter prints
        # %(levelname)s.
        logging.addLevelName(logging.INFO,
                             "\033[1;31m%s\033[1;0m" %
                             logging.getLevelName(logging.INFO))
        logging.addLevelName(logging.ERROR,
                             "\033[1;41m%s\033[1;0m" %
                             logging.getLevelName(logging.ERROR))

        try:
            opts, _positional = getopt.getopt(sys.argv[1:],
                                              "hi:m:H:j:",
                                              [
                                                  "help",
                                                  "init_sleep_time=",
                                                  "max_sleep_time=",
                                                  "healthy_time=",
                                                  "job_name=",
                                              ])

        except getopt.GetoptError as err:
            # usage_and_exit() returns, so exit explicitly; otherwise the
            # loops below would fail on the unbound 'opts'.
            self.logger.error("Bad arguments: {0}".format(err))
            usage_and_exit()
            sys.exit(1)

        #
        # First pass, get the job name so we can override parameters
        # from the config file if we want. Help is handled here too so it
        # works without a job name.
        #
        for opt, value in opts:

            if opt in ("-h", "--help"):
                usage_and_exit()
                sys.exit(0)

            if opt in ("-j", "--job_name"):
                self.job_name = value

        # Check before loading state so we never look for /tmp/None.cfg.
        if not self.job_name:
            self.logger.error("No job name")
            usage_and_exit()
            sys.exit(1)

        self.cfg_load()

        #
        # Now parse all args; these override whatever the state file held.
        #
        secs_options = {
            "-i": "init_sleep_time",
            "--init_sleep_time": "init_sleep_time",
            "-m": "max_sleep_time",
            "--max_sleep_time": "max_sleep_time",
            "-H": "healthy_time",
            "--healthy_time": "healthy_time",
        }

        for opt, value in opts:

            if opt in ("-j", "--job_name"):
                continue

            attr = secs_options.get(opt)
            if attr is None:
                self.logger.error("Unknown arg: {0}".format(opt))
                usage_and_exit()
                sys.exit(1)

            try:
                setattr(self, attr, int(value))
            except ValueError:
                self.logger.error(
                    "Bad value for {0}: {1}".format(opt, value))
                usage_and_exit()
                sys.exit(1)

    def cfg_load(self):
        """Load saved state for the job from /tmp/<job_name>.cfg.

        Sets self.cfg_file. If the file does not exist nothing else
        happens. Keys present in the file overwrite the matching
        attributes (converted with int()); missing or null keys leave the
        current values alone. A file that cannot be read or parsed is
        logged and ignored, leaving every attribute unchanged.
        """
        self.cfg_file = "/tmp/{0}.cfg".format(self.job_name)

        if not os.path.exists(self.cfg_file):
            return

        try:
            with open(self.cfg_file, 'r') as f:
                # cfg_save() writes the JSON object on a single line.
                cfg = json.loads(f.readline())

            if not isinstance(cfg, dict):
                raise ValueError("expected a JSON object")

            # Convert everything first so a bad value cannot leave the
            # state half applied.
            loaded = {}
            for key in self._STATE_KEYS:
                val = cfg.get(key, None)
                if val is not None:
                    loaded[key] = int(val)

        except (IOError, OSError, TypeError, ValueError) as err:
            self.logger.error(
                "Ignoring unusable state file {0}: {1}".format(
                    self.cfg_file, err))
            return

        for key, val in loaded.items():
            setattr(self, key, val)

    def cfg_save(self):
        """Write the current state to self.cfg_file as one line of JSON.

        self.cfg_file must already be set (cfg_load() sets it). Errors
        from opening or writing the file propagate to the caller.
        """
        cfg = {}
        for key in self._STATE_KEYS:
            cfg[key] = getattr(self, key)

        outstr = json.dumps(cfg)

        with open(self.cfg_file, "w") as f:
            f.write(outstr)

    def main(self):
        """Work out the next delay, save the state, log it and sleep."""
        time_now = int(time.time())
        elapsed_time = int(time_now - self.restart_time)

        # The job counts as healthy if it stayed up longer than
        # healthy_time since the previous restart.
        is_healthy = elapsed_time > self.healthy_time

        if self.restart_count == 0 or is_healthy:
            # First run, or the job was healthy: start again from the
            # initial delay rather than backing off further.
            self.current_sleep_time = self.init_sleep_time
        else:
            self.current_sleep_time *= 2

        self.restart_count += 1

        if self.current_sleep_time >= self.max_sleep_time:
            self.current_sleep_time = self.max_sleep_time
            self.logger.info("Job {0} is at max sleep time".format(
                self.job_name))

        self.restart_time = time.time()

        # Save before sleeping so the state is on disk even if we are
        # killed during the sleep.
        self.cfg_save()

        if is_healthy:
            self.logger.info(
                "Job {0} restarted({1} times). "
                "Time since last restart: {2} secs. "
                "Job is healthy, reset time to {3} secs.".format(
                    self.job_name,
                    self.restart_count,
                    elapsed_time,
                    self.current_sleep_time))
        else:
            self.logger.info(
                "Job {0} restarted({1} times). "
                "Time since last restart: {2} secs. "
                "Sleep now for {3} secs.".format(
                    self.job_name,
                    self.restart_count,
                    elapsed_time,
                    self.current_sleep_time))

        time.sleep(int(self.current_sleep_time))


if __name__ == '__main__':
    # Script entry point: parse the command line, then run one backoff cycle.
    helper = UpstartHelper()
    helper.main()
