TPE-481 [AWS] On-demand scaling for OBS POWER workers 08/164608/3
author hyokeun <hyokeun.jeon@samsung.com>
Wed, 20 Dec 2017 04:10:14 +0000 (13:10 +0900)
committer hyokeun <hyokeun.jeon@samsung.com>
Thu, 21 Dec 2017 00:31:42 +0000 (09:31 +0900)
Change-Id: I6667540b4087325b41558bb87c0ba5f735bf3d85

groovy_init_scripts/scriptApproval.xml
job_control_ondemand_slaves.groovy
job_obs_worker_auto_scailing.py

index 415d3dd..47ebe92 100644 (file)
@@ -7,10 +7,12 @@
     <string>05477fbaff453d4eb3045f2039d1b586a24ee631</string>
     <string>0bc5dcb90964741115308f13c042998b6bbfbc66</string>
     <string>0d3674cfaee34604a98aa6375a979e4bef3077ef</string>
+    <string>1469b56a8f49513d0f8c94ab23d328e1137922ad</string>
     <string>1a82acd5437b3c3aef5a000285178fc408c3c0c2</string>
     <string>1ac9db42ad5c4b6b6ccd110bd83605d3554ce88b</string>
     <string>1b6ee1e9ec250e65df3f124a6b10a9c745d06476</string>
     <string>1bbf8aee9d5da6972564a4d923f5c5910a0c7cb7</string>
+    <string>1c182883484c53e6b4a892aa58d9a680a91b4bb0</string>
     <string>1d77669ccac0b2c08f9bee43daac4b4a94e07fb9</string>
     <string>2106e0b7eb4d561e1f5bcf051a7b31a9685796ab</string>
     <string>216d450d99f23770b5154e0dd45d35df3d6e0ab8</string>
     <string>4f6d925a3136fc4cc49e0f5fb3f8dce839f2e1e5</string>
     <string>50f9fd9924630d7ea65598d2fd4e033b348ca222</string>
     <string>53ef9694505b6e77c41c053f6705fd2a98551f19</string>
+    <string>54701e2dff94b43b9212eabc057856fd8ee3e1f0</string>
     <string>5606a923993c9286637839d1f7644e62a6448911</string>
     <string>577be6db4995b509b1cb64ce8d4bee846f866b26</string>
     <string>5c577e8bcd8fb6ce138d934e346f549cad5192bd</string>
     <string>5e2f81d1ef933d23aa6cbd673f9ea56fc4468387</string>
     <string>6d8a18aba007598a7901495cfffcf08865883633</string>
     <string>6e55cef58086f2adc3e9976836bbdf920b6648b2</string>
+    <string>6f248c2b9d5e12807feb984c526b4927f84bb5a6</string>
     <string>6f752d0538cfd87584216b82cf31f3bb796740f2</string>
     <string>6f9b2b76bc0ec61825a9480c6e52b5d1cbbceffb</string>
     <string>7a52ba91799cb22a82f442011f2bdf6f7898264f</string>
+    <string>758dad930f2538176e5faebea432e899489eff93</string>
     <string>7d581316af66034919a152e9014b8b0a69657e6e</string>
     <string>7d5ae9a876a71918f07edc15742ec5c01d9193ed</string>
     <string>7f9dd3bb442e5ac921535b98e4170f48f06901f3</string>
@@ -63,6 +68,7 @@
     <string>a822c49f1bc711ae2f7c2b6db6a36d67206d6afb</string>
     <string>a83f0569a7722bca5897f5c6ccafa3814026a8f9</string>
     <string>ac72e4e077ba06d6e4279314fd0c237df37be248</string>
+    <string>b34bd747d1766835ffd44190b617bbd45c05e19e</string>
     <string>b5ac3e66f309dd83d0fb250e7b90b34644852e75</string>
     <string>b6013706571921d7bf30bde6c3f995c637206241</string>
     <string>b63100fdc3f59e323f619076d54d4857e2f3d699</string>
     <string>staticMethod org.codehaus.groovy.runtime.DefaultGroovyMethods minus java.util.Set java.lang.Iterable</string>
     <string>staticMethod org.codehaus.groovy.runtime.DefaultGroovyMethods minus java.util.Set java.lang.Object</string>
     <string>staticMethod org.codehaus.groovy.runtime.DefaultGroovyMethods minus java.util.Set java.util.Collection</string>
+    <string>staticMethod org.codehaus.groovy.runtime.DefaultGroovyMethods print groovy.lang.Closure java.lang.Object</string>
     <string>staticMethod org.codehaus.groovy.runtime.DefaultGroovyMethods println groovy.lang.Closure java.lang.Object</string>
+    <string>staticMethod org.codehaus.groovy.runtime.DefaultGroovyMethods println java.lang.Object java.lang.Object</string>
     <string>staticMethod org.codehaus.groovy.runtime.DefaultGroovyMethods unique java.util.Collection groovy.lang.Closure</string>
     <string>staticMethod org.codehaus.groovy.runtime.EncodingGroovyMethods encodeBase64 byte[]</string>
     <string>staticMethod org.codehaus.groovy.runtime.ProcessGroovyMethods consumeProcessOutput java.lang.Process java.lang.Appendable java.lang.Appendable</string>
index 1a17e06..81bb469 100644 (file)
@@ -120,8 +120,22 @@ public class WorkerConf_JENKINS_IMAGER extends WorkerConf {
     }
     @Override
     String name_prefix() { conf.EC2_WORKER_IMAGER_DESCRIPTION }
-    String tag_source() { conf.EC2_WORKER_IMAGER_TAG_SOURCE }
+    String tag_source() { conf.EC2_WORKER_TAG_SOURCE }
     List check_queue_list() { conf.EC2_WORKER_IMAGER_QUEUE_CHECK_LIST.split(",") }
+    String get_remote_ssh_priv_key() { conf.EC2_WORKER_IMAGER_REMOTE_SSH_PRIV_KEY }
+    String get_remote_ssh_known_hosts() { conf.EC2_WORKER_IMAGER_REMOTE_SSH_KNOWN_HOSTS }
+    String make_init_script() {
+        def String fileContents = new File(this.get_remote_ssh_priv_key()).text
+        def String fileContentsKnownHosts = new File(this.get_remote_ssh_known_hosts()).text
+        return """#!bin/bash
+echo "${fileContents}" >> /home/${this.remote_user()}/.ssh/id_rsa
+echo "${fileContentsKnownHosts}" >> /home/${this.remote_user()}/.ssh/known_hosts
+echo "User ${this.remote_user()}" > /home/${this.remote_user()}/.ssh/config
+echo "IdentityFile /home/${this.remote_user()}/.ssh/id_rsa" >> /home/${this.remote_user()}/.ssh/config
+chown -R ${this.remote_user()}:${this.remote_user()} ${this.remote_fs()}
+chmod 0600 /home/${this.remote_user()}/.ssh/id_rsa
+"""
+    }
 }
 
 public class WorkerConf_OBS_WORKER_NORMAL extends WorkerConf {
@@ -131,17 +145,19 @@ public class WorkerConf_OBS_WORKER_NORMAL extends WorkerConf {
     @Override
     String name_prefix() { conf.EC2_WORKER_OBS_NORMAL_DESCRIPTION }
     String labels() { conf.EC2_WORKER_OBS_NORMAL_LABEL_STRING }
-    String ami_id() { conf.EC2_WORKER_OBS_AMI_ID }
+    String ami_id() { conf.EC2_WORKER_OBS_NORMAL_AMI_ID }
     String security_groups() { conf.EC2_WORKER_OBS_SECURITY_GROUPS.replaceAll(" ","") }
-    String instance_type() { conf.EC2_WORKER_OBS_INSTANCE_TYPE }
+    String instance_type() { conf.EC2_WORKER_OBS_NORMAL_INSTANCE_TYPE }
     String tag_name() { conf.EC2_WORKER_OBS_NORMAL_TAG_NAME }
     String tag_hostname() { conf.EC2_WORKER_OBS_NORMAL_TAG_HOSTNAME + this.get_slot_number() }
-    String get_remote_ssh_pub_key() { conf.EC2_WORKER_OBS_NORMAL_REMOTE_SSH_PUB_KEY }
+    String get_remote_ssh_pub_key() { conf.EC2_WORKER_OBS_REMOTE_SSH_PUB_KEY }
+    String get_hostname_prefix() { return "obsnw"; }
+    Integer get_obs_backend_02_num() { conf.EC2_WORKER_OBS_NORMAL_BACKEND02_NUM }
     String make_init_script() {
-        def hostname_prefix = "obsnw"
+        def hostname_prefix = this.get_hostname_prefix();
         def docker_compose_file = this.get_docker_compose_path()
         def String fileContents = new File(this.get_remote_ssh_pub_key()).text
-        def backend02_num = conf.EC2_WORKER_OBS_NORMAL_BACKEND02_NUM
+        def backend02_num = this.get_obs_backend_02_num();
         def instance_base = this.instance_base()
         return """#!bin/bash
 docker exec obs_worker rcobsworker stop
@@ -169,13 +185,23 @@ chown -R ${this.remote_user()}:${this.remote_user()} ${this.remote_fs()}
     }
     Integer executors_per_slave() { conf.EC2_WORKER_OBS_NORMAL_NUMBER_OF_EXECUTORS.toInteger() }
     Integer max_slaves() { conf.EC2_WORKER_OBS_NORMAL_INSTANCE_CAP_STR.toInteger() }
-    String tag_source() { conf.EC2_WORKER_OBS_NORMAL_TAG_SOURCE }
+    String tag_source() { conf.EC2_WORKER_TAG_SOURCE }
     boolean health_check_enabled() { return true }
 }
 
 class WorkerConf_OBS_WORKER_POWER extends WorkerConf_OBS_WORKER_NORMAL {
     @Override
     String name_prefix() { conf.EC2_WORKER_OBS_POWER_DESCRIPTION }
+    String labels() { conf.EC2_WORKER_OBS_POWER_LABEL_STRING }
+    String ami_id() { conf.EC2_WORKER_OBS_POWER_AMI_ID }
+    String instance_type() { conf.EC2_WORKER_OBS_POWER_INSTANCE_TYPE }
+    String tag_name() { conf.EC2_WORKER_OBS_POWER_TAG_NAME }
+    String tag_hostname() { conf.EC2_WORKER_OBS_POWER_TAG_HOSTNAME + this.get_slot_number() }
+
+    String get_hostname_prefix() { return "obspw"; }
+    Integer executors_per_slave() { conf.EC2_WORKER_OBS_POWER_NUMBER_OF_EXECUTORS.toInteger() }
+    Integer max_slaves() { conf.EC2_WORKER_OBS_POWER_INSTANCE_CAP_STR.toInteger() }
+    Integer get_obs_backend_02_num() { conf.EC2_WORKER_OBS_POWER_BACKEND02_NUM }
 }
 
 class SlaveStatus {
@@ -323,6 +349,7 @@ def check_healthy_status(worker_conf, vm_list) {
     }
 
     //TODO: Lets run multi-threaded code
+
 }
 
 
@@ -451,7 +478,7 @@ def worker_ondemand_create_request(worker_conf, Integer num_requested_executors)
     // Find empty slot from 01 ~ 99
     def free_slots = []
     def allocated_slots = []
-    for (i = worker_conf.instance_base(); i <= worker_conf.instance_base()+worker_conf.max_slaves(); i++) { free_slots.add(String.format("%03d", i)) }
+    for (i = worker_conf.instance_base(); i <= worker_conf.instance_base()+worker_conf.max_slaves(); i++) { free_slots.add(String.format("%02d", i)) }
     def current_aws_status_list = get_aws_status(worker_conf)
     current_aws_status_list.each { k, v ->
         println k
@@ -701,7 +728,9 @@ def __main__() {
     purpose = buildEnv['PURPOSE']
     requested_num_executors = buildEnv['REQUESTED_NUM_EXECUTORS']
 
-    assert (buildEnv['PURPOSE'] == 'JENKINS_IMAGER') || (buildEnv['PURPOSE'] == 'OBS_WORKER_NORMAL')
+    assert (buildEnv['PURPOSE'] == 'JENKINS_IMAGER') \
+        || (buildEnv['PURPOSE'] == 'OBS_WORKER_NORMAL') \
+        || (buildEnv['PURPOSE'] == 'OBS_WORKER_POWER')
 
     def worker_conf = get_worker_conf(purpose)
 
index 95f790c..30380e8 100644 (file)
@@ -28,6 +28,7 @@ import requests
 from common.utils import sync, unicode_to_str
 from common.buildtrigger import trigger_next
 import xml.etree.ElementTree as ElementTree
+import ConfigParser
 
 OBS_NORMAL_WORKER = 'obsnw'
 OBS_POWER_WORKER  = 'obspw'
@@ -47,35 +48,61 @@ def get_worker_status():
     tree = ElementTree.fromstring(resp.text)
 
     ret_data = {'idle_normal': 0, 'idle_power': 0,
-                'building_normal': 0, 'building_power': 0,
+                'building_normal': 0, 'building_power': 0, 'building_power_chromium': 0,
                 'waiting': 0, 'blocked': 0,
                 'normal_slots': [], 'power_slots': [],
-                'normal_slots_idle': []}
+                'normal_slots_idle': [], 'power_slots_idle': []}
+
+    worker_stat_container = {}
 
     for item in tree.findall('idle'):
-        slot_number = item.get('workerid').split(':')[0].split(OBS_NORMAL_WORKER)[-1].split(OBS_POWER_WORKER)[-1]
-        if item.get('workerid').startswith(OBS_NORMAL_WORKER):
+        slot_inst = item.get('workerid')
+        slot_name = slot_inst.split(':')[0]
+        slot_number = slot_inst.split(':')[0].split(OBS_NORMAL_WORKER)[-1].split(OBS_POWER_WORKER)[-1]
+        inst_number = slot_inst.split(':')[1]
+        if slot_name not in worker_stat_container:
+            worker_stat_container[slot_name] = []
+        if inst_number in worker_stat_container[slot_name]:
+            continue
+        worker_stat_container[slot_name].append(inst_number)
+        if slot_inst.startswith(OBS_NORMAL_WORKER):
             ret_data['idle_normal'] += 1
             if slot_number not in ret_data['normal_slots']:
                 ret_data['normal_slots'].append(slot_number)
             if slot_number not in ret_data['normal_slots_idle']:
                 ret_data['normal_slots_idle'].append(slot_number)
-        elif item.get('workerid').startswith(OBS_POWER_WORKER):
+        elif slot_inst.startswith(OBS_POWER_WORKER):
             ret_data['idle_power'] += 1
             if slot_number not in ret_data['power_slots']:
                 ret_data['power_slots'].append(slot_number)
+            if slot_number not in ret_data['power_slots_idle']:
+                ret_data['power_slots_idle'].append(slot_number)
+
     for item in tree.findall('building'):
-        slot_number = item.get('workerid').split(':')[0].split(OBS_NORMAL_WORKER)[-1].split(OBS_POWER_WORKER)[-1]
-        if item.get('workerid').startswith(OBS_NORMAL_WORKER):
+        slot_inst = item.get('workerid')
+        slot_name = slot_inst.split(':')[0]
+        slot_number = slot_inst.split(':')[0].split(OBS_NORMAL_WORKER)[-1].split(OBS_POWER_WORKER)[-1]
+        inst_number = slot_inst.split(':')[1]
+        if slot_name not in worker_stat_container:
+            worker_stat_container[slot_name] = []
+        if inst_number in worker_stat_container[slot_name]:
+            continue
+        worker_stat_container[slot_name].append(inst_number)
+        if slot_inst.startswith(OBS_NORMAL_WORKER):
             ret_data['building_normal'] += 1
             if slot_number not in ret_data['normal_slots']:
                 ret_data['normal_slots'].append(slot_number)
             if slot_number in ret_data['normal_slots_idle']:
                 ret_data['normal_slots_idle'].remove(slot_number)
-        elif item.get('workerid').startswith(OBS_POWER_WORKER):
+        elif slot_inst.startswith(OBS_POWER_WORKER):
             ret_data['building_power'] += 1
             if slot_number not in ret_data['power_slots']:
                 ret_data['power_slots'].append(slot_number)
+            if item.get('package') == 'chromium-efl':
+                ret_data['building_power_chromium'] += 1
+            if slot_number in ret_data['power_slots_idle']:
+                ret_data['power_slots_idle'].remove(slot_number)
+
     for item in tree.findall('waiting'):
         ret_data['waiting'] += int(item.get('jobs'))
     for item in tree.findall('blocked'):
@@ -87,8 +114,6 @@ def get_worker_status():
     print '\n'
     sys.stdout.flush()
 
-    #if ret_data['building_normal'] > 0 or ret_data['building_power'] > 0 \
-    #    or ret_data['waiting'] > 0:
     if True:
         sync_src = os.path.join(os.getenv('JENKINS_HOME'), '.obs_worker_trend', 'obs_worker_history.log')
         sync_dest = os.path.join(os.getenv('IMG_SYNC_DEST_BASE'), 'snapshots', \
@@ -107,10 +132,13 @@ def get_worker_status():
             with open(sync_src, 'a') as wh:
                 wh.write(curr_item)
             print sync(os.path.dirname(sync_src), sync_dest)
+            print curr_item
     return ret_data
 
 def request_workers(num_executors, worker_type=OBS_NORMAL_WORKER):
     # Request number of imager nodes
+    if num_executors <= 0:
+        num_executors = 1
     print 'Requesting %d executors' % num_executors
     purpose = "OBS_WORKER_NORMAL"
     if worker_type == OBS_POWER_WORKER:
@@ -135,35 +163,68 @@ def revoke_workers(slot_numbers, worker_type=OBS_NORMAL_WORKER):
                                    "PURPOSE": purpose, \
                                    "SLOT_NUMBERS": "%s" % ','.join(slot_numbers)})
 
+class ReadConfig(object):
+
+    def __init__(self, fname=None):
+        if fname is None:
+            conf_file = os.path.join(os.getenv('JENKINS_HOME'), 'init.groovy.d', 'setup.properties')
+        else:
+            conf_file = fname
+
+        if not os.path.isfile(conf_file):
+            return
+
+        self.configParser = ConfigParser.RawConfigParser()
+        with open(conf_file, 'r') as rf:
+            temp_conf_str = '[default]\n' + rf.read()
+        temp_conf_file = os.path.join(os.getenv('WORKSPACE'), '.prop')
+        with open(temp_conf_file, 'w') as wf:
+            wf.write(temp_conf_str.replace('\\n\\',''))
+
+        self.configParser.read(temp_conf_file)
+
+    def get_config(self, key, section='default'):
+        return self.configParser.get(section, key)
+
 def main():
 
     worker_status = get_worker_status()
 
     need_new_worker = worker_status['waiting'] - worker_status['idle_normal'] - worker_status['idle_power']
 
-    #TODO: scheduling
-    # 3 packages per 1 instance for 5 minutes
     # number of packages can be handled with existing instances
-    num_can_handle_now = 3 * (worker_status['idle_normal'] \
-                            + worker_status['idle_power'] \
-                            + worker_status['building_normal'] \
-                            + worker_status['building_power'])
+    num_can_handle_now = int(4 * (worker_status['idle_normal'] + worker_status['building_normal']) \
+                       + int(0.6 * (worker_status['idle_power'] + worker_status['building_power'] - worker_status['building_power_chromium'])))
+
     # number of packages need workers
-    num_need_more = int((need_new_worker - num_can_handle_now) / 10)
+    num_need_more = int((need_new_worker - num_can_handle_now))
+    print 'need_new_worker: %d' % need_new_worker
+    print 'num_can_handle_now: %d' % num_can_handle_now
+    print 'num_need_more: %d' % num_need_more
 
     if os.getenv('OBS_WORKER_NORMAL_AUTO_SCAILING_ENABLED', '0') == '0':
         return
 
+    conf_inst = ReadConfig()
+    max_normal = conf_inst.get_config('EC2_WORKER_OBS_NORMAL_INSTANCE_CAP_STR')
+    max_power  = conf_inst.get_config('EC2_WORKER_OBS_POWER_INSTANCE_CAP_STR')
+
     if num_need_more > 0:
-        print "\"TitleDisplay\": \"+ %d\"" % num_need_more
-        request_workers(need_new_worker)
+        if len(worker_status['power_slots']) < int(max_power):
+            request_workers(num_need_more / 4, worker_type=OBS_POWER_WORKER)
+            print "\"TitleDisplay\": \"+P(%d)\"" % num_need_more
+        else:
+            request_workers(num_need_more / 10, worker_type=OBS_NORMAL_WORKER)
+            print "\"TitleDisplay\": \"+N(%d)\"" % num_need_more
+
     elif worker_status['waiting'] <= 0 and len(worker_status['normal_slots_idle']) > 0:
         revoke_workers(sorted(worker_status['normal_slots_idle']), OBS_NORMAL_WORKER)
-    elif worker_status['building_normal'] <= 0 and worker_status['building_power'] <= 0 \
-        and worker_status['waiting'] <= 0:
-        print "\"TitleDisplay\": \"- %d\"" % len(worker_status['normal_slots'])
-        revoke_workers(sorted(worker_status['normal_slots']), OBS_NORMAL_WORKER)
-#       #revoke_workers(sorted(worker_status['power_slots']), OBS_POWER_WORKER)
+        print "\"TitleDisplay\": \"-N(%d)\"" % len(worker_status['normal_slots_idle'])
+
+    elif worker_status['waiting'] <= 0 and len(worker_status['power_slots_idle']) > 0:
+        revoke_workers(sorted(worker_status['power_slots_idle']), OBS_POWER_WORKER)
+        print "\"TitleDisplay\": \"-P(%d)\"" % len(worker_status['power_slots_idle'])
+
     else:
         print "\"TitleDisplay\": \"BN(%d) BP(%d) B(%d)\"" \
             % (worker_status['building_normal'], worker_status['building_power'], worker_status['blocked'])