From: hyokeun Date: Fri, 16 Jun 2017 08:22:15 +0000 (+0900) Subject: NEW FEATURE: Control ondemand slaves X-Git-Tag: submit/trunk/20190927.012743~409^2 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=0231741c73c22f40bc6ea80de1b53a81c74f7a6b;p=services%2Fjenkins-scripts.git NEW FEATURE: Control ondemand slaves Dynamically create/release imager slaves through python AWS SDK. Change-Id: I4ace932e298546bb94a7fa9c0a9c0bdfb9b199a1 --- diff --git a/common/aws_ec2.py b/common/aws_ec2.py new file mode 100644 index 0000000..1f56315 --- /dev/null +++ b/common/aws_ec2.py @@ -0,0 +1,274 @@ +#!/usr/bin/env python + +import os +import sys +import json +import argparse +import boto3 +from argparse import ArgumentParser +from datetime import datetime, timedelta + +# set default char-set endcoding to utf-8 +reload(sys) +sys.setdefaultencoding('utf-8') # pylint: disable-msg=E1101 + +def unicode_to_str(obj): + """convert unicode object to str""" + + if isinstance(obj, list): + return [unicode_to_str(element) for element in obj] + elif isinstance(obj, dict): + return {unicode_to_str(key) : unicode_to_str(value) for key, value \ + in obj.iteritems()} + elif isinstance(obj, unicode): + return obj.encode('utf-8') + else: + return obj + +def format_output(data): + print '\n\n-----BEGIN RC OUTPUT-----' + sys.stdout.flush() + print data + sys.stdout.flush() + print '-----END RC OUTPUT-----\n\n' + +def stop_instances(ids): + assert type(ids) == list + print 'Terminating %s' % ids + #TODO: Check state before proceed. 
+ ec2 = boto3.resource('ec2') + ec2.instances.filter(InstanceIds=ids).stop() + ec2.instances.filter(InstanceIds=ids).terminate() + +def describe_instance_by_instanceid(instance_ids): + if type(instance_ids) == list: + ec2 = boto3.resource('ec2') + filtered_ids = ec2.instances.filter(InstanceIds=instance_ids) + else: + filtered_ids = instance_ids + + for instance in filtered_ids: + print '========================' + print "ami_launch_index = %s" % instance.ami_launch_index + print "architecture = %s" % instance.architecture + print "block_device_mappings = %s" % unicode_to_str(instance.block_device_mappings) + print "client_token = %s" % instance.client_token + print "ebs_optimized = %s" % instance.ebs_optimized + print "ena_support = %s" % instance.ena_support + print "hypervisor = %s" % instance.hypervisor + print "iam_instance_profile = %s" % instance.iam_instance_profile + print "image_id = %s" % instance.image_id + print "instance_id = %s" % instance.instance_id + print "instance_lifecycle = %s" % instance.instance_lifecycle + print "instance_type = %s" % instance.instance_type + print "kernel_id = %s" % instance.kernel_id + print "key_name = %s" % instance.key_name + print "launch_time = %s" % instance.launch_time + print "monitoring = %s" % unicode_to_str(instance.monitoring) + print "network_interfaces_attribute = %s" % unicode_to_str(instance.network_interfaces_attribute) + print "placement = %s" % unicode_to_str(instance.placement) + print "platform = %s" % instance.platform + print "private_dns_name = %s" % instance.private_dns_name + print "private_ip_address = %s" % instance.private_ip_address + print "product_codes = %s" % instance.product_codes + print "public_dns_name = %s" % instance.public_dns_name + print "public_ip_address = %s" % instance.public_ip_address + print "ramdisk_id = %s" % instance.ramdisk_id + print "root_device_name = %s" % instance.root_device_name + print "root_device_type = %s" % instance.root_device_type + print "security_groups 
= %s" % unicode_to_str(instance.security_groups) + print "source_dest_check = %s" % instance.source_dest_check + print "spot_instance_request_id = %s" % instance.spot_instance_request_id + print "sriov_net_support = %s" % instance.sriov_net_support + print "state = %s" % unicode_to_str(instance.state) + print "state_reason = %s" % instance.state_reason + print "state_transition_reason = %s" % instance.state_transition_reason + print "subnet_id = %s" % instance.subnet_id + print "tags = %s" % unicode_to_str(instance.tags) + print "virtualization_type = %s" % instance.virtualization_type + print "vpc_id = %s" % instance.vpc_id + +def get_essential_info_from_instance(instance_id): + if type(instance_id) == list: + ec2 = boto3.resource('ec2') + filtered_ids = ec2.instances.filter(InstanceIds=instance_id) + else: + filtered_ids = instance_id + ret_data = {} + for instance in filtered_ids: + ret_my = {} + ret_my['instance_id'] = instance.instance_id + ret_my['private_ip_address'] = instance.private_ip_address + ret_my['launch_time'] = str(instance.launch_time) + ret_my['state'] = str(instance.state['Name']) + ret_data[instance.instance_id] = ret_my + return ret_data + +def get_instances_by_tag(tag): + ret_ids = [] + ec2 = boto3.resource('ec2') + myfilter = [] + for x in tag: + myfilter.append({'Name': 'tag:%s' % x['Key'], 'Values': ['%s' % x['Value']]}) + return ec2.instances.filter(Filters=myfilter) + +def create_ec2_ondemand_instance(ImageId, MinCount, MaxCount, KeyName, SecurityGroupIds, \ + InstanceType, Placement, SubnetId, EbsOptimized, Tags): + print 'Creating instance....' 
+ ec2 = boto3.resource('ec2') + + ret_data = {} + + if False: #TEST-PURPOSE ONLY + test_inst = ['i-0000', 'i-0000'] + describe_instance_by_instanceid(test_inst) + sys.stdout.flush() + ret_data = get_essential_info_from_instance(test_inst) + format_output(ret_data) + else: + print '\nCREATE STARTED AT %s' % (str(datetime.now())) + rc = ec2.create_instances( + ImageId=ImageId, + MinCount=MinCount, + MaxCount=MaxCount, + KeyName=KeyName, + SecurityGroupIds=SecurityGroupIds, + InstanceType=InstanceType, + Placement=Placement, + SubnetId=SubnetId, + EbsOptimized=EbsOptimized, + TagSpecifications=[{'ResourceType': 'instance', + 'Tags': Tags + }] + ) + print '\nCREATE FINISHED AT %s' % (str(datetime.now())) + + generated_ids = [ x.instance_id for x in rc ] + describe_instance_by_instanceid(generated_ids) + ret_data = get_essential_info_from_instance(generated_ids) + format_output(ret_data) + return rc + + return [] + +def ping_main(args): + tags = [ {'Key': t.split(':')[0], 'Value': t.split(':')[1]} \ + for t in [ x for x in args.tagname.split(',')] ] + my_ids = get_instances_by_tag(tag=tags) + #describe_instance_by_instanceid(my_ids) + ret_data = get_essential_info_from_instance(my_ids) + format_output(ret_data) + print '\nMAIN FINISHED AT %s' % (str(datetime.now())) + +def terminate_main(args): + instance_ids = args.instanceid.split(',') + print instance_ids + stop_instances(instance_ids) + describe_instance_by_instanceid(instance_ids) + print '\nMAIN FINISHED AT %s' % (str(datetime.now())) + +def create_main(args): + # All arguments are mandatory + assert args.amiid + assert args.mincount + assert args.maxcount + assert args.keyname + assert args.securitygroupids + assert args.instancetype + assert args.placement + assert args.subnetid + assert args.ebsoptimized + assert args.tags + print 'ImageId: [%s]' % args.amiid + print 'MinCount: [%d]' % int(args.mincount) + print 'MaxCount: [%d]' % int(args.maxcount) + print 'KeyName: [%s]' % args.keyname + print 
'SecurityGroupIds: [%s]' % args.securitygroupids.split(',') + print 'InstanceType: [%s]' % args.instancetype + placement = dict(x.split(':') for x in args.placement.split(',')) + print 'Placement: [%s]' % placement + print 'SubnetId: [%s]' % args.subnetid + print 'EbsOptimized: [%s]' % json.loads(args.ebsoptimized.lower()) + print 'Tags: [%s], type(%s)' % (args.tags, type(args.tags)) + tags = [ {'Key': t.split(':')[0], 'Value': t.split(':')[1]} \ + for t in [ x for x in args.tags.split(',')] ] + print 'Tags: [%s]' % tags + + create_ec2_ondemand_instance(args.amiid, + int(args.mincount), + int(args.maxcount), + args.keyname, + args.securitygroupids.split(','), + args.instancetype, + placement, + args.subnetid, + json.loads(args.ebsoptimized.lower()), + tags) + print '\nMAIN FINISHED AT %s' % (str(datetime.now())) + +def argument_parsing(argv): + """Any arguments passed from user""" + + parser = argparse.ArgumentParser(description='AWS control interface') + + subparsers = parser.add_subparsers(dest='subcommands') + + #### [subcommand - PING] #### + cmd_ping = subparsers.add_parser('ping') + cmd_ping.add_argument('-t', '--tagname', action='store', dest='tagname', \ + help='Comma(,) separeted KEY:VALUE paires of the tag.' \ + ' ex) Name:myinstancename,hostname:myhost') + cmd_ping.add_argument('-e', '--env', action='store', dest='env', \ + help='env. ex) stage') + + #### [subcommand - CREATE] #### + cmd_create = subparsers.add_parser('create') + cmd_create.add_argument('-a', '--amiid', action='store', dest='amiid', \ + help='ImageID. 
ex) ami-1234abcd') + cmd_create.add_argument('-n', '--mincount', action='store', default='1', dest='mincount', \ + help='MinCount(Integer).') + cmd_create.add_argument('-x', '--maxcount', action='store', default='1', dest='maxcount', \ + help='MaxCount(Integer).') + cmd_create.add_argument('-k', '--keyname', action='store', dest='keyname', \ + help='KeyName.') + cmd_create.add_argument('-s', '--securitygroupids', action='store', dest='securitygroupids', \ + help='Comma(,) separeted SecurityGroupIds.' \ + ' ex) sg-abcd1234,sg-1234abcd') + cmd_create.add_argument('-i', '--instancetype', action='store', dest='instancetype', \ + help='InstanceType. ex) t2.micro') + cmd_create.add_argument('-p', '--placement', action='store', dest='placement', \ + help='Comma(,) separated KEY:VALUE pairs.' \ + ' ex) Tenancy:default,GroupName:,AvailabilityZone:ap-northeast-2a') + cmd_create.add_argument('-b', '--subnetid', action='store', dest='subnetid', \ + help='SubnetId. ex) subnet-abcd1234') + cmd_create.add_argument('-e', '--ebsoptimized', action='store', default='false', dest='ebsoptimized', \ + help='EbsOptimized(True or False)') + cmd_create.add_argument('-t', '--tags', action='store', dest='tags', \ + help='Comma(,) separeted KEY:VALUE pairs.' \ + ' ex) Name:test_name_tag,env:stage,hostname:mycom') + + #### [subcommand - TERMINATE] #### + cmd_terminate = subparsers.add_parser('terminate') + cmd_terminate.add_argument('-i', '--instanceid', action='store', dest='instanceid', \ + help='Comma(,) separated instance ids.' 
\ + ' ex) i-0123456789abcdef0,i-abcdef01234567890') + + + return parser.parse_args(argv[1:]) + +def main(argv): + print '\nMAIN START AT %s' % (str(datetime.now())) + args = argument_parsing(argv) + if args.subcommands == 'ping': + return ping_main(args) + elif args.subcommands == 'create': + return create_main(args) + elif args.subcommands == 'terminate': + return terminate_main(args) + else: + print 'Unsopported command %s' % args.subcommands + return -1 + +if __name__ == '__main__': + sys.exit(main(sys.argv)) + diff --git a/debian/control b/debian/control index aabfdc5..0f543b9 100644 --- a/debian/control +++ b/debian/control @@ -46,6 +46,7 @@ Depends: ${python:Depends}, python-yaml, python-lxml, gbs-api, + python-boto3, Description: Common part of jenkins scripts Package: jenkins-scripts-tzs diff --git a/debian/jenkins-scripts-common.install b/debian/jenkins-scripts-common.install index b8455a4..243cdb8 100644 --- a/debian/jenkins-scripts-common.install +++ b/debian/jenkins-scripts-common.install @@ -39,3 +39,4 @@ debian/tmp/job_update_local_git.py /var/lib/jenkins/jenkins-scripts/ debian/tmp/scripts/check_section.sh /var/lib/jenkins/jenkins-scripts/scripts debian/tmp/scripts/get_git_desc_info.sh /var/lib/jenkins/jenkins-scripts/scripts debian/tmp/scripts/nuget.exe /var/lib/jenkins/jenkins-scripts/scripts +debian/tmp/common/aws_ec2.py /var/lib/jenkins/jenkins-scripts/common/ diff --git a/debian/jenkins-scripts.install b/debian/jenkins-scripts.install index 9a255c1..d26f870 100644 --- a/debian/jenkins-scripts.install +++ b/debian/jenkins-scripts.install @@ -33,4 +33,5 @@ debian/tmp/job_sync_repo.py /var/lib/jenkins/jenkins-scripts/ debian/tmp/job_trigger_for_sync_repo.py /var/lib/jenkins/jenkins-scripts/ debian/tmp/job_trigger_obs_sync.py /var/lib/jenkins/jenkins-scripts/ debian/tmp/job_update_public_git.py /var/lib/jenkins/jenkins-scripts/ +debian/tmp/job_control_ondemand_slaves.groovy /var/lib/jenkins/jenkins-scripts/ diff --git a/debian/rules b/debian/rules 
import hudson.model.*
import jenkins.model.*
import groovy.json.JsonSlurper

/**
 * Run an external command and parse the map printed between the
 * "-----BEGIN RC OUTPUT-----" / "-----END RC OUTPUT-----" markers.
 * An empty cmd falls back to the common/aws_ec2.py helper script.
 * Any stderr output is treated as fatal.
 */
def execute_command(cmd, args, verbose=false) {
    if (!cmd.toString()?.trim()) {
        cmd = "python " + System.getenv("JENKINS_HOME") + "/hyokeun/jenkins-scripts/common/aws_ec2.py"
    }
    Process process = "${cmd} ${args}".execute()
    def out = new StringBuffer()
    def err = new StringBuffer()
    process.consumeProcessOutput(out, err)
    process.waitFor()
    if (verbose == true) {
        println "\n<<<< START CMD: ${args.split()[0]} >>>>"
        println "OUT:\n" + out.toString()
        println "ERR:\n" + err.toString()
        println "<<<< END CMD >>>>\n"
    }

    def HashMap ret_items
    if (err.toString()?.trim()) {
        println "You got error message:"
        println out.toString()
        println err.toString()
        assert false
    } else {
        // The helper prints Python dict reprs: map None -> '' and ' -> "
        // so the payload becomes parseable JSON.
        out_str = out.toString().replace("None", "''")
        if (out_str.contains("-----BEGIN RC OUTPUT-----") && \
            out_str.contains("-----END RC OUTPUT-----")) {
            ret_data = out_str.substring( \
                out_str.indexOf("-----BEGIN RC OUTPUT-----") + 26, \
                out_str.indexOf("-----END RC OUTPUT-----"))
            def jsonSlurper = new JsonSlurper()
            ret_items = jsonSlurper.parseText(ret_data.replaceAll("'", "\""))
        }
    }
    return ret_items
}

/**
 * Base slave-pool configuration, backed by init.groovy.d/setup.properties.
 * Subclasses override name_prefix()/tag_source() per worker purpose.
 */
class WorkerConf {
    protected Properties conf

    WorkerConf() {
        conf = new Properties()
        conf.load(new FileInputStream(System.getenv('JENKINS_HOME') \
                  + '/init.groovy.d/setup.properties'))
    }

    Integer max_slaves() { conf.EC2_INSTANCE_CAP_STR.toInteger() }
    Integer executors_per_slave() { conf.EC2_NUMBER_OF_EXECUTORS.toInteger() }
    String name_prefix() { conf.EC2_DESCRIPTION }
    String keyname() { conf.AWS_CLOUD_KEYNAME }

    String ami_id() { conf.EC2_AMI_ID }
    String availability_zone() { conf.EC2_AV_ZONE }
    String placement() {
        return "Tenancy:default," \
               + "GroupName:," \
               + "AvailabilityZone:${this.availability_zone()}"
    }
    String security_groups() { conf.EC2_SECURITY_GROUPS.replaceAll(" ", "") }
    String remote_fs() { conf.EC2_REMOTE_FS }
    String instance_type() { conf.EC2_INSTANCE_TYPE }
    String labels() { conf.EC2_LABEL_STRING }
    String remote_user() { conf.EC2_REMOTE_ADMIN }
    String ssh_port() { conf.EC2_SSH_PORT }
    String subnet_id() { conf.EC2_SUBNET_ID }
    String launch_timeout() { conf.EC2_LAUNCH_TIMEOUT }
    long idle_termination_minutes() { conf.EC2_IDLE_TERMINATION_MINUTES.toInteger() }
    String credential_id() { conf.EC2_CREDENTIAL_ID }
    String tag_name() { conf.EC2_TAG_NAME }
    String tag_hostname() { conf.EC2_TAG_HOSTNAME }
    String tag_env() { conf.EC2_TAG_ENV }
    String tag_source() { conf.EC2_TAG_SOURCE }
    String tags() {
        return "Name:${this.tag_name()}," \
               + "hostname:${this.tag_hostname()}," \
               + "env:${this.tag_env()}," \
               + "source:${this.tag_source()}," \
               + "jenkins_slave_type:demand_${this.name_prefix()}"
    }
}

class WorkerConf_JENKINS_IMAGER extends WorkerConf {
    @Override
    String name_prefix() { conf.EC2_WORKER_IMAGER_DESCRIPTION }
    String tag_source() { conf.EC2_WORKER_IMAGER_TAG_SOURCE }

    // Job names whose queued/running builds block slave revocation.
    List check_queue_list() { conf.EC2_WORKER_IMAGER_QUEUE_CHECK_LIST.split(",") }
}

class WorkerConf_OBS_WORKER_NORMAL extends WorkerConf {
    @Override
    String name_prefix() { conf.EC2_WORKER_OBS_NORMAL_DESCRIPTION }
}

class WorkerConf_OBS_WORKER_POWER extends WorkerConf {
    @Override
    String name_prefix() { conf.EC2_WORKER_OBS_POWER_DESCRIPTION }
}

/**
 * Snapshot of the Jenkins slaves whose name starts with a given prefix:
 * node/executor counters plus per-slave info (idle time, host, Computer).
 */
class SlaveStatus {
    private Map slave_stat = ['CURR_NUMBER_OF_NODES': 0,
                              'CURR_TOTAL_EXECUTORS': 0,
                              'CURR_BUSY_EXECUTORS': 0,
                              'CURR_IDLE_EXECUTORS': 0]
    private Map slave_info = [:]

    SlaveStatus(String name_prefix) {
        for (slave in Hudson.instance.slaves) {
            def slaveComputer = slave.getComputer()
            if ("${slaveComputer.getName()}".startsWith(name_prefix)) {
                slave_stat['CURR_NUMBER_OF_NODES']++
                slave_stat['CURR_TOTAL_EXECUTORS'] += slaveComputer.countExecutors()
                slave_stat['CURR_BUSY_EXECUTORS'] += slaveComputer.countBusy()
                // Only executors of live, online nodes count as idle capacity.
                if (!slaveComputer.offline && slaveComputer.isAlive()) {
                    slave_stat['CURR_IDLE_EXECUTORS'] += slaveComputer.countIdle()
                }
                def this_slave = ["name": slaveComputer.getName(),
                                  "idle_since": System.currentTimeMillis() \
                                                - slaveComputer.getIdleStartMilliseconds(),
                                  "host": slave.getLauncher().getHost(),
                                  "object": slaveComputer]
                slave_info[slaveComputer.getName()] = this_slave
            }
        }
    }

    Integer number_of_nodes() {
        slave_stat['CURR_NUMBER_OF_NODES']
    }
    Integer total_executors() {
        slave_stat['CURR_TOTAL_EXECUTORS']
    }
    Integer idle_executors() {
        slave_stat['CURR_IDLE_EXECUTORS']
    }
    Integer busy_executors() {
        slave_stat['CURR_BUSY_EXECUTORS']
    }
    Map get_slave_info() {
        return slave_info
    }
    void disconnect_node(slaveName) {
        if (slaveName in slave_info) {
            slave_info[slaveName]["object"].setTemporarilyOffline(true, null)
            slave_info[slaveName]["object"].disconnect(null)
        }
    }
    void delete_node(slaveName) {
        if (slaveName in slave_info) {
            slave_info[slaveName]["object"].doDoDelete()
        }
    }
}

// Query AWS for the instances carrying this worker's tag set.
def get_aws_status(worker_conf) {
    args = "ping -t ${worker_conf.tags()}"
    return execute_command("", args, true)
}

// Terminate the given EC2 instance ids via the helper script.
def terminate_aws_ec2_instances(instance_ids) {
    args = "terminate -i ${instance_ids.join(',')}"
    return execute_command("", args, true)
}

// Create new EC2 instances via the helper script; returns the parsed
// {instance_id: {...}} map printed by aws_ec2.py.
def create_aws_ec2_instances(ami_id,
                             min_count,
                             max_count,
                             keyname,
                             security_group_ids,
                             instance_type,
                             placement,
                             subnet_id,
                             ebsoptimized,
                             tags) {

    args = "create --amiid ${ami_id} " \
           + " --keyname ${keyname} " \
           + " --mincount ${min_count} --maxcount ${max_count} " \
           + " --securitygroupids ${security_group_ids} " \
           + " --instancetype ${instance_type} " \
           + " --placement ${placement} " \
           + " --subnetid ${subnet_id} " \
           + " --ebsoptimized ${ebsoptimized} " \
           + " --tags ${tags}"
    // TODO: hard-coded path; should derive from JENKINS_HOME like execute_command().
    command = "python /var/lib/jenkins/hyokeun/jenkins-scripts/common/aws_ec2.py"
    return execute_command(command, args, true)
}

// Instantiate the WorkerConf_<purpose> subclass and log its key settings.
def get_worker_conf(purpose) {

    def worker_conf = Class.forName("WorkerConf_${purpose}", true, \
                                    this.class.classLoader).newInstance()
    assert worker_conf
    println "\nWORKER CONFIGURATION (${purpose})"
    println "  MAX_SLAVES: ${worker_conf.max_slaves()}"
    println "  EXECUTORS_PER_SLAVE: ${worker_conf.executors_per_slave()}"
    println "  NAME_PREFIX: ${worker_conf.name_prefix()}\n"
    return worker_conf
}

//TODO: FIXME: load the node-creation closure from the init scripts directory.
e = { filepath ->
    evaluate(new File(System.getenv("JENKINS_HOME") + "/init.groovy.d/" + filepath))
}
create_slave_node = e("Module_Node")

// Create EC2 instances and register each one as a Jenkins node.
def create_slaves(worker_conf, num_nodes_to_create) {
    ec2s = create_aws_ec2_instances(worker_conf.ami_id(),
                                    1,
                                    num_nodes_to_create,
                                    worker_conf.keyname(),
                                    worker_conf.security_groups(),
                                    worker_conf.instance_type(),
                                    worker_conf.placement(),
                                    worker_conf.subnet_id(),
                                    'False',
                                    worker_conf.tags())

    Date date = new Date()
    String curr_date = date.format("yyyyMMdd.HHmmss")

    ec2s.each { k, v ->
        println "Creating jenkins node ${worker_conf.name_prefix()} (${v.instance_id}) at " + curr_date
        println "  Instance ID: ${v.instance_id}"
        println "  IP Address: ${v.private_ip_address}"
        println "  Launch Time: ${v.launch_time}"
        def jenkins_instance = Jenkins.getInstance()
        create_slave_node(
            jenkins_instance,
            "${worker_conf.name_prefix()} (${v.instance_id})",
            "${worker_conf.remote_fs()}",
            "${worker_conf.executors_per_slave()}",
            "${worker_conf.labels()}",
            "${v.private_ip_address}",
            "${worker_conf.ssh_port()}",
            "${worker_conf.credential_id()}",
            "${worker_conf.remote_user()}",
            "${v.launch_time}"
        )
        jenkins_instance.save()
    }
    println ""
    return ec2s
}

/**
 * Decide how many new slave nodes are needed to satisfy the requested
 * number of executors, bounded by the configured slave cap.
 */
Integer how_many_slaves_to_create(worker_conf, num_requested_executors) {

    // Check slave nodes are free enough to allocate image jobs
    def slave_stat = new SlaveStatus(worker_conf.name_prefix())
    println "\nSLAVE STATUS (${worker_conf.name_prefix()})"
    println "  NUMBER_OF_NODES: ${slave_stat.number_of_nodes()}"
    println "  TOTAL EXECUTORS: ${slave_stat.total_executors()}"
    println "  IDLE EXECUTORS:  ${slave_stat.idle_executors()}"
    println ""
    slave_stat.get_slave_info().each { k, v ->
        println "  ${k}:${v}"
    }
    println ""
    def Integer final_nodes_to_create = 0
    if (slave_stat.number_of_nodes() >= worker_conf.max_slaves()) {
        println "REACHED MAXIMUM SLAVES"
        return 0
    }
    if (slave_stat.idle_executors() >= num_requested_executors) {
        println "HAVE ENOUGH EXECUTORS"
        return 0
    }

    println "CALCULATING..."
    // Executors still missing, capped by the remaining node budget.
    def Integer min_to_create = \
        Math.min(num_requested_executors - slave_stat.idle_executors(), \
                 (worker_conf.max_slaves() - slave_stat.number_of_nodes()) \
                 * worker_conf.executors_per_slave())
    def Double nodes_to_create = min_to_create.div(worker_conf.executors_per_slave())
    final_nodes_to_create = Math.min(Math.ceil(nodes_to_create).intValue(), \
                                     worker_conf.max_slaves() - slave_stat.number_of_nodes())
    println "MIN(" + num_requested_executors + "-" + slave_stat.idle_executors() \
            + ", (" + worker_conf.max_slaves() + "-" + slave_stat.number_of_nodes() \
            + ")*" + worker_conf.executors_per_slave() + ")" \
            + " = " + min_to_create
    println "FINAL NUMBER OF SLAVES TO CREATE: ${final_nodes_to_create} \n"
    return final_nodes_to_create
}

// ACTION == REQUEST_WORKER: create slaves if idle capacity is insufficient.
def worker_ondemand_create_request(worker_conf, Integer num_requested_executors) {

    println "YOU REQUESTED ${num_requested_executors} executors!"
    get_aws_status(worker_conf)

    // Check slave nodes are free enough to allocate image jobs
    def num_nodes_to_create = how_many_slaves_to_create(worker_conf, num_requested_executors)

    if (num_nodes_to_create > 0) {
        inst_info = create_slaves(worker_conf, num_nodes_to_create)
        println "\"TitleDisplay\": \"Create(${inst_info.size()}/${num_requested_executors})\""
    }
}

// ACTION == REVOKE_WORKER: terminate idle slaves whose idle timer expired,
// unless busy executors or pending/running watched jobs forbid it.
def worker_ondemand_revoke_request(worker_conf) {

    def bRevoke = true

    // Check slave nodes for running builds
    def slave_stat = new SlaveStatus(worker_conf.name_prefix())
    if (slave_stat.busy_executors() > 0) {
        println "\nYOU HAVE BUSY EXECUTORS."
        return
    }

    // Check build queue
    Jenkins.instance.queue.items.find { it ->
        if (it.task.name in worker_conf.check_queue_list()) {
            println "\nPENDING BUILD IN THE QUEUE (${it.task.name}:${it.id}). STOP REVOKE OPERATION."
            bRevoke = false
            return true
        }
    }
    if (bRevoke != true) { return }

    // Check build
    worker_conf.check_queue_list().find { it ->
        def item = hudson.model.Hudson.instance.getItem(it)
        if (item.isInQueue() || item.isBuilding()) {
            println "YOU HAVE QUEUED OR BUILDING ITEMS FOR ${it}"
            bRevoke = false
            return true
        }
    }
    if (bRevoke != true) { return }

    existing_instance_ids = []
    get_aws_status(worker_conf).each { k, v ->
        if (v["state"] != "terminated") {
            existing_instance_ids.add(k)
        }
    }
    println "\nEXISTING INSTANCE INFO:\n${existing_instance_ids}\n"

    Date date = new Date()
    String curr_date = date.format("yyyyMMdd.HHmmss")

    existing_slave_ids = []
    inst_for_terminate = []
    slave_stat.get_slave_info().each { k, v ->
        // Node names look like "<prefix> (i-0123abcd...)".
        def matcher = v["name"] =~ /^${worker_conf.name_prefix()} \((i-[0-9a-f]+)\)$/
        if (matcher?.matches()) {
            def inst_id = matcher.group(1)
            existing_slave_ids.add(inst_id)
            if (v["idle_since"].toLong().div(1000*60) >= worker_conf.idle_termination_minutes()) {
                println ".... Delete candidate ${k} : " + v["idle_since"].toLong().div(1000*60)
                // Mark the node as offline and disconnect
                slave_stat.disconnect_node(v["name"])
                if (inst_id in existing_instance_ids) {
                    inst_for_terminate.add(inst_id)
                }
            } else {
                println ".... TIMER NOT EXPIRED FOR ${k} : " + v["idle_since"].toLong().div(1000*60)
            }
        } else {
            println "SLAVE \"${k}\" IS NOT MY CHILD."
        }
    }
    println "\nEXISTING NODE INFO:\n${existing_slave_ids}\n"

    println inst_for_terminate
    if (inst_for_terminate) {
        terminate_aws_ec2_instances(inst_for_terminate)
        println "\"TitleDisplay\": \"Delete(${inst_for_terminate.size()})\""
    }

    slave_stat.get_slave_info().each { k, v ->
        def matcher = v["name"] =~ /^${worker_conf.name_prefix()} \((i-[0-9a-f]+)\)$/
        if (matcher?.matches()) {
            def inst_id = matcher.group(1)
            // Delete the node
            if (inst_id in inst_for_terminate) {
                println "Deleting jenkins node ${k} at " + curr_date \
                        + " Cause: " + v["idle_since"].toLong().div(1000*60)
                slave_stat.delete_node(v["name"])
            }
        }
    }

    // Jenkins nodes with no backing EC2 instance: report only.
    dangled_jenkins_nodes = existing_slave_ids.toSet() - existing_instance_ids.toSet()
    if (dangled_jenkins_nodes) {
        println "\nDangled jenkins nodes:\n${dangled_jenkins_nodes}"
    }

    // EC2 instances with no Jenkins node would leak money: fail loudly.
    dangled_ec2_instances = existing_instance_ids.toSet() - existing_slave_ids.toSet()
    if (dangled_ec2_instances) {
        println "\nDangled EC2 instances:\n${dangled_ec2_instances}"
        assert false
    }
}

def __main__() {

    def buildEnv = build.getEnvironment(listener)

    if (buildEnv["ONDEMAND_SLAVE_CONFIGURATION_ENABLED"] != "1") {
        println "FEATURE NOT ENABLED"
        return
    }

    action = buildEnv['ACTION']
    purpose = buildEnv['PURPOSE']
    requested_num_executors = buildEnv['REQUESTED_NUM_EXECUTORS']

    // Only the imager pool is supported for now.
    assert (buildEnv['PURPOSE'] == 'JENKINS_IMAGER')

    def worker_conf = get_worker_conf(purpose)

    if (action == "REQUEST_WORKER") {
        worker_ondemand_create_request(worker_conf, \
                                       requested_num_executors.toInteger())
    } else if (action == "REVOKE_WORKER") {
        worker_ondemand_revoke_request(worker_conf)
    } else {
        println "Invalid action: " + action
    }
}

__main__()

return 0
def trigger_image_creation(trigger_data):
    """Trigger the image_creation jobs and request imager slaves.

    BUGFIX: `count` was initialized but never incremented, so the
    on-demand slave request below could never fire.  It now counts the
    image triggers actually fired.

    :param trigger_data: dict mapping repo name -> list of image data dicts
    """
    count = 0
    for repo in trigger_data.keys():
        for index, data in enumerate(trigger_data[repo]):
            trigger_next('image_trigger_%s_%s' % (repo, index), data)
            count += 1
    # Request number of imager nodes
    if os.getenv("ONDEMAND_SLAVE_CONFIGURATION_ENABLED", "0") == "1":
        if count > 0:
            trigger_next("SLAVE_BUILDER", {"data": "dummy"},
                         extra_params={"ACTION": "REQUEST_WORKER",
                                       "PURPOSE": "JENKINS_IMAGER",
                                       "REQUESTED_NUM_EXECUTORS": "%d" % count})
-r job_*.py job_*.groovy codebase.py dir-purge-tool.sh logs-collector.sh common obs_requests templates scripts vis dep_graph trbs abs %{buildroot}%{destdir}/ install -d %{buildroot}%{destinitdir} cp -r groovy_init_scripts/* %{buildroot}%{destinitdir}/ @@ -160,6 +161,7 @@ fi %{destdir}/job_trigger_for_sync_repo.py %{destdir}/job_trigger_obs_sync.py %{destdir}/job_update_public_git.py +%{destdir}/job_control_ondemand_slaves.groovy %files common %defattr(-,jenkins,jenkins) @@ -209,6 +211,7 @@ fi %{destdir}/scripts/check_section.sh %{destdir}/scripts/get_git_desc_info.sh %{destdir}/scripts/nuget.exe +%{destdir}/common/aws_ec2.py %files dependsgraph %defattr(-,jenkins,jenkins)