# HG changeset patch # User chaithan.prakash@oracle.com # Date 1468347538 25200 # Node ID ed601ca40b9c48679e07eaa7b79e43449a3ae464 # Parent 02b02527288bfb355e91155232b3500b393e4be1 23726258 SMF enable-disable with many nws puts neutron-dhcp-agent into maintenance 23855850 Neutron agents (L3 and DHCP) should cleanup pre-existing resources when starting 23855912 neutron-l3-agent should make sure contract is empty in SMF stop method diff -r 02b02527288b -r ed601ca40b9c components/openstack/common/files/openstack_common.py --- a/components/openstack/common/files/openstack_common.py Tue Jul 12 11:11:21 2016 -0700 +++ b/components/openstack/common/files/openstack_common.py Tue Jul 12 11:18:58 2016 -0700 @@ -29,7 +29,7 @@ import json import os import shutil -from subprocess import Popen, PIPE +from subprocess import CalledProcessError, Popen, PIPE, check_call import time import uuid @@ -290,3 +290,27 @@ except NoOptionError: return False return "ml2" in core_plugin.lower() + + +def kill_contract(attempts, interval, ctid): + """ Keeps issuing SIGTERM to contract-id at specified intervals until + either the contract is empty or the specified number of attempts are made. + Returns 0 if pkill failed, 1 if contract was successfully emptied and 2 + if attempts were exhausted before the contract could be emptied. 
+ """ + for _ in xrange(attempts): + # Kill the SMF contract + try: + check_call(["/usr/bin/pkill", "-c", ctid]) + except CalledProcessError as err: + print "failed to kill the SMF contract: %s" % err + return 0 + time.sleep(interval) + try: + # check if contract is empty + check_call(["/usr/bin/pgrep", "-c", ctid], stdout=PIPE, + stderr=PIPE) + except: + # contract is empty + return 1 + return 2 diff -r 02b02527288b -r ed601ca40b9c components/openstack/neutron/files/neutron-dhcp-agent --- a/components/openstack/neutron/files/neutron-dhcp-agent Tue Jul 12 11:11:21 2016 -0700 +++ b/components/openstack/neutron/files/neutron-dhcp-agent Tue Jul 12 11:18:58 2016 -0700 @@ -19,7 +19,7 @@ import re import sys -from openstack_common import is_ml2_plugin +from openstack_common import is_ml2_plugin, kill_contract import smf_include from subprocess import CalledProcessError, Popen, PIPE, check_call @@ -45,6 +45,48 @@ return True +def cleanup_dhcp_agent_datalinks(): + cmd = ["/usr/sbin/dladm", "show-link", "-p", "-o", "link"] + p = Popen(cmd, stdout=PIPE, stderr=PIPE) + output, error = p.communicate() + if p.returncode != 0: + print "failed to retrieve datalink names" + return smf_include.SMF_EXIT_ERR_FATAL + + dlnames = output.splitlines() + # DHCP agent datalinks are always 15 characters in length. They start with + # 'dh', end with '_0', and in between they are hexadecimal digits. + prog = re.compile('dh[0-9A-Fa-f\_]{11}_0') + ret_code = smf_include.SMF_EXIT_OK + ovs_bridge = None + if is_ml2_plugin(): + ovs_bridge = get_ovs_bridge() + for dlname in dlnames: + if prog.search(dlname) is None: + continue + try: + # first remove the IP + check_call(["/usr/bin/pfexec", "/usr/sbin/ipadm", "delete-ip", + dlname]) + except: + # It is possible that the IP was already deleted but not the + # datalink. So we continue and try and delete the datalink. 
+ pass + try: + # next remove the VNIC + check_call(["/usr/bin/pfexec", "/usr/sbin/dladm", "delete-vnic", + dlname]) + # remove the OVS Port + if ovs_bridge: + check_call(["/usr/bin/pfexec", "/usr/sbin/ovs-vsctl", "--", + "--if-exists", "del-port", ovs_bridge, dlname]) + except CalledProcessError as err: + print "failed to remove datalink '%s' used by DHCP agent: %s" % \ + (dlname, err) + ret_code = smf_include.SMF_EXIT_ERR_FATAL + return ret_code + + def start(): # verify paths are valid for f in sys.argv[2:4]: @@ -52,6 +94,11 @@ print '%s does not exist or is not readable' % f return smf_include.SMF_EXIT_ERR_CONFIG + # remove VNICs associated with DHCP agent if any were left over. + ret_code = cleanup_dhcp_agent_datalinks() + if ret_code != smf_include.SMF_EXIT_OK: + return ret_code + # set the hostmodel property if necessary if not set_hostmodel("src-priority"): return smf_include.SMF_EXIT_ERR_FATAL @@ -72,47 +119,22 @@ def stop(): - try: - # first kill the SMF contract - check_call(["/usr/bin/pkill", "-c", sys.argv[2]]) - except CalledProcessError as err: - print "failed to kill the SMF contract: %s" % err - return smf_include.SMF_EXIT_ERR_FATAL - - cmd = ["/usr/sbin/ipadm", "show-if", "-p", "-o", "ifname"] - p = Popen(cmd, stdout=PIPE, stderr=PIPE) - output, error = p.communicate() - if p.returncode != 0: - print "failed to retrieve IP interface names" + # Keep issuing SIGTERM until the contract is empty. This way we will catch + # any child processes missed because they were getting forked. + # 50 attempts will be made at intervals of 2 seconds. Typically, we + # will only need 0 or 1 additional attempt before the contract is empty but + # we chose to err on the side of caution. In the worst case, we will use + # 100 seconds in the below loop which will leave 500 seconds (timeout is + # 600s) for the other cleanup tasks, after which the service will be + # put into maintenance state if the contract was not killed successfully.
+ if not kill_contract(50, 2, sys.argv[2]): return smf_include.SMF_EXIT_ERR_FATAL - ifnames = output.splitlines() - # DHCP agent datalinks are always 15 characters in length. They start with - # 'dh', end with '_0', and in between they are hexadecimal digits. - prog = re.compile('dh[0-9A-Fa-f\_]{11}_0') - err_delete = False - for ifname in ifnames: - if prog.search(ifname) is None: - continue - try: - # first remove the IP - check_call(["/usr/bin/pfexec", "/usr/sbin/ipadm", "delete-ip", - ifname]) - # next remove the VNIC - check_call(["/usr/bin/pfexec", "/usr/sbin/dladm", "delete-vnic", - ifname]) - # remove the OVS Port - if is_ml2_plugin(): - check_call(["/usr/bin/pfexec", "/usr/sbin/ovs-vsctl", "--", - "--if-exists", "del-port", get_ovs_bridge(), - ifname]) - except CalledProcessError as err: - print "failed to remove datalink '%s' used by DHCP agent: %s" % \ - (ifname, err) - err_delete = True + # remove VNICs associated with DHCP agent + ret_code = cleanup_dhcp_agent_datalinks() # finally reset the hostmodel property - if not set_hostmodel("weak") or err_delete: + if not set_hostmodel("weak") or ret_code != smf_include.SMF_EXIT_OK: return smf_include.SMF_EXIT_ERR_FATAL return smf_include.SMF_EXIT_OK diff -r 02b02527288b -r ed601ca40b9c components/openstack/neutron/files/neutron-dhcp-agent.xml --- a/components/openstack/neutron/files/neutron-dhcp-agent.xml Tue Jul 12 11:11:21 2016 -0700 +++ b/components/openstack/neutron/files/neutron-dhcp-agent.xml Tue Jul 12 11:18:58 2016 -0700 @@ -85,7 +85,7 @@ - -