23726258 SMF enable-disable with many nws puts neutron-dhcp-agent into maintenance
23855912 neutron-l3-agent should make sure contract is empty in SMF stop method
--- a/components/openstack/common/files/openstack_common.py Wed Nov 02 06:19:13 2016 -0700
+++ b/components/openstack/common/files/openstack_common.py Wed Nov 16 12:04:24 2016 -0800
@@ -1,4 +1,4 @@
-# Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
@@ -24,6 +24,7 @@
import glob
import os
import shutil
+from subprocess import CalledProcessError, PIPE, check_call
import time
import iniparse
@@ -230,3 +231,41 @@
with open(new_file, 'wb+') as fh:
new.write(fh)
+
+
+def kill_contract(attempts, interval, ctid):
+    """Keep issuing SIGTERM to an SMF contract until it is empty.
+
+    Runs "pkill -c ctid", sleeps for `interval` seconds and then checks with
+    "pgrep -c ctid" whether any process is left, at most `attempts` times.
+
+    Returns 0 if pkill failed (other than by finding no process to signal),
+    1 if the contract is empty and 2 if attempts were exhausted before the
+    contract could be emptied.
+    """
+    # pgrep's output is not needed. Send it to /dev/null rather than to a
+    # PIPE: nothing reads the pipe, so a large PID list could block pgrep
+    # forever (see the warning in the subprocess.check_call documentation).
+    with open(os.devnull, "w") as devnull:
+        for _ in xrange(attempts):
+            # Kill the SMF contract
+            try:
+                check_call(["/usr/bin/pkill", "-c", ctid])
+            except CalledProcessError as err:
+                if err.returncode == 1:
+                    # pkill matched no process: the contract emptied on its
+                    # own (e.g. between the pgrep check and this attempt).
+                    return 1
+                print "failed to kill the SMF contract: %s" % err
+                return 0
+            time.sleep(interval)
+            try:
+                # check if contract is empty
+                check_call(["/usr/bin/pgrep", "-c", ctid], stdout=devnull,
+                           stderr=devnull)
+            except CalledProcessError as err:
+                if err.returncode == 1:
+                    # pgrep matched no process: contract is empty
+                    return 1
+                # Any other status is a pgrep error, not an empty contract.
+                print "failed to check the SMF contract: %s" % err
+    return 2
--- a/components/openstack/neutron/files/neutron-dhcp-agent Wed Nov 02 06:19:13 2016 -0700
+++ b/components/openstack/neutron/files/neutron-dhcp-agent Wed Nov 16 12:04:24 2016 -0800
@@ -1,6 +1,6 @@
#!/usr/bin/python2.7
-# Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
@@ -18,6 +18,7 @@
import re
import sys
+from openstack_common import kill_contract
import smf_include
from subprocess import CalledProcessError, Popen, PIPE, check_call
@@ -60,11 +61,15 @@
def stop():
- try:
- # first kill the SMF contract
- check_call(["/usr/bin/pkill", "-c", sys.argv[2]])
- except CalledProcessError as err:
- print "failed to kill the SMF contract: %s" % err
+ # Keep issuing SIGTERM until the contract is empty. This way we will catch
+ # any child processes missed because they were getting forked.
+ # 50 attempts will be made at intervals of 2 seconds. Typically, we
+ # will only need 0 or 1 additional attempt before the contract is empty but
+ # we chose to err on the side of caution. In the worst case, we will spend
+ # 100 seconds sleeping in kill_contract(), which will leave 500 seconds
+ # (timeout is 600s) for the other cleanup tasks, after which the service
+ # will be put into maintenance if the contract could not be emptied.
+ if not kill_contract(50, 2, sys.argv[2]):
return smf_include.SMF_EXIT_ERR_FATAL
cmd = ["/usr/sbin/ipadm", "show-if", "-p", "-o", "ifname"]
--- a/components/openstack/neutron/files/neutron-l3-agent Wed Nov 02 06:19:13 2016 -0700
+++ b/components/openstack/neutron/files/neutron-l3-agent Wed Nov 16 12:04:24 2016 -0800
@@ -19,6 +19,7 @@
import sys
import netaddr
+from openstack_common import kill_contract
import smf_include
from subprocess import CalledProcessError, Popen, PIPE, check_call
@@ -154,11 +155,16 @@
def stop():
shutdown_vpn()
- try:
- # first kill the SMF contract
- check_call(["/usr/bin/pkill", "-c", sys.argv[2]])
- except CalledProcessError as err:
- print "failed to kill the SMF contract: %s" % (err)
+ # Keep issuing SIGTERM until the contract is empty. This way we will catch
+ # any child processes missed because they were getting forked.
+ # 50 attempts will be made at intervals of 2 seconds. Typically, we
+ # will only need 0 or 1 additional attempt before the contract is empty but
+ # we chose to err on the side of caution. In the worst case, we will spend
+ # 100 seconds sleeping in kill_contract(), which will leave 500 seconds
+ # (timeout is 600s) for the other cleanup tasks, after which the service
+ # will be put into maintenance if the contract could not be emptied.
+ if not kill_contract(50, 2, sys.argv[2]):
+ return smf_include.SMF_EXIT_ERR_FATAL
# We need to first remove the IP filter rules and then remove
# the IP interfaces on which the rules were applied.