23726258 SMF enable-disable with many nws puts neutron-dhcp-agent into maintenance s11u3-sru
author chaithan.prakash@oracle.com <chaithan.prakash@oracle.com>
Wed, 16 Nov 2016 12:04:24 -0800
branch s11u3-sru
changeset 7319 0753ecc76d4d
parent 7317 bd14d5a59818
child 7320 edeb951aa980
23726258 SMF enable-disable with many nws puts neutron-dhcp-agent into maintenance
23855912 neutron-l3-agent should make sure contract is empty in SMF stop method
components/openstack/common/files/openstack_common.py
components/openstack/neutron/files/neutron-dhcp-agent
components/openstack/neutron/files/neutron-l3-agent
--- a/components/openstack/common/files/openstack_common.py	Wed Nov 02 06:19:13 2016 -0700
+++ b/components/openstack/common/files/openstack_common.py	Wed Nov 16 12:04:24 2016 -0800
@@ -1,4 +1,4 @@
-# Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
 #
 #    Licensed under the Apache License, Version 2.0 (the "License"); you may
 #    not use this file except in compliance with the License. You may obtain
@@ -24,6 +24,7 @@
 import glob
 import os
 import shutil
+from subprocess import CalledProcessError, PIPE, check_call
 import time
 
 import iniparse
@@ -230,3 +231,27 @@
 
     with open(new_file, 'wb+') as fh:
         new.write(fh)
+
+
+def kill_contract(attempts, interval, ctid):
+    """Send SIGTERM to contract `ctid` every `interval` seconds, up to
+    `attempts` times, until it is empty.  Returns 0 if pkill failed, 1 if
+    the contract is (or already was) empty, 2 if attempts were exhausted.
+    """
+    for _ in xrange(attempts):
+        try:
+            check_call(["/usr/bin/pkill", "-c", ctid])
+        except CalledProcessError as err:
+            if err.returncode == 1:
+                # status 1: nothing matched, contract already empty
+                return 1
+            print "failed to kill the SMF contract: %s" % err
+            return 0
+        time.sleep(interval)
+        try:
+            # pgrep exits non-zero once the contract has no processes
+            check_call(["/usr/bin/pgrep", "-c", ctid], stdout=PIPE,
+                       stderr=PIPE)
+        except CalledProcessError:
+            return 1
+    return 2
--- a/components/openstack/neutron/files/neutron-dhcp-agent	Wed Nov 02 06:19:13 2016 -0700
+++ b/components/openstack/neutron/files/neutron-dhcp-agent	Wed Nov 16 12:04:24 2016 -0800
@@ -1,6 +1,6 @@
 #!/usr/bin/python2.7
 
-# Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved.
 #
 #    Licensed under the Apache License, Version 2.0 (the "License"); you may
 #    not use this file except in compliance with the License. You may obtain
@@ -18,6 +18,7 @@
 import re
 import sys
 
+from openstack_common import kill_contract
 import smf_include
 
 from subprocess import CalledProcessError, Popen, PIPE, check_call
@@ -60,11 +61,15 @@
 
 
 def stop():
-    try:
-        # first kill the SMF contract
-        check_call(["/usr/bin/pkill", "-c", sys.argv[2]])
-    except CalledProcessError as err:
-        print "failed to kill the SMF contract: %s" % err
+    # Keep issuing SIGTERM until the contract is empty. This way we will catch
+    # any child processes missed because they were getting forked.
+    # 50 attempts will be made at intervals of 2 seconds. Typically, we
+    # will only need 0 or 1 additional attempt before the contract is empty but
+    # we chose to err on the side of caution. In the worst case, we will use
+    # 100 seconds in the below loop which will leave 500 seconds (timeout is
+    # 600s) for the other cleanup tasks, after which the service will be put to
+    # maintenance state if the contract was not killed successfully.
+    if not kill_contract(50, 2, sys.argv[2]):
         return smf_include.SMF_EXIT_ERR_FATAL
 
     cmd = ["/usr/sbin/ipadm", "show-if", "-p", "-o", "ifname"]
--- a/components/openstack/neutron/files/neutron-l3-agent	Wed Nov 02 06:19:13 2016 -0700
+++ b/components/openstack/neutron/files/neutron-l3-agent	Wed Nov 16 12:04:24 2016 -0800
@@ -19,6 +19,7 @@
 import sys
 
 import netaddr
+from openstack_common import kill_contract
 import smf_include
 
 from subprocess import CalledProcessError, Popen, PIPE, check_call
@@ -154,11 +155,16 @@
 
 def stop():
     shutdown_vpn()
-    try:
-        # first kill the SMF contract
-        check_call(["/usr/bin/pkill", "-c", sys.argv[2]])
-    except CalledProcessError as err:
-        print "failed to kill the SMF contract: %s" % (err)
+    # Keep issuing SIGTERM until the contract is empty. This way we will catch
+    # any child processes missed because they were getting forked.
+    # 50 attempts will be made at intervals of 2 seconds. Typically, we
+    # will only need 0 or 1 additional attempt before the contract is empty but
+    # we chose to err on the side of caution. In the worst case, we will use
+    # 100 seconds in the below loop which will leave 500 seconds (timeout is
+    # 600s) for the other cleanup tasks, after which the service will be put to
+    # maintenance state if the contract was not killed successfully.
+    if not kill_contract(50, 2, sys.argv[2]):
+        return smf_include.SMF_EXIT_ERR_FATAL
 
     # We need to first remove the IP filter rules and then remove
     # the IP interfaces on which the rules were applied.