25406236 openstack/nova/nova-compute service fails after reboot with created instances
authorSean Wilcox <sean.wilcox@oracle.com>
Mon, 23 Jan 2017 12:54:02 -0700
changeset 7593 3f5b9ebe028b
parent 7590 5461fab14904
child 7596 83b4091f7969
25406236 openstack/nova/nova-compute service fails after reboot with created instances
components/openstack/nova/files/solariszones/driver.py
--- a/components/openstack/nova/files/solariszones/driver.py	Thu Jan 19 16:57:09 2017 -0800
+++ b/components/openstack/nova/files/solariszones/driver.py	Mon Jan 23 12:54:02 2017 -0700
@@ -786,7 +786,7 @@
                 LOG.error(_("Unable to aggregate non-summable kstat %s;%s "
                             " of type %s") % (ks.getParent().uri, statistic,
                                               type(value)))
-                return None
+                return 0
 
         return total
 
@@ -805,7 +805,9 @@
 
     def _get_cpu_time(self, zone):
         """Return the CPU time used in nanoseconds."""
-        if zone.name is None:
+        # If the zone is not in a running state, the information requested
+        # cannot be gathered, so simply return 0.
+        if zone.name is None or zone.state != ZONE_STATE_RUNNING:
             return 0
 
         # Loop over the kstats for each cpu. If the cpu list changes, the
@@ -814,24 +816,41 @@
         # The retry value of 3 was determined by the "we shouldn't hit this
         # often, but if we do it should resolve quickly so try again"+1
         # algorithm.
+        uri = "kstat:/zones/%s/cpu" % zone.name
+        accum_uri = "kstat:/zones/%s/cpu/accum/sys" % zone.name
         for _attempt in range(3):
             total = 0
 
-            uri = "kstat:/zones/%s/cpu" % zone.name
-            accum_uri = "kstat:/zones/%s/cpu/accum/sys" % zone.name
-
             initial = self._kstat_data(accum_uri)
             cpus = self._kstat_data(uri)
+            if cpus is None:
+                # If the zone state is not running then give up but if it is
+                # running try again.
+                if zone.state != ZONE_STATE_RUNNING:
+                    break
+                else:
+                    continue
             cpus.pop('pset_accum')
             cpus.pop('accum')
 
-            for n in cpus.keys():
+            cpu = None
+            for n in cpus:
                 cpu = self._kstat_data(uri + "/%s" % n)
+                if cpu is None:
+                    if zone.state != ZONE_STATE_RUNNING:
+                        return 0
+                    else:
+                        break
 
                 total += self._sum_kstat_statistic(cpu, 'cpu_nsec_kernel_cur')
                 total += self._sum_kstat_statistic(cpu, 'cpu_nsec_user_cur')
 
+            if cpu is None:
+                continue
+
             final = self._kstat_data(accum_uri)
+            if final is None:
+                continue
 
             if initial['gen_num'] == final['gen_num']:
                 total += initial['cpu_nsec_user'] + initial['cpu_nsec_kernel']
@@ -2693,7 +2712,7 @@
 
     def _get_zone_diagnostics(self, zone):
         """Return data about Solaris Zone diagnostics."""
-        if zone.id == -1:
+        if zone.name is None or zone.state != ZONE_STATE_RUNNING:
             return None
 
         diagnostics = defaultdict(lambda: 0)
@@ -2714,14 +2733,34 @@
 
         for _attempt in range(3):
             initial = self._kstat_data(accum_uri)
-            data = self._kstat_data(uri)
-            data.pop('accum')
-            data.pop('pset_accum')
+            cpus = self._kstat_data(uri)
+            # Something went wrong in the data collection, if the zone is no
+            # longer running simply return None otherwise continue
+            if cpus is None:
+                if zone.state != ZONE_STATE_RUNNING:
+                    return None
+                continue
+            cpus.pop('accum')
+            cpus.pop('pset_accum')
 
             # Turn the list of cpu ids in data.keys into a dictionary of the
             # 'sys' kstat for each cpu id.
-            data = {n: self._kstat_data(uri + "/%s" % n)['sys']
-                    for n in data.keys()}
+            data = {}
+            datapoint = None
+            for n in cpus:
+                datapoint = self._kstat_data(uri + "/%s" % n)
+                # We could not get a datapoint for one of the cpus, so try the
+                # whole thing again if zone.state is still running.
+                if datapoint is None:
+                    if zone.state != ZONE_STATE_RUNNING:
+                        return None
+                    else:
+                        break
+
+                data.update({n: datapoint['sys']})
+
+            if datapoint is None:
+                continue
 
             # The list of cpu kstats in data must contain at least one element
             # and all elements have the same map of statistics, since they're