PSARC 2015/357 OpenStack Nova support for kernel zone suspend/resume
author Elena Ouyang <elena.ouyang@oracle.com>
Fri, 14 Aug 2015 14:44:36 -0700
changeset 4781 93d68a5ece25
parent 4780 646672ba9940
child 4782 256e6426f580
PSARC 2015/357 OpenStack Nova support for kernel zone suspend/resume
21630538 Nova driver should support suspend/resume
components/openstack/horizon/files/overrides.py
components/openstack/nova/files/nova-compute
components/openstack/nova/files/nova.conf
components/openstack/nova/files/nova.exec_attr
components/openstack/nova/files/solariszones/driver.py
--- a/components/openstack/horizon/files/overrides.py	Fri Aug 14 10:54:44 2015 -0700
+++ b/components/openstack/horizon/files/overrides.py	Fri Aug 14 14:44:36 2015 -0700
@@ -75,19 +75,20 @@
 AccessAndSecurityTabs.tabs = (KeypairsTab, FloatingIPsTab, APIAccessTab)
 
 # Remove 'ConfirmResize', 'RevertResize', 'TogglePause',
-# 'ToggleSuspend', 'MigrateInstance' actions from Admin/Instances/Actions
+# 'MigrateInstance' actions from Admin/Instances/Actions
 admin_tables.AdminInstancesTable._meta.row_actions = (
     admin_tables.AdminEditInstance,
     project_tables.ConsoleLink,
     project_tables.LogLink,
     project_tables.CreateSnapshot,
+    project_tables.ToggleSuspend,
     project_tables.SoftRebootInstance,
     project_tables.RebootInstance,
     project_tables.TerminateInstance
 )
 
 # Remove 'ConfirmResize', 'RevertResize', 'EditInstanceSecurityGroups',
-# 'TogglePause', 'ToggleSuspend', 'ResizeLink', 'RebuildInstance' actions
+# 'TogglePause', 'ResizeLink', 'RebuildInstance' actions
 # from Project/Instances/Actions
 project_tables.InstancesTable._meta.row_actions = (
     project_tables.StartInstance,
@@ -99,6 +100,7 @@
     project_tables.DecryptInstancePassword,
     project_tables.ConsoleLink,
     project_tables.LogLink,
+    project_tables.ToggleSuspend,
     project_tables.SoftRebootInstance,
     project_tables.RebootInstance,
     project_tables.StopInstance,
--- a/components/openstack/nova/files/nova-compute	Fri Aug 14 10:54:44 2015 -0700
+++ b/components/openstack/nova/files/nova-compute	Fri Aug 14 14:44:36 2015 -0700
@@ -14,12 +14,54 @@
 #    License for the specific language governing permissions and limitations
 #    under the License.
 
+import ConfigParser
 import os
+from subprocess import CalledProcessError, Popen, PIPE, check_call
 
 import smf_include
 
 
 def start():
+    # nova.conf may override where suspend images are stored; it must exist
+    nova_conf = "/etc/nova/nova.conf"
+    if not os.path.exists(nova_conf):
+        print "%s doesn't exist" % nova_conf
+        return smf_include.SMF_EXIT_ERR_CONFIG
+
+    parser = ConfigParser.ConfigParser()
+    parser.read(nova_conf)
+
+    # use [DEFAULT] zones_suspend_path if set, otherwise the built-in default
+    default_path = '/var/share/suspend'
+    try:
+        suspend_path = parser.get('DEFAULT', 'zones_suspend_path')
+    except ConfigParser.NoOptionError:
+        suspend_path = default_path
+
+    if not os.path.exists(suspend_path):
+        if suspend_path == default_path:
+            # root pool name = first '/'-separated field of 'zfs list' for /
+            cmd = ['/usr/sbin/zfs', 'list', '-Ho', 'name', '/']
+            p = Popen(cmd, stdout=PIPE, stderr=PIPE)
+            output, error = p.communicate()
+            if p.returncode != 0:
+                print "unable to determine root pool name: %s" % (error)
+                return smf_include.SMF_EXIT_ERR_CONFIG
+            rpool = output.split('/')[0]
+
+            # default path is missing: create a dataset mounted at that path
+            suspend_ds = os.path.join(rpool, 'VARSHARE/suspend')
+            try:
+                check_call(['/usr/bin/pfexec', '/usr/sbin/zfs', 'create', '-p',
+                            '-o', 'mountpoint=' + suspend_path, suspend_ds])
+            except CalledProcessError as err:
+                print "unable to create %s: %s" % (suspend_ds, err)
+                return smf_include.SMF_EXIT_ERR_CONFIG
+        else:
+            # a non-default path must already exist; it is never created here
+            print "Zones suspend path %s does not exist" % (suspend_path)
+            return smf_include.SMF_EXIT_ERR_CONFIG
+
     smf_include.smf_subprocess("/usr/bin/pfexec /usr/lib/nova/nova-compute")
 
 if __name__ == "__main__":
--- a/components/openstack/nova/files/nova.conf	Fri Aug 14 10:54:44 2015 -0700
+++ b/components/openstack/nova/files/nova.conf	Fri Aug 14 14:44:36 2015 -0700
@@ -1977,6 +1977,9 @@
 # value)
 #solariszones_snapshots_directory=$instances_path/snapshots
 
+# Default path for suspend images for Solaris Zones. (string value)
+#zones_suspend_path=/var/share/suspend
+
 
 #
 # Options defined in nova.vnc
--- a/components/openstack/nova/files/nova.exec_attr	Fri Aug 14 10:54:44 2015 -0700
+++ b/components/openstack/nova/files/nova.exec_attr	Fri Aug 14 14:44:36 2015 -0700
@@ -11,4 +11,6 @@
 
 nova-compute:solaris:cmd:RO::/usr/sbin/iscsiadm:euid=0
 
+nova-compute:solaris:cmd:RO::/usr/sbin/zfs:euid=0
+
 nova-compute:solaris:cmd:RO::/usr/sbin/zlogin:uid=0
--- a/components/openstack/nova/files/solariszones/driver.py	Fri Aug 14 10:54:44 2015 -0700
+++ b/components/openstack/nova/files/solariszones/driver.py	Fri Aug 14 14:44:36 2015 -0700
@@ -20,6 +20,7 @@
 """
 
 import base64
+import errno
 import glob
 import os
 import platform
@@ -77,6 +78,9 @@
                default='$instances_path/snapshots',
                help='Location where solariszones driver will store snapshots '
                     'before uploading them to the Glance image service'),
+    cfg.StrOpt('zones_suspend_path',
+               default='/var/share/suspend',
+               help='Default path for suspend images for Solaris Zones.'),
 ]
 
 CONF = cfg.CONF
@@ -131,6 +135,17 @@
 XTERM_PATH = '/usr/bin/xterm'
 
 
+def lookup_resource(zone, resource):
+    """Return the zone's first resource of the given type, or None."""
+    try:
+        val = zone.getResources(zonemgr.Resource(resource))
+    except rad.client.ObjectError:
+        return None
+    except Exception:  # NOTE(review): redundant; it only re-raises
+        raise
+    return val[0] if val else None
+
+
 def lookup_resource_property(zone, resource, prop, filter=None):
     """Lookup specified property from specified Solaris Zone resource."""
     try:
@@ -1050,6 +1065,18 @@
             with ZoneConfig(zone) as zc:
                 zc.setprop('global', 'tenant', tenant_id)
 
+    def _set_suspend(self, instance):
+        """Add a 'suspend' resource whose path is under zones_suspend_path.
+        """
+        name = instance['name']
+        zone = self._get_zone_by_name(name)
+        if zone is None:
+            raise exception.InstanceNotFound(instance_id=name)
+
+        path = os.path.join(CONF.zones_suspend_path, '%{zonename}')
+        with ZoneConfig(zone) as zc:
+            zc.addresource('suspend', [zonemgr.Property('path', path)])
+
     def _verify_sysconfig(self, sc_dir, instance, admin_password=None):
         """verify the SC profile(s) passed in contain an entry for
         system/config-user to configure the root account.  If an SSH key is
@@ -2089,7 +2116,39 @@
         :param instance: nova.objects.instance.Instance
         """
         # TODO(Vek): Need to pass context in for access to auth_token
-        raise NotImplementedError()
+        name = instance['name']
+        zone = self._get_zone_by_name(name)
+        if zone is None:
+            raise exception.InstanceNotFound(instance_id=name)
+
+        if zone.brand != ZONE_BRAND_SOLARIS_KZ:
+            # Only Solaris kernel zones are currently supported.
+            reason = (_("'%s' branded zones do not currently support "
+                        "suspend. Use 'nova reset-state --active %s' "
+                        "to reset instance state back to 'active'.")
+                      % (zone.brand, instance['display_name']))
+            raise exception.InstanceSuspendFailure(reason=reason)
+
+        if self._get_state(zone) != power_state.RUNNING:
+            reason = (_("Instance '%s' is not running.") % name)
+            raise exception.InstanceSuspendFailure(reason=reason)
+
+        try:
+            new_path = os.path.join(CONF.zones_suspend_path, '%{zonename}')
+            if not lookup_resource(zone, 'suspend'):
+                # add suspend if not configured
+                self._set_suspend(instance)
+            elif lookup_resource_property(zone, 'suspend', 'path') != new_path:
+                # replace the old suspend resource with the new one
+                with ZoneConfig(zone) as zc:
+                    zc.removeresources('suspend')
+                self._set_suspend(instance)
+
+            zone.suspend()
+        except Exception as reason:
+            LOG.error(_("Unable to suspend instance '%s' via "
+                        "zonemgr(3RAD): %s") % (name, reason))
+            raise exception.InstanceSuspendFailure(reason=reason)
 
     def resume(self, context, instance, network_info, block_device_info=None):
         """resume the specified instance.
@@ -2100,7 +2159,28 @@
            :py:meth:`~nova.network.manager.NetworkManager.get_instance_nw_info`
         :param block_device_info: instance volume block device info
         """
-        raise NotImplementedError()
+        name = instance['name']
+        zone = self._get_zone_by_name(name)
+        if zone is None:
+            raise exception.InstanceNotFound(instance_id=name)
+
+        if zone.brand != ZONE_BRAND_SOLARIS_KZ:
+            # Only Solaris kernel zones are currently supported.
+            reason = (_("'%s' branded zones do not currently support "
+                      "resume.") % zone.brand)
+            raise exception.InstanceResumeFailure(reason=reason)
+
+        # check that the instance is suspended
+        if self._get_state(zone) != power_state.SHUTDOWN:
+            reason = (_("Instance '%s' is not suspended.") % name)
+            raise exception.InstanceResumeFailure(reason=reason)
+
+        try:
+            zone.boot()
+        except Exception as reason:
+            LOG.error(_("Unable to resume instance '%s' via zonemgr(3RAD): %s")
+                      % (name, reason))
+            raise exception.InstanceResumeFailure(reason=reason)
 
     def resume_state_on_host_boot(self, context, instance, network_info,
                                   block_device_info=None):