22713569 nova-conductor doesn't handle RPC timeout during live-migration well
author david.comay@oracle.com
Thu, 11 Feb 2016 17:35:03 -0800
changeset 5450 699a9e31ddae
parent 5449 ba6fc2429fb0
child 5451 ff0c0a96e393
22713569 nova-conductor doesn't handle RPC timeout during live-migration well
components/openstack/nova/patches/11-launchpad-1435633.patch
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/components/openstack/nova/patches/11-launchpad-1435633.patch	Thu Feb 11 17:35:03 2016 -0800
@@ -0,0 +1,75 @@
+This upstream fix is present in Liberty but has not yet been
+backported to Kilo.
+
+commit c0d0e5ccf62f45498f084ed59e776c3b370e9137
+Author: jichenjc <[email protected]>
+Date:   Mon Mar 23 00:36:43 2015 +0800
+
+    Handle MessageTimeout to MigrationPreCheckError
+    
+    There are a few checks before live-migration; if any of the
+    checks fails, the live-migration can't be done. However,
+    the _call_livem_checks_on_host check is an RPC call, so it
+    might result in a MessagingTimeout exception.
+    If this occurs, we should safely revert the instance's state,
+    since no real operation occurred.
+    
+    This patch translates the MessagingTimeout to
+    MigrationPreCheckError and leverages the existing MigrationPreCheckError
+    processing to revert the instance to its normal state instead of error.
+    
+    Closes-Bug: 1435633
+    
+    Change-Id: I8b484abb6650e14e2d225ca5e476d1fa7a6ee990
+
+--- nova-2015.1.2/nova/conductor/tasks/live_migrate.py.~1~	2015-10-13 07:52:44.000000000 -0700
++++ nova-2015.1.2/nova/conductor/tasks/live_migrate.py	2016-02-10 23:52:06.321170852 -0800
+@@ -12,6 +12,7 @@
+ 
+ from oslo_config import cfg
+ from oslo_log import log as logging
++import oslo_messaging as messaging
+ 
+ from nova.compute import power_state
+ from nova.compute import rpcapi as compute_rpcapi
+@@ -140,9 +141,14 @@ class LiveMigrationTask(object):
+             raise exception.DestinationHypervisorTooOld()
+ 
+     def _call_livem_checks_on_host(self, destination):
+-        self.migrate_data = self.compute_rpcapi.\
+-            check_can_live_migrate_destination(self.context, self.instance,
+-                destination, self.block_migration, self.disk_over_commit)
++        try:
++            self.migrate_data = self.compute_rpcapi.\
++                check_can_live_migrate_destination(self.context, self.instance,
++                    destination, self.block_migration, self.disk_over_commit)
++        except messaging.MessagingTimeout:
++            msg = _("Timeout while checking if we can live migrate to host: "
++                    "%s") % destination
++            raise exception.MigrationPreCheckError(msg)
+ 
+     def _find_destination(self):
+         # TODO(johngarbutt) this retry loop should be shared
+--- nova-2015.1.2/nova/tests/unit/conductor/tasks/test_live_migrate.py.~1~	2015-10-13 07:52:44.000000000 -0700
++++ nova-2015.1.2/nova/tests/unit/conductor/tasks/test_live_migrate.py	2016-02-10 23:52:46.817923384 -0800
+@@ -11,6 +11,7 @@
+ #    under the License.
+ 
+ from mox3 import mox
++import oslo_messaging as messaging
+ 
+ from nova.compute import power_state
+ from nova.compute import utils as compute_utils
+@@ -401,5 +402,12 @@ class LiveMigrationTaskTestCase(test.NoD
+         self.mox.ReplayAll()
+         self.assertRaises(exception.NoValidHost, self.task._find_destination)
+ 
++    def test_call_livem_checks_on_host(self):
++        with mock.patch.object(self.task.compute_rpcapi,
++            'check_can_live_migrate_destination',
++            side_effect=messaging.MessagingTimeout):
++            self.assertRaises(exception.MigrationPreCheckError,
++                self.task._call_livem_checks_on_host, {})
++
+     def test_not_implemented_rollback(self):
+         self.assertRaises(NotImplementedError, self.task.rollback)