20089330 Horizon reports "Error: Unable to contact Neutron" on large data set
author Girish Moodalbail <Girish.Moodalbail@oracle.COM>
Mon, 08 Dec 2014 15:34:49 -0800
changeset 3524 ad6a9e0880b9
parent 3521 958ebf9deed1
child 3526 5fd7084d7e59
20089330 Horizon reports "Error: Unable to contact Neutron" on large data set
20103259 unable to delete instance, reports "EVS controller: vport in use"
19970607 disabling neutron smf services does not remove services' vports
18091479 unable delete a port that was associated with incomplete zone
components/openstack/neutron/files/agent/solaris/dhcp.py
components/openstack/neutron/files/agent/solaris/interface.py
components/openstack/neutron/files/evs/db/l3nat.py
components/openstack/neutron/files/evs/plugin.py
components/openstack/neutron/files/neutron-dhcp-agent
--- a/components/openstack/neutron/files/agent/solaris/dhcp.py	Mon Dec 08 12:01:59 2014 -0800
+++ b/components/openstack/neutron/files/agent/solaris/dhcp.py	Mon Dec 08 15:34:49 2014 -0800
@@ -33,6 +33,7 @@
 
 from neutron.agent.linux import utils
 from neutron.agent.solaris import net_lib
+from neutron.common import constants
 from neutron.common import exceptions
 from neutron.openstack.common import importutils
 from neutron.openstack.common import jsonutils
@@ -573,7 +574,14 @@
         dhcp_port = None
         for port in network.ports:
             port_device_id = getattr(port, 'device_id', None)
-            if port_device_id == device_id:
+            port_device_owner = getattr(port, 'device_owner', None)
+
+            # if the agent is started on a different node, then the
+            # device_ids will be different since they are based off
+            # hostname.
+            if (port_device_id == device_id or
+                    (port_device_owner == constants.DEVICE_OWNER_DHCP and
+                     port_device_id.startswith('dhcp'))):
                 port_fixed_ips = []
                 for fixed_ip in port.fixed_ips:
                     port_fixed_ips.append({'subnet_id': fixed_ip.subnet_id,
--- a/components/openstack/neutron/files/agent/solaris/interface.py	Mon Dec 08 12:01:59 2014 -0800
+++ b/components/openstack/neutron/files/agent/solaris/interface.py	Mon Dec 08 15:34:49 2014 -0800
@@ -88,6 +88,20 @@
 
         evs_vport = ('%s/%s') % (network_id, port_id)
         dl = net_lib.Datalink(datalink_name)
+
+        # This is to handle HA when the 1st DHCP/L3 agent is down and
+        # the second DHCP/L3 agent tries to connect its VNIC to EVS, we will
+        # end up in "vport in use" error. So, we need to reset the vport
+        # before we connect the VNIC to EVS.
+        cmd = ['/usr/sbin/evsadm', 'show-vport', '-f',
+               'vport=%s' % port_id, '-co', 'evs,vport,status']
+        stdout = utils.execute(cmd)
+        evsname, vportname, status = stdout.strip().split(':')
+        if status == 'used':
+            cmd = ['/usr/sbin/evsadm', 'reset-vport', '-T', tenant_id,
+                   '%s/%s' % (evsname, vportname)]
+            utils.execute(cmd)
+
         dl.connect_vnic(evs_vport, tenant_id)
 
         if not protection:
--- a/components/openstack/neutron/files/evs/db/l3nat.py	Mon Dec 08 12:01:59 2014 -0800
+++ b/components/openstack/neutron/files/evs/db/l3nat.py	Mon Dec 08 15:34:49 2014 -0800
@@ -254,6 +254,8 @@
         proper deletion checks.
         """
         port = self.get_port(context, port_id)
+        if not port:
+            return
         if port['device_owner'] in [DEVICE_OWNER_ROUTER_INTF,
                                     DEVICE_OWNER_ROUTER_GW,
                                     DEVICE_OWNER_FLOATINGIP]:
--- a/components/openstack/neutron/files/evs/plugin.py	Mon Dec 08 12:01:59 2014 -0800
+++ b/components/openstack/neutron/files/evs/plugin.py	Mon Dec 08 15:34:49 2014 -0800
@@ -527,7 +527,7 @@
                     continue
                 key = SUBNET_IPNET_ATTRIBUTE_MAP.get(key, key)
                 if isinstance(value, list):
-                    value = ",".join([str(val) for val in value])
+                    value = ",".join(map(str, set(value)))
                     if not value:
                         continue
                 filterlist.append("%s=%s" % (key, value))
@@ -715,7 +715,7 @@
                     continue
                 key = NETWORK_EVS_ATTRIBUTE_MAP.get(key, key)
                 if isinstance(value, list):
-                    value = ",".join([str(val) for val in value])
+                    value = ",".join(map(str, set(value)))
                     if not value:
                         continue
                 filterlist.append("%s=%s" % (key, value))
@@ -929,7 +929,7 @@
                     continue
                 key = PORT_VPORT_ATTRIBUTE_MAP.get(key, key)
                 if isinstance(value, list):
-                    value = ",".join([str(val) for val in value])
+                    value = ",".join(map(str, set(value)))
                     if not value:
                         continue
                 filterlist.append("%s=%s" % (key, value))
@@ -978,13 +978,29 @@
                                                 topic=topics.L3_AGENT)
 
     @lockutils.synchronized('evs-plugin', 'neutron-')
-    def evs_controller_removeVPort(self, tenantname, evsname, vportuuid):
+    def evs_controller_removeVPort(self, tenantname, evsname, vportuuid,
+                                   vportname):
+        pat = radcli.ADRGlobPattern({'name': evsname,
+                                     'tenant': tenantname})
         try:
-            pat = radcli.ADRGlobPattern({'name': evsname,
-                                         'tenant': tenantname})
             evs = self._rc.get_object(evsbind.EVS(), pat)
             evs.removeVPort(vportuuid)
         except radcli.ObjectError as oe:
+            # '7' corresponds to EVS' EVS_EBUSY_VPORT error code
+            if oe.get_payload().err == 7:
+                # It is possible that the VM is destroyed, but EVS is unaware
+                # of it. So, try to reset the vport. If it succeeds, then call
+                # removeVPort() again.
+                try:
+                    evs.resetVPort(vportname)
+                    evs.removeVPort(vportuuid)
+                except:
+                    # we failed one of the above operations, just return
+                    # the original exception.
+                    pass
+                else:
+                    # the reset and remove succeeded, just return.
+                    return
             raise EVSControllerError(oe.get_payload().errmsg)
 
     def delete_port(self, context, id, l3_port_check=True):
@@ -997,7 +1013,7 @@
         if not l3_port_check:
             self._release_l3agent_internal_port(context, port)
         self.evs_controller_removeVPort(port['tenant_id'], port['network_id'],
-                                        id)
+                                        id, port['name'])
 
         # notify dhcp agent of port deletion
         payload = {
--- a/components/openstack/neutron/files/neutron-dhcp-agent	Mon Dec 08 12:01:59 2014 -0800
+++ b/components/openstack/neutron/files/neutron-dhcp-agent	Mon Dec 08 15:34:49 2014 -0800
@@ -62,9 +62,22 @@
             # first remove the IP
             check_call(["/usr/bin/pfexec", "/usr/sbin/ipadm", "delete-ip",
                         ifname])
+            # get the tenant, evs, and vport name for the VNIC
+            cmd = ["/usr/sbin/dladm", "show-vnic", "-po",
+                   "tenant,evs,vport", ifname]
+            p = Popen(cmd, stdout=PIPE, stderr=PIPE)
+            output, error = p.communicate()
+            if p.returncode != 0:
+                print "failed to retrieve Tenant, EVS," \
+                      " and VPort info for a VNIC"
+                return smf_include.SMF_EXIT_ERR_FATAL
+            tenant, evs, vport = output.strip().split(':')
             # next remove the VNIC
             check_call(["/usr/bin/pfexec", "/usr/sbin/dladm", "delete-vnic",
                         ifname])
+            # remove the EVS VPort
+            check_call(["/usr/sbin/evsadm", "remove-vport", "-T", tenant,
+                        "%s/%s" % (evs, vport)])
         except CalledProcessError as err:
             print "failed to remove datalinks used by DHCP agent: %s" % err
             return smf_include.SMF_EXIT_ERR_FATAL