6750617 un-MacGyver BugDB lookups
author John Sonnenschein <John.Sonnenschein@Sun.COM>
Fri, 07 Nov 2008 15:51:59 -0800
changeset 8042 54a2500a85d8
parent 8041 313cd9ed7f9c
child 8043 616fecb342e7
6750617 un-MacGyver BugDB lookups
6750611 DbLookups.ARC & Comments should coalesce ARC queries
6759911 hg pbchk could report more useful URL errors
usr/src/tools/onbld/Checks/Comments.py
usr/src/tools/onbld/Checks/DbLookups.py
--- a/usr/src/tools/onbld/Checks/Comments.py	Fri Nov 07 17:29:17 2008 -0500
+++ b/usr/src/tools/onbld/Checks/Comments.py	Fri Nov 07 15:51:59 2008 -0800
@@ -52,17 +52,17 @@
 	return re.sub(r'^([A-Z][A-Z]*ARC)[/ \t]', '\\1 ', caseid)
 
 def comchk(comments, check_db=True, output=sys.stderr):
-        '''Validate checkin comments against ON standards.
+	'''Validate checkin comments against ON standards.
 
-        Comments must be a list of one-line comments, with no trailing
-        newline.
-        
-        If check_db is True (the default), validate CR and ARC
-        synopses against the databases.
+	Comments must be a list of one-line comments, with no trailing
+	newline.
+	
+	If check_db is True (the default), validate CR and ARC
+	synopses against the databases.
 
-        Error messages intended for the user are written to output,
-        which defaults to stderr
-        '''
+	Error messages intended for the user are written to output,
+	which defaults to stderr
+	'''
 	bugnospcre = re.compile(r'^(\d{7})([^ ].*)')
 	ignorere = re.compile(r'^(Portions contributed by |Contributed by |back[ -]?out )')
 
@@ -77,10 +77,10 @@
 	blanks = False
 
 	for com in comments:
-                # Our input must be newline-free, comments are line-wise.
-                if com.find('\n') != -1:
-                        raise ValueError("newline in comment '%s'" % com)
-                
+		# Our input must be newline-free, comments are line-wise.
+		if com.find('\n') != -1:
+			raise ValueError("newline in comment '%s'" % com)
+
 		# Ignore valid comments we can't check
 		if ignorere.search(com):
 			continue
@@ -112,9 +112,8 @@
 		# ARC case
 		match = arcre.search(com)
 		if match:
-			case = normalize_arc(match.group(1))
-			if case not in arcs: arcs[case] = []
-			arcs[case].append(match.group(2))
+			arc, case = re.split('[/ \t]', match.group(1), 1)
+			arcs.setdefault((arc, case), []).append(match.group(2))
 			continue
 
 		# Anything else is bogus
@@ -143,24 +142,23 @@
 		for entered in insts:
 			synopsis = results[crid]["synopsis"]
 			if not re.search(r'^' + re.escape(synopsis) +
-					 r'( \([^)]+\))?$', entered):
+					r'( \([^)]+\))?$', entered):
 				errors['nomatch'].append([crid, synopsis,
-							  entered])
+							entered])
+
+	if check_db:
+		valid = ARC(arcs.keys())
 
 	for case, insts in arcs.iteritems():
 		if len(insts) > 1:
-			errors['dup'].append(case)
-
-		if not check_db:
-			continue
+			errors['dup'].append(' '.join(case))
 
-		com, id = case.split(' ')
-		arc = ARC(com, id)
-
-		if not arc.valid():
-			errors['nonexistent'].append(case)
+ 		if not check_db:
 			continue
-
+                
+		if not case in valid:
+			errors['nonexistent'].append(' '.join(case))
+			continue
 		#
 		# The opensolaris.org ARC interfaces only give us the
 		# first 40 characters of the case name, so we must limit
@@ -171,15 +169,15 @@
 		# trailing (fix nit)-type comment, and re-try.
 		#
 		for entered in insts:
-			if entered[0:40] == arc.name():
-				continue
+			if entered[0:40] == valid[case]:
+				break
 			else:
 				# Try again with trailing (fix ...) removed.
 				dbcom = re.sub(r' \([^)]+\)$', '', entered)
-				if dbcom[0:40] != arc.name():
-					errors['nomatch'].append([case,
-								  arc.name(),
-								  entered])
+				if dbcom[0:40] != valid[case]:
+					errors['nomatch'].append(
+						[' '.join(case), valid[case],
+						 entered])
 
 	if blanks:
 		output.write("WARNING: Blank line(s) in comments\n")
--- a/usr/src/tools/onbld/Checks/DbLookups.py	Fri Nov 07 17:29:17 2008 -0500
+++ b/usr/src/tools/onbld/Checks/DbLookups.py	Fri Nov 07 15:51:59 2008 -0800
@@ -34,6 +34,7 @@
 
 import re
 import urllib
+import urllib2
 import htmllib
 import os
 from socket import socket, AF_INET, SOCK_STREAM
@@ -52,122 +53,13 @@
 	def __str__(self):
 		return "Bug %s does not exist" % self.data
 
-class Monaco(object):
-	"""
-	Query bug database.
-
-	Methods:
-	queryBugs()
-	expertQuery()
-	"""
-	
-	def __init__(self):
-		self.__baseURL = "http://hestia.sfbay.sun.com/cgi-bin/expert?"
-
-	def expertQuery(self, cmd, format="Normal+text", header=False):
-		"""Return results of user-supplied bug query.
-
-		Argument:
-		cmd: query to run
-
-		Keyword arguments:
-		format: desired output format (default="Normal+text")
-		header: include headers in output? (default=False)
-
-		Returns:
-		List of lines representing the output from Monaco
-		"""
-
-		url = self.__baseURL + "format=" + format + ";Go=2;"
-		if not header: url += "no_header=on;"
-		url += "cmds=" + urllib.quote_plus("\n".join(cmd))
-		myMonaco = urllib.urlopen(url)
-		return myMonaco.readlines()
-
-	def queryBugs(self, crs):
-		"""Return all info for requested change reports.
-
-		Argument:
-		crs: list of change request ids
+class BugDBException(Exception):	# raised by BugDB for an unknown database name
+	def __init__(self, data=''):
+		self.data = data	# the unrecognized database name
+		Exception.__init__(self, data)
 
-		Returns:
-		Dictionary, mapping CR=>dictionary, where the nested dictionary
-		is a mapping of field=>value
-		"""
-		monacoFields = [ "cr_number", "category", "sub_category",
-			"area", "release", "build", "responsible_manager",
-			"responsible_engineer", "priority", "status", "sub_status",
-			"submitted_by", "date_submitted", "synopsis" ]
-		cmd = []
-		cmd.append("set What = cr." + ', cr.'.join(monacoFields))
-		cmd.append("")
-		cmd.append("set Which = cr.cr_number in (" + ','.join(crs) +")")
-		cmd.append("")
-		cmd.append("set FinalClauses = order by cr.cr_number")
-		cmd.append("")
-		cmd.append("doMeta genQuery cr")
-		output = self.expertQuery(cmd, "Pipe-delimited+text")
-		results = {}
-		for line in output:
-			line = line.rstrip('\n')
-                        
-			#
-			# We request synopsis last, and split on only
-			# the number of separators that we expect to
-			# see such that a | in the synopsis doesn't
-			# throw us out of whack.
-			#
-			values = line.split('|', len(monacoFields) - 1)
-			v = 0
-			cr = values[0]
-			results[cr] = {}
-			for field in monacoFields:
-				results[cr][field] = values[v]
-				v += 1
-		return results
-
-class BooBug(object):
-	"""Look up a single bug on bugs.opensolaris.org."""
-	def __init__(self, cr):
-		cr = str(cr)
-		url = "http://bugs.opensolaris.org/view_bug.do?bug_id="+cr
-		data = urllib.urlopen(url).readlines()
-		self.__fields = {}
-		self.__fields["cr_number"] = cr
-		htmlParser = htmllib.HTMLParser(None)
-		metaHtmlRe = re.compile(r'^<meta name="([^"]+)" content="([^"]*)">$')
-		for line in data:
-			m = metaHtmlRe.search(line)
-			if not m:
-				continue
-			val = urllib.unquote(m.group(2))
-			htmlParser.save_bgn()
-			htmlParser.feed(val)
-			self.__fields[m.group(1)] = htmlParser.save_end()
-		htmlParser.close()
-		if "synopsis" not in self.__fields:
-			raise NonExistentBug(cr)
-	
-	def synopsis(self):
-		return self.__fields["synopsis"]
-	def product(self):
-		return self.__fields["product"]
-	def cat(self):
-		return self.__fields["category"]
-	def subcat(self):
-		return self.__fields["subcategory"]
-	def keywords(self):
-		return self.__fields["keywords"]
-	def state(self):
-		return self.__fields["state"]
-	def submit_date(self):
-		return self.__fields["submit_date"]
-	def type(self):
-		return self.__fields["type"]
-	def date(self):
-		return self.__fields["date"]
-	def number(self):
-		return self.__fields["cr_number"]
+	def __str__(self):
+		return "Unknown bug database: %s" % self.data
 
 class BugDB(object):
 	"""Lookup change requests.
@@ -183,18 +75,118 @@
 	print r["6505625"]["synopsis"]
 	"""
 
-	def __init__(self, forceBoo = False):
+	def __init__(self, priority = ("bugster",), forceBoo = False):
 		"""Create a BugDB object.
 
 		Keyword argument:
 		forceBoo: use b.o.o even from SWAN (default=False)
+		priority: use bug databases in this order (default: bugster only)
 		"""
-		if forceBoo:
-			self.__onSWAN = False
-		else:
-			self.__onSWAN = onSWAN()
-			if self.__onSWAN:
-				self.__m = Monaco()
+		self.__validBugDB = ["bugster"]
+		self.__onSWAN = not forceBoo and onSWAN()
+		for database in priority:
+			if database not in self.__validBugDB:
+				raise BugDBException, database	# unknown database name
+		self.__priority = priority
+
+
+	def __boobug(self, cr):
+		"""Return a field=>value dict for one CR from bugs.opensolaris.org;
+		raise NonExistentBug on a 404 or a page without a synopsis."""
+		cr = str(cr)
+		url = "http://bugs.opensolaris.org/view_bug.do"
+		req = urllib2.Request(url, urllib.urlencode({"bug_id": cr}))
+		results = {}
+		try:
+			data = urllib2.urlopen(req).readlines()
+		except urllib2.HTTPError, e:
+			if e.code == 404:
+				raise NonExistentBug(cr)
+			print "ERROR: HTTP error at " + \
+				req.get_full_url() + \
+				" got error: " + str(e.code)
+			raise e
+		except urllib2.URLError, e:
+			print "ERROR: could not connect to " + \
+				req.get_full_url() + \
+				' got error: "' + str(e.reason) + '"'
+			raise e
+		htmlParser = htmllib.HTMLParser(None)
+		metaHtmlRe = re.compile(r'^<meta name="([^"]+)" content="([^"]*)">$')
+		for line in data:
+			m = metaHtmlRe.search(line)
+			if not m:
+				continue
+			val = urllib.unquote(m.group(2))
+			htmlParser.save_bgn()
+			htmlParser.feed(val)
+			results[m.group(1)] = htmlParser.save_end()
+		htmlParser.close()
+
+		if "synopsis" not in results:
+			raise NonExistentBug(cr)
+		# Rename b.o.o's field names to the ones Monaco results use.
+		results["cr_number"] = cr
+		results["sub_category"] = results.pop("subcategory")
+		results["status"] = results.pop("state")
+		results["date_submitted"] = results.pop("submit_date")
+		return results
+
+
+	def __monaco(self, crs):
+		"""Return all info for requested change reports.
+
+		Argument:
+		crs: list of change request ids
+
+		Returns:
+		Dictionary, mapping CR=>dictionary, where the nested dictionary
+		is a mapping of field=>value
+
+		Raises:
+		urllib2.HTTPError or urllib2.URLError if the query fails
+		"""
+		#
+		# We request synopsis last, and split on only
+		# the number of separators that we expect to
+		# see such that a | in the synopsis doesn't
+		# throw us out of whack.
+		#
+		monacoFields = [ "cr_number", "category", "sub_category",
+			"area", "release", "build", "responsible_manager",
+			"responsible_engineer", "priority", "status", "sub_status",
+			"submitted_by", "date_submitted", "synopsis" ]
+		cmd = []
+		cmd.append("set What = cr." + ', cr.'.join(monacoFields))
+		cmd.append("")
+		cmd.append("set Which = cr.cr_number in (" + ','.join(crs) +")")
+		cmd.append("")
+		cmd.append("set FinalClauses = order by cr.cr_number")
+		cmd.append("")
+		cmd.append("doMeta genQuery cr")
+		url = "http://hestia.sfbay.sun.com/cgi-bin/expert?format="
+		url += "Pipe-delimited+text;Go=2;no_header=on;cmds="
+		url += urllib.quote_plus("\n".join(cmd))
+		results = {}
+		# Tell the user what failed, then let the caller see the error.
+		try:
+			data = urllib2.urlopen(url).readlines()
+		except urllib2.HTTPError, e:
+			print "ERROR: HTTP error at " + url + \
+				" got error: " + str(e.code)
+			raise e
+		except urllib2.URLError, e:
+			print "ERROR: could not connect to " + url + \
+				' got error: "' + str(e.reason) + '"'
+			raise e
+		for line in data:
+			line = line.rstrip('\n')
+			values = line.split('|', len(monacoFields) - 1)
+			cr = values[0]
+			results[cr] = {}
+			for v, field in enumerate(monacoFields):
+				results[cr][field] = values[v]
+		return results
 
 	def lookup(self, crs):
 		"""Return all info for requested change reports.
@@ -207,72 +199,61 @@
 		Dictionary, mapping CR=>dictionary, where the nested dictionary
 		is a mapping of field=>value
 		"""
+		results = {}
 		if not isinstance(crs, list):
 			crs = [str(crs)]
-		if self.__onSWAN:
-			results = self.__m.queryBugs(crs)
-			return self.__m.queryBugs(crs)
-		# else we're off-swan and querying via boo, which we can
-		# only do one bug at a time
-		results = {}
-		for cr in crs:
-			cr = str(cr)
-			try:
-				b = BooBug(cr)
-			except NonExistentBug:
-				continue
-			
-			results[cr] = {}
-			results[cr]["cr_number"] = cr
-			results[cr]["product"] = b.product()
-			results[cr]["synopsis"] = b.synopsis()
-			results[cr]["category"] = b.cat()
-			results[cr]["sub_category"] = b.subcat()
-			results[cr]["keywords"] = b.keywords()
-			results[cr]["status"] = b.state()
-			results[cr]["date_submitted"] = b.submit_date()
-			results[cr]["type"] = b.type()
-			results[cr]["date"] = b.date()
-
-		return results
-
-####################################################################
-
-class ARC(object):
-	"""Lookup an ARC case on opensolaris.org.
+		for database in self.__priority:
+			if database == "bugster":				
+				if self.__onSWAN:
+					results.update(self.__monaco(crs))
+				# else we're off-swan and querying via boo, which we can
+				# only do one bug at a time
+				else:
+					for cr in crs:
+						cr = str(cr)
+						try:
+							results[cr] = self.__boobug(cr)
+						except NonExistentBug:
+							continue
 
-	Usage:
-	a = ARC("PSARC", "2008/002")
-	if a.valid():
-		print a.name()
-	"""
-	def __init__(self, arc, case):
-		self.__valid = False
-		q = "http://opensolaris.org/cgi/arc.py?n=1"
-		q += "&arc0=" + arc
-		q += "&case0=" + case
-		data = urllib.urlopen(q).readlines()
-		self.__fields = {}
-		for line in data:
-			line = line.rstrip('\n')
-			fields = line.split('|')
-			validity = fields[0]
+			# the CR has already been found by one bug database
+			# so don't bother looking it up in the others (rebuild the
+			# list: remove() while iterating skips entries and would
+			# also modify the caller's list)
+			crs = [cr for cr in crs if cr not in results]
+		
+		return results
+####################################################################
+def ARC(arclist):
+	"""Look up all (arc, case) tuples in arclist with a single query;
+	return a dict mapping each valid tuple to its case name."""
+	url = "http://opensolaris.org/cgi/arc.py"
+	opts = {"n": str(len(arclist))}
+	for i, (arc, case) in enumerate(arclist):
+		opts["arc" + str(i)] = arc
+		opts["case" + str(i)] = case
+	req = urllib2.Request(url, urllib.urlencode(opts))
+	try:
+		data = urllib2.urlopen(req).readlines()
+	except urllib2.HTTPError, e:
+		print "ERROR: HTTP error at " + req.get_full_url() + \
+			" got error: " + str(e.code)
+		raise e
 
-			if validity != "0":
-				return
-			else:
-				self.__fields["Name"] = fields[2]
-
-		self.__valid = True
-
-	def valid(self):
-		return self.__valid
-	def name(self):
-		return self.__fields["Name"]
-	def status(self):
-		return self.__fields["Status"]
-	def type(self):
-		return self.__fields["Type"]
+	except urllib2.URLError, e:
+		print "ERROR: could not connect to " + req.get_full_url() + \
+			' got error: "' + str(e.reason) + '"'
+		raise e
+	ret = {}
+	for line in data:
+		fields = line.rstrip('\n').split('|')
+		# fields[0] is the validity flag ("0" means the case exists);
+		# skip anything else so it is reported as nonexistent.
+		if fields[0] != "0":
+			continue
+		arc, case = fields[1].split(" ")
+		ret[(arc, case)] = fields[2]
+	return ret
 
 ####################################################################