18026094 hwdata files should not include control characters or invalid UTF-8 sequences
author Alan Coopersmith <Alan.Coopersmith@Oracle.COM>
Mon, 10 Feb 2014 15:17:52 -0800
changeset 1704 00a5911e2a6d
parent 1703 17defa2e55e3
child 1705 f76a8a4838a4
18026094 hwdata files should not include control characters or invalid UTF-8 sequences
components/hwdata/Makefile
components/hwdata/usb.ids
--- a/components/hwdata/Makefile	Mon Feb 10 13:39:58 2014 -0800
+++ b/components/hwdata/Makefile	Mon Feb 10 15:17:52 2014 -0800
@@ -18,7 +18,7 @@
 #
 # CDDL HEADER END
 #
-# Copyright (c) 2011, 2013, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved.
 #
 include ../../make-rules/shared-macros.mk
 
@@ -35,7 +35,9 @@
 # When it's time to update, you can use these rules to download with
 # 'gmake -B pci.ids usb.ids'   After updating the data files, ensure
 # that the header with version/date and license info is still being 
-# correctly extracted and copied into the generated license files.
+# correctly extracted and copied into the generated license files,
+# and run 'gmake test' to verify there are no invalid or control chars
+# in the files.
 
 download::
 	@echo 'No downloads for this component'
@@ -84,7 +86,19 @@
 
 install build: $(BUILT_LICENSES)
 
-test:	$(NO_TESTS)
+# Ensure there are no control characters/escape sequences or invalid UTF-8
+# characters in the *.ids text files.  Each file is filtered through tr in
+# a UTF-8 locale; any diff means bad bytes.  Exits non-zero on any failure.
+test: $(BUILD_DIR)
+	@ export LC_ALL='en_US.UTF-8' ; rc=0 ; \
+	for f in $(ID_FILES) ; do \
+		if tr -cd '[:print:][:space:]' < $$f > $(BUILD_DIR)/$$f.clean \
+		   && diff -u $$f $(BUILD_DIR)/$$f.clean ; then \
+			echo $$f PASSED ; \
+		else \
+			echo $$f FAILED ; rc=1 ; \
+		fi ; \
+	done ; exit $$rc
 
 clobber clean::
 	$(RM) -r $(BUILD_DIR)
--- a/components/hwdata/usb.ids	Mon Feb 10 13:39:58 2014 -0800
+++ b/components/hwdata/usb.ids	Mon Feb 10 15:17:52 2014 -0800
@@ -16898,7 +16898,7 @@
 	031  \ and | (Backslash and Bar)
 	032  # and ~ (Hash and Tilde, Non-US Keyboard near right shift)
 	033  ; and : (Semicolon and Colon)
-	034   and " (Accent Acute and Double Quotes)
+	034  ´ and " (Accent Acute and Double Quotes)
 	035  ` and ~ (Accent Grace and Tilde)
 	036  , and < (Comma and Less)
 	037  . and > (Period and Greater)