7040301 Uncomplete LC_CTYPE information after migration to CLDR 1.9 & Unicode 6.0
author jenda
Thu, 19 May 2011 11:33:20 +0200
changeset 134 455f05a692fc
parent 133 65086617f925
child 135 d7f2a801f9fe
7040301 Uncomplete LC_CTYPE information after migration to CLDR 1.9 & Unicode 6.0
Makefile
misc/patches/cldr_tools/21_default_repertire.patch
misc/patches/cldr_tools/23_ctype.patch
--- a/Makefile	Wed May 18 16:39:52 2011 +0200
+++ b/Makefile	Thu May 19 11:33:20 2011 +0200
@@ -90,7 +90,7 @@
 
 # java flags
 # for cldr posix locale generator
-JAVAFLAGS	= -Xmx256m
+JAVAFLAGS	= -Xmx512m
 
 # misc
 DOWNLOADS	?= .
--- a/misc/patches/cldr_tools/21_default_repertire.patch	Wed May 18 16:39:52 2011 +0200
+++ b/misc/patches/cldr_tools/21_default_repertire.patch	Thu May 19 11:33:20 2011 +0200
@@ -1,13 +1,13 @@
+repertoire is all assigned unicode characters
+
 diff -ruN cldr_tools.orig//tools/java/org/unicode/cldr/posix/POSIXLocale.java cldr_tools/tools/java/org/unicode/cldr/posix/POSIXLocale.java
 --- cldr_tools.orig//tools/java/org/unicode/cldr/posix/POSIXLocale.java	2011-04-15 15:33:44.072439117 +0200
 +++ cldr_tools/tools/java/org/unicode/cldr/posix/POSIXLocale.java	2011-04-15 15:37:17.712439014 +0200
-@@ -100,6 +100,9 @@
+@@ -100,6 +100,7 @@
          }
   
          repertoire.add(0x0000,0x007f);        // Always add the ASCII set
-+	repertoire.addAll(new UnicodeSet("[:Whitespace:]"));	// Always add whitespace and control chars
-+	repertoire.addAll(new UnicodeSet("[:Control:]"));
-+	repertoire.addAll(new UnicodeSet("[:Symbol:]"));
++	repertoire.addAll(new UnicodeSet("[^[:Noncharacter_Code_Point:][:Cn:][:Cs:]]"));
          
       }
       else if ( ! codeset.equals("UTF-8") )
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/misc/patches/cldr_tools/23_ctype.patch	Thu May 19 11:33:20 2011 +0200
@@ -0,0 +1,37 @@
+The CLDR definition of ctype categories is based on ICU categories.
+This patch defines ctype categories by Unicode character categories
+(http://www.unicode.org/versions/Unicode6.0.0/ch04.pdf)
+instead of ICU categories.
+
+diff -ruN cldr_tools.orig//tools/java/org/unicode/cldr/posix/POSIX_LCCtype.java cldr_tools/tools/java/org/unicode/cldr/posix/POSIX_LCCtype.java
+--- cldr_tools.orig//tools/java/org/unicode/cldr/posix/POSIX_LCCtype.java
++++ cldr_tools/tools/java/org/unicode/cldr/posix/POSIX_LCCtype.java
+@@ -57,17 +57,17 @@
+       out.println();
+ 
+       String[][] types = { 
+-          { "upper", "[:Uppercase:]" },
+-		  { "lower", "[:Lowercase:]" }, 
+-		  { "alpha", "[[:Alphabetic:]-[[:Uppercase:][:Lowercase:]]]" },
+-          { "space", "[:Whitespace:]" },
+-		  { "cntrl", "[:Control:]" }, 
+-          { "graph", "[^[:Whitespace:][:Control:][:Format:][:Surrogate:][:Unassigned:]]" },
+-          { "print", "[^[:Control:][:Format:][:Surrogate:][:Unassigned:]]" },
+-          { "punct", "[:Punctuation:]" },
+-		  { "digit", "[0-9]" }, 
+-          { "xdigit", "[0-9 a-f A-F]" },
+-		  { "blank", "[[:Whitespace:]-[\\u000A-\\u000D \\u0085 [:Line_Separator:][:Paragraph_Separator:]]]" } };
++		  { "upper", "[[:Lu:][:Lt:]]" },
++		  { "lower", "[[:Ll:][\\u01C5\\u01C8\\u01CB\\u01F2]]" }, 
++		  { "alpha", "[[[:L:][:N:]]-[[:Lu:][:Lt:][:Ll:][0-9]]]" },
++		  { "space", "[[:Z:][\\u0009\\u000A\\u000B\\u000C\\u000D\\u0085]]" },
++		  { "cntrl", "[:Cc:]" }, 
++		  { "graph", "[[:L:][:N:][:P:][:S:][:Cf:][:M:][:Co:]]" },
++		  { "print", "[[:L:][:N:][:P:][:S:][:Cf:][:M:][:Co:][:Z:]]" },
++		  { "punct", "[[:P:][:S:][:Cf:]]" },
++		  { "digit", "[0-9]" },
++		  { "xdigit", "[0-9 a-f A-F]" },
++		  { "blank", "[[:Z:][\\u0009]]" } };
+ 
+         // print character types, restricted to the charset
+         int item, last;