7040301 Incomplete LC_CTYPE information after migration to CLDR 1.9 & Unicode 6.0
--- a/Makefile Wed May 18 16:39:52 2011 +0200
+++ b/Makefile Thu May 19 11:33:20 2011 +0200
@@ -90,7 +90,7 @@
# java flags
# for cldr posix locale generator
-JAVAFLAGS = -Xmx256m
+JAVAFLAGS = -Xmx512m
# misc
DOWNLOADS ?= .
--- a/misc/patches/cldr_tools/21_default_repertire.patch Wed May 18 16:39:52 2011 +0200
+++ b/misc/patches/cldr_tools/21_default_repertire.patch Thu May 19 11:33:20 2011 +0200
@@ -1,13 +1,13 @@
+The repertoire is all assigned Unicode characters (surrogates and noncharacters are excluded).
+
diff -ruN cldr_tools.orig//tools/java/org/unicode/cldr/posix/POSIXLocale.java cldr_tools/tools/java/org/unicode/cldr/posix/POSIXLocale.java
--- cldr_tools.orig//tools/java/org/unicode/cldr/posix/POSIXLocale.java 2011-04-15 15:33:44.072439117 +0200
+++ cldr_tools/tools/java/org/unicode/cldr/posix/POSIXLocale.java 2011-04-15 15:37:17.712439014 +0200
-@@ -100,6 +100,9 @@
+@@ -100,6 +100,7 @@
}
repertoire.add(0x0000,0x007f); // Always add the ASCII set
-+ repertoire.addAll(new UnicodeSet("[:Whitespace:]")); // Always add whitespace and control chars
-+ repertoire.addAll(new UnicodeSet("[:Control:]"));
-+ repertoire.addAll(new UnicodeSet("[:Symbol:]"));
++ repertoire.addAll(new UnicodeSet("[^[:Noncharacter_Code_Point:][:Cn:][:Cs:]]"));
}
else if ( ! codeset.equals("UTF-8") )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/misc/patches/cldr_tools/23_ctype.patch Thu May 19 11:33:20 2011 +0200
@@ -0,0 +1,37 @@
+The CLDR definition of the ctype categories is based on ICU categories.
+This patch instead defines the ctype categories in terms of Unicode
+General_Category values (http://www.unicode.org/versions/Unicode6.0.0/ch04.pdf),
+plus a few explicitly listed code points, rather than ICU categories.
+
+diff -ruN cldr_tools.orig//tools/java/org/unicode/cldr/posix/POSIX_LCCtype.java cldr_tools/tools/java/org/unicode/cldr/posix/POSIX_LCCtype.java
+--- cldr_tools.orig//tools/java/org/unicode/cldr/posix/POSIX_LCCtype.java
++++ cldr_tools/tools/java/org/unicode/cldr/posix/POSIX_LCCtype.java
+@@ -57,17 +57,17 @@
+ out.println();
+
+ String[][] types = {
+- { "upper", "[:Uppercase:]" },
+- { "lower", "[:Lowercase:]" },
+- { "alpha", "[[:Alphabetic:]-[[:Uppercase:][:Lowercase:]]]" },
+- { "space", "[:Whitespace:]" },
+- { "cntrl", "[:Control:]" },
+- { "graph", "[^[:Whitespace:][:Control:][:Format:][:Surrogate:][:Unassigned:]]" },
+- { "print", "[^[:Control:][:Format:][:Surrogate:][:Unassigned:]]" },
+- { "punct", "[:Punctuation:]" },
+- { "digit", "[0-9]" },
+- { "xdigit", "[0-9 a-f A-F]" },
+- { "blank", "[[:Whitespace:]-[\\u000A-\\u000D \\u0085 [:Line_Separator:][:Paragraph_Separator:]]]" } };
++ { "upper", "[[:Lu:][:Lt:]]" },
++ { "lower", "[[:Ll:][\\u01C5\\u01C8\\u01CB\\u01F2]]" },
++ { "alpha", "[[[:L:][:N:]]-[[:Lu:][:Lt:][:Ll:][0-9]]]" },
++ { "space", "[[:Z:][\\u0009\\u000A\\u000B\\u000C\\u000D\\u0085]]" },
++ { "cntrl", "[:Cc:]" },
++ { "graph", "[[:L:][:N:][:P:][:S:][:Cf:][:M:][:Co:]]" },
++ { "print", "[[:L:][:N:][:P:][:S:][:Cf:][:M:][:Co:][:Z:]]" },
++ { "punct", "[[:P:][:S:][:Cf:]]" },
++ { "digit", "[0-9]" },
++ { "xdigit", "[0-9 a-f A-F]" },
++ { "blank", "[[:Z:][\\u0009]]" } };
+
+ // print character types, restricted to the charset
+ int item, last;