extension of locale data build process - build locale data from CLDR directly
authorjenda
Wed, 04 Feb 2009 12:34:16 +0100
changeset 0 ab8dc3de97cd
child 1 32204145b74c
extension of locale data build process - build locale data from CLDR directly
Makefile
extension/ISO8859.x
extension/UTF-8.x
patches/cldr_tools/01_dtdcache.patch
tools/cmap.pl
tools/lcmp.pl
tools/ldump.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Makefile	Wed Feb 04 12:34:16 2009 +0100
@@ -0,0 +1,195 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").  
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+
+
+include $(SRC)/Makefile.master
+
+LOCALES = 
+
+COMMON_CFLAGS	= $(CCVERBOSE) -K pic -D PIC -G -I. -z defs -z text -z ignore -D_REENTRANT
+
+# escape ',' for localedef
+sparc_CCUNBOUND		= -Wd\,-xsafe=unboundsym
+sparc_SPACEFLAG		= -xspace -W0\,-Lt -W2\,-Rcond_elim
+sparcv9_SPACEFLAG	= -xspace -W0\,-Lt -W2\,-Rcond_elim
+CCREGSYM		= -Wc\,-Qiselect-regsym=0
+
+CERRWARN	=
+CFLAGS          += $(COMMON_CFLAGS) $(XREGSFLAG)
+CFLAGS64        += $(COMMON_CFLAGS) $(XREGSFLAG64)
+
+LDF_OPT		= 
+LDF_OPT64	= -m lp64
+
+LOPT_ORIGIN	= "-R\\\$$ORIGIN"
+
+JAVA_FLAGS	= -Dhttp.proxyHost=webcache.sfbay -Dhttp.proxyPort=8080
+JAVA_FLAGS	+= -Xms128m -Xmx128m
+# JAVA_CP holds a shell command, not a path: rules expand it as $(JAVA_CP:sh) to get $(JARS) joined with ':'.
+JARS		= jars/icu4j.jar jars/utilities.jar jars/xalan.jar jars/xml-apis.jar jars/cldr.jar
+JAVA_CP		= echo $(JARS) | tr ' ' ':'
+
+LOCALEDEF	= $(SRC)/tools/localedef.sh -P /opt/SUNWspro/bin $(LDF_OPT) -c -v -W cc,"$(CFLAGS)" -L "$(LOPT_ORIGIN)"
+LOCALEDEF64	= $(SRC)/tools/localedef.sh -P /opt/SUNWspro/bin $(LDF_OPT64) -c -v -W cc,"$(CFLAGS64)" -L "$(LOPT_ORIGIN)"
+GENERATE_POSIX	= $(JAVA_ROOT)/bin/java  $(JAVA_FLAGS) -cp $(JAVA_CP:sh) -DCLDR_DTD_CACHE=dtd_cache org.unicode.cldr.posix.GeneratePOSIX -s cldr_core -d posix
+
+# all builds every locale stamp plus the report; install copies both builds of each locale below $(FILEROOT).
+all: $(LOCALES:%=locale/.%) diffs.html
+
+install: all
+	for l in $(LOCALES); do \
+		$(INSDIR) $(FILEROOT)/usr/lib/locale/$$l/$(MACH64) && \
+		cp locale/$$l.so.3 $(FILEROOT)/usr/lib/locale/$$l && \
+		cp locale64/$$l.so.3 $(FILEROOT)/usr/lib/locale/$$l/$(MACH64) || exit 1; \
+	done
+
+DIRS = charmap cldr_tools cldr_core locale locale64 posix xalan jars dtd_cache
+# clean removes everything the build generates, including dtd_cache (created through dtd_cache/.dir)
+clean:
+	rm -Rf $(DIRS)
+	rm -Rf cldr.jar icu4j.jar xalan.jar utilities.jar xml-apis.jar
+	rm -Rf cmap.dat ldump diffs.html
+	rm -Rf .unpack_*
+# distclean additionally removes the downloaded archives
+distclean: clean
+	rm -Rf download
+
+
+# Compile <name>.<codeset> with localedef into locale/ (default model) and locale64/ (-m lp64); locale/.<name> is the stamp.
+locale/.%.UTF-8: charmap/UTF-8.cm extension/UTF-8.x posix/%.UTF-8.src locale/.dir locale64/.dir
+	cd locale; $(LOCALEDEF) -x ../extension/UTF-8.x -f ../charmap/UTF-8.cm -i ../posix/$*.UTF-8.src $*.UTF-8
+	cd locale64; $(LOCALEDEF64) -x ../extension/UTF-8.x -f ../charmap/UTF-8.cm -i ../posix/$*.UTF-8.src $*.UTF-8
+	touch $@
+
+locale/.%.ISO8859-1: charmap/ISO8859-1.cm extension/ISO8859.x posix/%.ISO8859-1.src locale/.dir locale64/.dir
+	cd locale; $(LOCALEDEF) -x ../extension/ISO8859.x -f ../charmap/ISO8859-1.cm -i ../posix/$*.ISO8859-1.src $*.ISO8859-1
+	cd locale64; $(LOCALEDEF64) -x ../extension/ISO8859.x -f ../charmap/ISO8859-1.cm -i ../posix/$*.ISO8859-1.src $*.ISO8859-1
+	touch $@
+
+locale/.%.ISO8859-2: charmap/ISO8859-2.cm extension/ISO8859.x posix/%.ISO8859-2.src locale/.dir locale64/.dir
+	cd locale; $(LOCALEDEF) -x ../extension/ISO8859.x -f ../charmap/ISO8859-2.cm -i ../posix/$*.ISO8859-2.src $*.ISO8859-2
+	cd locale64; $(LOCALEDEF64) -x ../extension/ISO8859.x -f ../charmap/ISO8859-2.cm -i ../posix/$*.ISO8859-2.src $*.ISO8859-2
+	touch $@
+
+
+# Generate charmap/<codeset>.cm by running CLDR's GenerateCharmap with "-d charmap -c <codeset>".
+charmap/%.cm: $(JARS) charmap/.dir
+	$(JAVA_ROOT)/bin/java  $(JAVA_FLAGS) -cp $(JAVA_CP:sh) org.unicode.cldr.posix.GenerateCharmap -d charmap -c $*
+
+
+# Run GeneratePOSIX for <locale>, then rename its output (named after the whole -m argument) to the target name.
+posix/%.UTF-8.src: $(JARS) charmap/UTF-8.cm cldr_core/.src posix/.dir dtd_cache/.dir
+	$(GENERATE_POSIX) -c UTF-8 -m $*@platform=solaris,yesno=short -u [\\\\u0000-\\\\uFFFF] -x [\\\\u0000-\\\\uFFFF] 
+	mv posix/$*@platform=solaris,yesno=short.UTF-8.src $@
+
+posix/%.ISO8859-1.src: $(JARS) charmap/ISO8859-1.cm cldr_core/.src posix/.dir dtd_cache/.dir
+	$(GENERATE_POSIX) -c ISO8859-1 -m $*@platform=solaris,yesno=short 
+	mv posix/$*@platform=solaris,yesno=short.ISO8859-1.src $@
+
+posix/%.ISO8859-2.src: $(JARS) charmap/ISO8859-2.cm cldr_core/.src posix/.dir dtd_cache/.dir
+	$(GENERATE_POSIX) -c ISO8859-2 -m $*@platform=solaris,yesno=short 
+	mv posix/$*@platform=solaris,yesno=short.ISO8859-2.src $@
+
+
+# ant runs inside cldr_tools, so every jar path needs the ../jars/ prefix. NOTE(review): CLDR_JAR naming xalan.jar looks unintended - confirm against build.xml.
+jars/cldr.jar: jars/icu4j.jar jars/utilities.jar jars/xalan.jar jars/xml-apis.jar cldr_tools/.src
+	cd cldr_tools && ICU4J_JAR=../jars/icu4j.jar UTILITIES_JAR=../jars/utilities.jar CLDR_JAR=../jars/xalan.jar XML_APIS_JAR=../jars/xml-apis.jar ant jar
+	cp cldr_tools/cldr.jar $@
+# Stage the remaining jars under jars/ by copying them out of the unpacked or downloaded sources.
+jars/utilities.jar: cldr_tools/.src  jars/.dir
+	cp cldr_tools/utilities.jar $@
+
+jars/xalan.jar: xalan/.unpack jars/.dir
+	cp xalan/xalan-j_2_7_1/xalan.jar $@
+
+jars/xml-apis.jar: xalan/.unpack jars/.dir
+	cp xalan/xalan-j_2_7_1/xml-apis.jar $@
+
+jars/icu4j.jar: download/icu4j-4_0.jar jars/.dir
+	cp download/icu4j-4_0.jar $@
+
+
+# Unzip each downloaded archive into its own directory; <dir>/.unpack is the stamp file.
+cldr_core/.unpack: download/cldr_core.zip cldr_core/.dir
+	unzip -q download/cldr_core.zip -d cldr_core
+	touch $@
+
+cldr_tools/.unpack: download/cldr_tools.zip cldr_tools/.dir
+	unzip -q download/cldr_tools.zip -d cldr_tools
+	touch $@
+
+xalan/.unpack: download/xalan-j_2_7_1-bin.zip xalan/.dir
+	unzip -q download/xalan-j_2_7_1-bin.zip -d xalan
+	touch $@
+# Apply patches/<dir>/*.patch (skipped when that directory is empty or missing), then stamp <dir>/.src; a failing patch aborts.
+cldr_tools/.src: cldr_tools/.unpack
+	@[ ! "$$(ls -A patches/$(@D))" ] || for a in patches/$(@D)/*.patch; do \
+		echo "applying $$a"; \
+		(cd $(@D); patch -p1) < $$a || exit 1; \
+	done
+	touch $@
+
+cldr_core/.src: cldr_core/.unpack
+	@[ ! "$$(ls -A patches/$(@D))" ] || for a in patches/$(@D)/*.patch; do \
+		echo "applying $$a"; \
+		(cd $(@D); patch -p1) < $$a || exit 1; \
+	done
+	touch $@
+
+
+# Fetch into $@.tmp first, so a failed or interrupted wget never leaves an empty target that looks up to date.
+download/icu4j-4_0.jar: download/.dir
+	wget -q -O $@.tmp http://download.icu-project.org/files/icu4j/4.0/icu4j-4_0.jar
+	touch $@.tmp && mv $@.tmp $@
+
+download/cldr_tools.zip: download/.dir
+	wget -q -O $@.tmp http://unicode.org/Public/cldr/1.6.1/tools.zip
+	touch $@.tmp && mv $@.tmp $@
+
+download/cldr_core.zip: download/.dir
+	wget -q -O $@.tmp http://unicode.org/Public/cldr/1.6.1/core.zip
+	touch $@.tmp && mv $@.tmp $@
+
+download/xalan-j_2_7_1-bin.zip: download/.dir
+	wget -q -O $@.tmp http://www.mirrorgeek.com/apache.org/xml/xalan-j/xalan-j_2_7_1-bin.zip
+	touch $@.tmp && mv $@.tmp $@
+
+
+# HTML report produced by tools/lcmp.pl, which runs ./ldump on the installed and on the freshly built locales.
+diffs.html: ldump tools/lcmp.pl
+	tools/lcmp.pl $(LOCALES) >$@
+
+
+
+# tools: ldump.c pulls in cmap.dat with #include, hence the prerequisite and the "-I ."
+ldump: tools/ldump.c cmap.dat
+	$(CC) -I . -o ldump tools/ldump.c
+# cmap.dat: C tables written by tools/cmap.pl from every charmap/*.cm present once the locales are built
+cmap.dat: tools/cmap.pl $(LOCALES:%=locale/.%)
+	tools/cmap.pl charmap/*.cm > $@
+
+
+# <dir>/.dir: recreate <dir> empty (any existing contents are removed first) and stamp it.
+%/.dir:
+	rm -Rf $*; mkdir -p $*
+	touch $@
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/extension/ISO8859.x	Wed Feb 04 12:34:16 2009 +0100
@@ -0,0 +1,92 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").  
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+#ident	"@(#)iso_8859_1.extension.src	1.4	05/06/23 SMI"
+
+METHODS
+process_code	euc
+cswidth		1:1,0:0,0:0
+
+eucpctowc	"__eucpctowc_gen"	"libc"	"/usr/lib"  "libc.so.1"
+fgetwc		"__fgetwc_euc"
+fgetwc@native	"__fgetwc_sb"
+fnmatch		"__fnmatch_sb"
+getdate		"__getdate_std"
+iswctype	"__iswctype_bc"
+iswctype@native	"__iswctype_sb"
+mbftowc		"__mbftowc_euc"
+mbftowc@native	"__mbftowc_sb"
+mblen		"__mblen_sb"
+mbstowcs	"__mbstowcs_euc"
+mbstowcs@native	"__mbstowcs_sb"
+mbtowc		"__mbtowc_euc"
+mbtowc@native	"__mbtowc_sb"
+regcomp		"__regcomp_std"
+regexec		"__regexec_std"
+regerror	"__regerror_std"
+regfree		"__regfree_std"
+strcoll		"__strcoll_sb"
+strfmon		"__strfmon_std"
+strftime	"__strftime_std"
+strptime	"__strptime_std"
+strxfrm		"__strxfrm_sb"
+towctrans	"__towctrans_bc"
+towctrans@native "__towctrans_std"
+towlower	"__towlower_bc"
+towlower@native	"__towlower_std"
+towupper	"__towupper_bc"
+towupper@native	"__towupper_std"
+trwctype	"__trwctype_std"
+wcscoll		"__wcscoll_bc"
+wcscoll@native	"__wcscoll_std"
+wcsftime	"__wcsftime_std"
+wcstombs	"__wcstombs_euc"
+wcstombs@native	"__wcstombs_sb"
+wcswidth	"__wcswidth_euc"
+wcswidth@native	"__wcswidth_sb"
+wcsxfrm		"__wcsxfrm_bc"
+wcsxfrm@native	"__wcsxfrm_std"
+wctoeucpc	"__wctoeucpc_gen"
+wctomb		"__wctomb_euc"
+wctomb@native	"__wctomb_sb"
+wctrans		"__wctrans_std"
+wctype		"__wctype_std"
+wcwidth		"__wcwidth_euc"
+wcwidth@native	"__wcwidth_sb"
+btowc		"__btowc_euc"
+btowc@native	"__btowc_sb"
+wctob		"__wctob_euc"
+wctob@native	"__wctob_sb"
+mbsinit		"__mbsinit_gen"
+mbrlen		"__mbrlen_sb"
+mbrtowc		"__mbrtowc_euc"
+mbrtowc@native	"__mbrtowc_sb"
+wcrtomb		"__wcrtomb_euc"
+wcrtomb@native	"__wcrtomb_sb"
+mbsrtowcs	"__mbsrtowcs_euc"
+mbsrtowcs@native	"__mbsrtowcs_sb"
+wcsrtombs	"__wcsrtombs_euc"
+wcsrtombs@native	"__wcsrtombs_sb"
+
+END METHODS
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/extension/UTF-8.x	Wed Feb 04 12:34:16 2009 +0100
@@ -0,0 +1,114 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").  
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 1996-1997, 2000-2003 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+# Method file for Solaris Unicode locales.
+#
+#
+#ident	"@(#)UTF-8.x	1.4 03/08/19 SMI"
+#
+
+METHODS
+
+#
+# Encoding definitions to use UTF-8 (MB) and UTF-32 (WC):
+file_code	utf8
+process_code	ucs4
+
+#
+# We use the following methods from the libc:
+iswctype@native	"__iswctype_std"        "libc" "/usr/lib/" "libc.so.1"
+towctrans@native "__towctrans_std"
+towlower@native	"__towlower_std"
+towupper@native "__towupper_std"
+trwctype        "__trwctype_std"
+wctrans         "__wctrans_std"
+wctype          "__wctype_std"
+
+mbsinit		"__mbsinit_gen"
+mbrlen		"__mbrlen_gen"
+ 
+strcoll         "__strcoll_std"
+strxfrm         "__strxfrm_std"
+wcscoll         "__wcscoll_bc"
+wcscoll@native  "__wcscoll_std"
+wcsxfrm         "__wcsxfrm_bc"
+wcsxfrm@native  "__wcsxfrm_std"
+
+fnmatch         "__fnmatch_std"
+regcomp         "__regcomp_std"
+regexec         "__regexec_std"
+regerror        "__regerror_std"
+regfree         "__regfree_std"
+
+strfmon         "__strfmon_std"
+
+strftime        "__strftime_std"
+strptime        "__strptime_std"
+wcsftime        "__wcsftime_std"
+
+getdate         "__getdate_std"
+
+#
+# The methods listed below are all Unicode locale-specific methods
+# coming from the methods_unicode.so.3 shared object:
+eucpctowc	"__u32_to_dense_u32_utf8"	"localelib" "/usr/lib/locale/common" "methods_unicode.so.3"
+wctoeucpc	"__dense_u32_to_u32_utf8"
+
+iswctype        "__iswctype_bc_utf8"
+towctrans       "__towctrans_bc_utf8"
+towlower        "__towlower_bc_utf8"
+towupper        "__towupper_bc_utf8"
+
+mbftowc		"__mbftowc_dense_utf8"
+mbftowc@native	"__mbftowc_dense_native_utf8"
+fgetwc		"__fgetwc_dense_utf8"
+fgetwc@native	"__fgetwc_dense_native_utf8"
+mblen		"__mblen_dense_utf8"
+mbstowcs	"__mbstowcs_dense_utf8"
+mbstowcs@native	"__mbstowcs_dense_native_utf8"
+mbtowc		"__mbtowc_dense_utf8"
+mbtowc@native	"__mbtowc_dense_native_utf8"
+wcstombs	"__wcstombs_dense_utf8"
+wcstombs@native	"__wcstombs_dense_native_utf8"
+wcswidth	"__wcswidth_dense_utf8"
+wcswidth@native	"__wcswidth_dense_utf8"
+wctomb		"__wctomb_dense_utf8"
+wctomb@native	"__wctomb_dense_native_utf8"
+wcwidth		"__wcwidth_dense_utf8"
+wcwidth@native	"__wcwidth_dense_utf8"
+
+btowc		"__btowc_dense_utf8"
+btowc@native	"__btowc_dense_utf8"
+wctob		"__wctob_dense_utf8"
+wctob@native	"__wctob_dense_utf8"
+mbrtowc		"__mbrtowc_dense_utf8"
+mbrtowc@native	"__mbrtowc_dense_native_utf8"
+wcrtomb		"__wcrtomb_dense_utf8"
+wcrtomb@native	"__wcrtomb_dense_native_utf8"
+mbsrtowcs	"__mbsrtowcs_dense_utf8"
+mbsrtowcs@native "__mbsrtowcs_dense_native_utf8"
+wcsrtombs	"__wcsrtombs_dense_utf8"
+wcsrtombs@native "__wcsrtombs_dense_native_utf8"
+
+END METHODS
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/cldr_tools/01_dtdcache.patch	Wed Feb 04 12:34:16 2009 +0100
@@ -0,0 +1,15 @@
+diff -ruN cldr_tools.orig/org/unicode/cldr/util/CachingEntityResolver.java cldr_tools/org/unicode/cldr/util/CachingEntityResolver.java
+--- cldr_tools.orig/org/unicode/cldr/util/CachingEntityResolver.java	2009-01-15 14:00:00.046637971 +0100
++++ cldr_tools/org/unicode/cldr/util/CachingEntityResolver.java	2009-01-15 14:23:33.261692759 +0100
+@@ -86,7 +86,10 @@
+             
+             if((gCacheDir==null)||(gCacheDir.length()<=0)) {
+                 gCacheDir = null;
+-            }
++            } else 
++		try {
++			gCacheDir = (new File(gCacheDir)).getAbsolutePath();
++		} catch (Exception ex) {};
+             if((gOverrideDir==null)||(gOverrideDir.length()<=0)) {
+                 gOverrideDir = null;
+             }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/cmap.pl	Wed Feb 04 12:34:16 2009 +0100
@@ -0,0 +1,87 @@
+#!/usr/bin/perl
+
+#
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+
+use warnings;
+use strict;
+
+use Encode;
+use Data::Dumper;
+
+my @cs;
+
+# Emit one "static struct cmap_char csN[]" table per charmap file given on the command line.
+foreach my $file (@ARGV) {
+	open(my $fh, "<", $file) or die "$file: $!";
+	my $cp;
+
+	# header part: remember the <code_set_name> value, stop at the CHARMAP line
+	while (<$fh>) {
+		next if /^#/ or /^\s*$/;
+
+		if (/<code_set_name>/i) {
+			($cp) = /<code_set_name>\s*"(.*)"/;
+			next;
+		}
+
+		last if /^CHARMAP/;
+	}
+	# decode() below needs the codeset; fail with a clear message instead of deep inside Encode
+	defined $cp or die "$file: no <code_set_name> found\n";
+
+	my $last_code;
+	my $len = 0;
+
+	print "static struct cmap_char cs". ($#cs+1) . "[] = {\n";
+
+	while (<$fh>) {
+		next if /^#/ or /^\s*$/;
+		last if (/^END CHARMAP/);
+
+		chomp;
+		# only "<name> bytes" lines are mappings; the lazy match keeps trailing blanks out of the C string
+		my ($name, $code) = /<(.*)>\s*(.*?)\s*$/ or next;
+
+		# consecutive entries with the same encoding are emitted once
+		next if defined $last_code and $last_code eq $code;
+		$last_code = $code;
+
+		# turn the \xNN escapes into bytes and decode them to obtain the code point
+		my $hexcode = $code;
+		$hexcode =~ s/\\x(\w\w)/chr(hex($1))/eg;
+		$hexcode = decode($cp,$hexcode);
+
+		printf("\t{ 0x%04x, (unsigned char*)\"%s\", (unsigned char*)\"%s\" },\n", ord($hexcode), $code, $name);
+
+		$len++;
+	}
+
+	print "};\n\n";
+	close($fh);
+	push @cs, { name => $cp, len => $len };
+};
+# Index of all tables: { codeset name, entry count, table }, terminated by an all-zero entry.
+print "struct cmap cmaps[] = {\n";
+
+printf "\t{ \"%s\", %d, cs%d },\n", $cs[$_]{name}, $cs[$_]{len}, $_ foreach (0 .. $#cs);
+
+print "\t{ 0,0,0 }\n";
+print "};\n";
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/lcmp.pl	Wed Feb 04 12:34:16 2009 +0100
@@ -0,0 +1,169 @@
+#!/usr/bin/perl
+
+#
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+
+use strict;
+use warnings;
+
+use Encode;
+use HTML::Entities;
+
+binmode(STDOUT, ":utf8");
+
+my %lall;
+# Collect the differences per locale and category; locales without an installed system library are skipped.
+foreach my $loc (sort @ARGV) {
+	next unless -f "/usr/lib/locale/$loc/$loc.so.3";
+
+	$lall{$loc}{$_} = { lcmp1($loc,$_) } foreach ('lc_charmap', 'lc_collate', 'lc_monetary', 'lc_numeric', 'lc_messages', 'lc_time', 'lc_ctype');
+}
+
+# Dump category $lc of locale $loc from the installed and the built library via ./ldump; returns name => { desc, v1, v2 } per difference.
+sub lcmp1 {
+	my ($loc,$lc) = @_;
+	my (undef,undef,$enc) = ($loc =~ /(..)_(..)\.(.*)/);
+	my %ret;
+
+	open(my $sys, "-|:bytes", "./ldump $lc /usr/lib/locale/$loc/$loc.so.3") or die $!;
+	open(my $new, "-|:bytes", "./ldump $lc locale/$loc.so.3") or die $!;
+
+	while (my $l1 = <$sys>) {
+		my $l2 = <$new>;
+		defined $l2 or die "$loc $lc: dump of locale/$loc.so.3 is shorter than the system one";
+		chomp ($l1);
+		chomp ($l2);
+
+		my ($name1, $v1) = ($l1 =~ /^([^\t]*)\t(.*)$/);
+		my ($name2, $v2) = ($l2 =~ /^([^\t]*)\t(.*)$/);
+		my ($desc1, $desc2) = ($name1, $name2);
+		($name1,$desc1) = ($1,$2) if $name1 =~ /^(.*) \/\* (.*) \*\//;
+		($name2,$desc2) = ($1,$2) if $name2 =~ /^(.*) \/\* (.*) \*\//;
+		$name1 eq $name2 or die "$name1 != $name2";
+
+		# quoted values are bytes in the locale's codeset; decode them for the UTF-8 report
+		$v1 = decode($enc, $1) if $v1 =~ /('.*')/;
+		$v2 = decode($enc, $1) if $v2 =~ /('.*')/;
+		$ret{$name1} = { desc => $desc1, v1 => $v1, v2 => $v2 } if ($v1 ne $v2);
+	}
+	# close() on a pipe reports the child's exit status; a failed ldump must not pass for "no differences"
+	close($sys) or warn "ldump $lc exited abnormally for /usr/lib/locale/$loc/$loc.so.3\n";
+	close($new) or warn "ldump $lc exited abnormally for locale/$loc.so.3\n";
+	return %ret;
+}
+
+
+# make HTML report
+
+print <<EOF;
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en" dir="ltr">
+	<head>
+		<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+		<link rel="stylesheet" type="text/css" href="http://www.netbeans.org/netbeans.css" />
+	</head>
+	<body>
+		<h1>System Locales versus CLDR 1.6</h1>
+		<table border="1">
+			<tr>
+				<th colspan="2">Locale</th>
+				<th>LC_CHARMAP</th>
+				<th>LC_COLLATE</th>
+				<th>LC_MONETARY</th>
+				<th>LC_NUMERIC</th>
+				<th>LC_MESSAGES</th>
+				<th>LC_TIME</th>
+				<th>LC_CTYPE</th>
+			</tr>
+EOF
+
+
+# Summary table: one row per requested locale, one cell per category.
+foreach my $loc (sort @ARGV) {
+	my ($lang,$terr,$enc) = ($loc =~ /(..)_(..)\.(.*)/);
+
+	print <<EOF;
+			<tr>
+				<th align="right"><a href="#$loc">${lang}_$terr</a></th>
+				<th align="left">$enc</th>
+EOF
+	# %lall has no entry for locales that are not installed on this system
+	if (not exists $lall{$loc}) {
+		# close the row here too: "next" skips the </tr> printed at the end of the loop
+		print "\t\t\t\t<td colspan=\"7\">new locale</td>\n\t\t\t</tr>\n";
+		next;
+	}
+
+	foreach ('lc_charmap', 'lc_collate', 'lc_monetary', 'lc_numeric', 'lc_messages', 'lc_time', 'lc_ctype') {
+		my $n = keys %{$lall{$loc}{$_}};
+
+		# the CSS class reflects how many attributes differ
+		if ($n == 0) {
+		    print "\t\t\t\t<td class=\"ok\"><a href=\"#$loc-$_\">$n diffs</a></td>\n";
+		} elsif ($n < 5) {
+		    print "\t\t\t\t<td class=\"warn\"><a href=\"#$loc-$_\">$n diffs</a></td>\n";
+		} else {
+		    print "\t\t\t\t<td class=\"problem\"><a href=\"#$loc-$_\">$n diffs</a></td>\n";
+		}
+	}
+
+	print <<EOF;
+			</tr>
+EOF
+}
+	
+print <<EOF;
+		</table>
+EOF
+
+
+# Detail section: for every compared locale, one table of differing attributes per category.
+foreach my $loc (sort keys %lall) {
+	next unless %{$lall{$loc}};
+print <<EOF;
+
+		<h2><a name="$loc">$loc</a></h2>
+EOF
+	foreach my $lc ('lc_charmap', 'lc_collate', 'lc_monetary', 'lc_numeric', 'lc_messages', 'lc_time', 'lc_ctype') {
+		print <<EOF;
+		<h3><a name="$loc-$lc">\U$lc\E ($loc)</a></h3>
+		<table border="1">
+			<tr><th>Attribute</th><th title="OpenSolaris">OSo</th><th>CLDR 1.6</th></tr>
+EOF
+		my $diff = $lall{$loc}{$lc};
+		my @d = sort keys %$diff;
+		foreach my $attr (@d[0..($#d<100 ? $#d : 100)]) {
+			# locale data is arbitrary text: escape it before embedding it in the markup
+			my ($name, $desc, $v1, $v2) = map encode_entities($_), $attr, @{$diff->{$attr}}{qw(desc v1 v2)};
+			print <<EOF;
+			<tr>
+				<td title="$desc">$name</td>
+				<td>$v1</td>
+				<td>$v2</td>
+			</tr>
+EOF
+		}
+		print <<EOF;
+		
+		</table>
+EOF
+	}
+}
+print "\t</body>\n</html>\n";
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ldump.c	Wed Feb 04 12:34:16 2009 +0100
@@ -0,0 +1,391 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2009, by Sun Microsystems, Inc.
+ * All rights reserved.
+ */
+
+#include <locale.h>
+#include <sys/localedef.h>
+#include <langinfo.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <dlfcn.h>
+#include <link.h>
+#include <errno.h>
+
+/* Types of the charmap tables that tools/cmap.pl writes into cmap.dat (included below). */
+struct cmap_char {
+	wchar_t wc;	/* code point obtained by decoding the native bytes */
+	const unsigned char *native;	/* encoding of the character as given in the charmap */
+	const unsigned char *name;	/* symbolic name from the charmap, without the <> */
+};
+
+struct cmap {
+	const char *codeset;	/* <code_set_name> of the charmap; 0 in the terminating entry */
+	unsigned int len;	/* number of entries in chars */
+	const struct cmap_char *chars;
+};
+
+#include "cmap.dat"
+
+
+/* Print the usage message to stderr (stdout carries the dump that lcmp.pl parses) and exit(1). */
+static void
+usage(void)
+{
+	fprintf(stderr, "Usage: ldump [ lc_charmap | lc_collate | lc_monetary | lc_numeric | lc_messages | lc_time | lc_ctype ] <lib1>\n");
+	exit(1);
+}
+/* Print "ldump: <message>: <strerror(errno)>" to stderr and exit(1); needs <stdarg.h> for va_list. */
+static void
+die(const char *format, ...)
+{
+	va_list args;
+	int err = errno;	/* the fprintf() calls below may change errno */
+
+	fprintf(stderr, "ldump: ");
+	va_start(args, format);
+	vfprintf(stderr, format, args);
+	va_end(args);
+	fprintf(stderr, ": %s\n", strerror(err));
+	exit(1);
+}
+
+
+/* Like die(), but without the strerror(errno) suffix: prints "ldump: <message>" and exits with 1. */
+static void
+die2(const char *format, ...)
+{
+	va_list ap;
+
+	(void) fputs("ldump: ", stderr);
+	va_start(ap, format);
+	(void) vfprintf(stderr, format, ap);
+	va_end(ap);
+	(void) fputc('\n', stderr);
+
+	exit(1);
+}
+
+
+/* Load the locale object at path and return whatever its instantiate() entry point returns. */
+static _LC_locale_t*
+get_lp(const char *path)
+{
+	void *handle;
+	_LC_locale_t *(*fn)(void);
+
+	if ((handle = dlopen(path, RTLD_LAZY)) == NULL)
+		die2("dlopen(%s): %s", path, dlerror());	/* dlopen() reports through dlerror(), not errno */
+
+	if ((fn = (_LC_locale_t *(*)(void))dlsym(handle, "instantiate")) == NULL)
+		die2("%s: not locale object", path);	/* the %s needs its argument */
+
+	return fn();
+}
+/* Each LDUMP_* prints one "field<TAB>value" line for p->arg; a non-NULL name is appended to the field as a comment. */
+#define LDUMP_REPORT(arg,type,fmt,name) printf("%s%s%s%s\t" fmt "\n", #arg, (name) == NULL ? "":" /* ",(name) == NULL ? "":name, (name) == NULL ? "":" */", ((type)p->arg))
+/* The conversion must match the cast type: size_t goes through unsigned long, the uchar fields are numbers, not text. */
+#define LDUMP_STR(arg,name) LDUMP_REPORT(arg,const char*,"'%s'",name)
+#define LDUMP_SIZET(arg,name) LDUMP_REPORT(arg,unsigned long,"%lu",name)
+#define LDUMP_UCHAR(arg,name) LDUMP_REPORT(arg,unsigned int,"%u",name)
+#define LDUMP_HEX(arg,name) LDUMP_REPORT(arg,unsigned int,"0x%X",name)
+#define LDUMP_INT(arg,name) LDUMP_REPORT(arg,int,"%d",name)
+#define LDUMP_BIT(arg,n,name) LDUMP_HEX(arg,name)		/* FIXME */
+#define LDUMP_WCHAR(arg,name) LDUMP_REPORT(arg,wchar_t,"%lc",name)
+/* Dump the scalar fields of the charmap category; the file/process code types are not reported. */
+static void
+lc_charmap(_LC_charmap_t *p)
+{
+	LDUMP_STR(cm_csname, "codeset name");
+	/*	_LC_fc_type_t	cm_fc_type;	 file code type */
+	/*	_LC_pc_type_t	cm_pc_type;	 process code type */
+	LDUMP_SIZET(cm_mb_cur_max, "max encoding length for this codeset");
+	LDUMP_SIZET(cm_mb_cur_min, "min encoding length for this codeset");
+	LDUMP_UCHAR(cm_reserved, "reserved");
+	LDUMP_UCHAR(cm_def_width, "default width");
+	LDUMP_UCHAR(cm_base_max, "base area size: 0, 127, or 255");
+	LDUMP_UCHAR(cm_tbl_ent, "# of extended table entries");
+}
+
+/* Dump the scalar fields of the collation category; the weight, element and substitution tables are not reported. */
+static void
+lc_collate(const _LC_collate_t *p)
+{
+	LDUMP_HEX(co_nord, "number of collation orders supported in this locale");
+	LDUMP_HEX(co_r_order, "relative weight flag");
+	LDUMP_HEX(co_ext, "extinfo flag");
+
+	/* LDUMP_BIT(co_sort, sizeof(wchar_t), "sort order processing flags"); */
+	LDUMP_HEX(co_wc_min, "min process code");
+	LDUMP_HEX(co_wc_max, "max process code");
+	LDUMP_HEX(co_hbound, "max process code with");
+	LDUMP_HEX(co_col_min, "min unique coll weight");
+	LDUMP_HEX(co_col_max, "max unique coll weight");
+
+	/*
+	_const _LC_weight_t	*co_coltbl; array of collation weights
+	_const _LC_collel_t	**co_cetbl; array of collating elements
+	*/
+	LDUMP_HEX(co_nsubs, "number of sub strs");
+	/* _const _LC_subs_t	*co_subs;	substitution strs   */
+	/* _const void	*co_extinfo;	extended info */
+}
+
+/* For every character that cmap.dat lists for the locale's codeset, print the character classes it belongs to. */
+static void
+lc_ctype(_LC_ctype_t *p)
+{
+	char *type[] = { "alnum", "alpha", "cntrl", "digit", "graph", "lower", "print", "punct", "space", "upper", "xdigit", "blank" };
+	wctype_t wt[12];
+	const struct cmap *cm = cmaps;
+	unsigned int i, j;
+
+	for (i = 0; i < 12; i++)
+		wt[i] = METHOD(p,wctype)(p, type[i]);
+
+	/* find the table of this locale's codeset; cmaps[] ends with a NULL codeset */
+	while (cm->codeset != NULL && strcmp(cm->codeset, p->cmapp->cm_csname) != 0)
+		cm++;
+
+	/* report the problem explicitly: assert() would be silent when compiled with NDEBUG */
+	if (cm->codeset == NULL)
+		die2("%s: codeset not found in cmap.dat", p->cmapp->cm_csname);
+
+	for (i = 0; i < cm->len; i++) {
+		printf("U+%04X /* %s */\t", (unsigned int)cm->chars[i].wc, cm->chars[i].name);
+
+		for (j = 0; j < 12; j++)
+			if (METHOD_NATIVE(p, iswctype)(p, cm->chars[i].wc, wt[j]))
+				printf("is%s ", type[j]);
+
+		printf("\n");
+	}
+
+}
+
+
+/* Dump the monetary category: the string fields first, then the numeric national and international settings. */
+static void
+lc_monetary(const _LC_monetary_t *p)
+{
+	LDUMP_STR(int_curr_symbol, "international currency symbol");
+	LDUMP_STR(currency_symbol, "national currency symbol");
+	LDUMP_STR(mon_decimal_point, "currency decimal point");
+	LDUMP_STR(mon_thousands_sep, "currency thousands separator");
+	LDUMP_STR(mon_grouping, "currency digits grouping");
+	LDUMP_STR(positive_sign, "currency plus sign");
+	LDUMP_STR(negative_sign, "currency minus sign");
+
+	LDUMP_INT(int_frac_digits, "internat currency fract digits");
+	LDUMP_INT(frac_digits, "currency fractional digits");
+	LDUMP_INT(p_cs_precedes, "currency plus location");
+	LDUMP_INT(p_sep_by_space, "currency plus space ind.");
+	LDUMP_INT(n_cs_precedes, "currency minus location");
+	LDUMP_INT(n_sep_by_space, "currency minus space ind.");
+	LDUMP_INT(p_sign_posn, "currency plus position");
+	LDUMP_INT(n_sign_posn, "currency minus position");
+
+	LDUMP_INT(int_p_cs_precedes, "int'l currency plus location");
+	LDUMP_INT(int_p_sep_by_space, "int'l currency plus space ind.");
+	LDUMP_INT(int_n_cs_precedes, "int'l currency minus location");
+	LDUMP_INT(int_n_sep_by_space, "int'l currency minus space ind.");
+	LDUMP_INT(int_p_sign_posn, "int'l currency plus position");
+	LDUMP_INT(int_n_sign_posn, "int'l currency minus position");
+}
+/* Dump the three string fields of the numeric category (no descriptions are attached). */
+static void
+lc_numeric(const _LC_numeric_t *p)
+{
+	LDUMP_STR(decimal_point, NULL);
+	LDUMP_STR(thousands_sep, NULL);
+	LDUMP_STR(grouping, NULL);
+}
+/* Dump the yes/no expressions and strings of the messages category. */
+static void
+lc_messages(const _LC_messages_t *p)
+{
+	LDUMP_STR(yesexpr, "POSIX: Expression for affirmative.");
+	LDUMP_STR(noexpr, "POSIX: Expression for negative.");
+	LDUMP_STR(yesstr, "X/OPEN: colon sep str for affirmative. ");
+	LDUMP_STR(nostr, "X/OPEN: colon sep str for negative. ");
+}
+/* Dump the time category; array elements are listed one by one so each line is labelled with its index. */
+static void
+lc_time(const _LC_time_t *p)
+{
+	LDUMP_STR(d_fmt, NULL);
+	LDUMP_STR(t_fmt, NULL);
+	LDUMP_STR(d_t_fmt, NULL);
+	LDUMP_STR(t_fmt_ampm, NULL);
+	LDUMP_STR(abday[0], NULL);
+	LDUMP_STR(abday[1], NULL);
+	LDUMP_STR(abday[2], NULL);
+	LDUMP_STR(abday[3], NULL);
+	LDUMP_STR(abday[4], NULL);
+	LDUMP_STR(abday[5], NULL);
+	LDUMP_STR(abday[6], NULL);
+	LDUMP_STR(day[0], NULL);
+	LDUMP_STR(day[1], NULL);
+	LDUMP_STR(day[2], NULL);
+	LDUMP_STR(day[3], NULL);
+	LDUMP_STR(day[4], NULL);
+	LDUMP_STR(day[5], NULL);
+	LDUMP_STR(day[6], NULL);
+	LDUMP_STR(abmon[0], NULL);
+	LDUMP_STR(abmon[1], NULL);
+	LDUMP_STR(abmon[2], NULL);
+	LDUMP_STR(abmon[3], NULL);
+	LDUMP_STR(abmon[4], NULL);
+	LDUMP_STR(abmon[5], NULL);
+	LDUMP_STR(abmon[6], NULL);
+	LDUMP_STR(abmon[7], NULL);
+	LDUMP_STR(abmon[8], NULL);
+	LDUMP_STR(abmon[9], NULL);
+	LDUMP_STR(abmon[10], NULL);
+	LDUMP_STR(abmon[11], NULL);
+	LDUMP_STR(mon[0], NULL);
+	LDUMP_STR(mon[1], NULL);
+	LDUMP_STR(mon[2], NULL);
+	LDUMP_STR(mon[3], NULL);
+	LDUMP_STR(mon[4], NULL);
+	LDUMP_STR(mon[5], NULL);
+	LDUMP_STR(mon[6], NULL);
+	LDUMP_STR(mon[7], NULL);
+	LDUMP_STR(mon[8], NULL);
+	LDUMP_STR(mon[9], NULL);
+	LDUMP_STR(mon[10], NULL);
+	LDUMP_STR(mon[11], NULL);
+	LDUMP_STR(am_pm[0], NULL);
+	LDUMP_STR(am_pm[1], NULL);
+	/* LDUMP_STR(*era;	NULL terminated array of strings */
+	LDUMP_STR(era_d_fmt, NULL);
+	LDUMP_STR(alt_digits, NULL);
+	LDUMP_STR(era_d_t_fmt, NULL);
+	LDUMP_STR(era_t_fmt, NULL);
+	LDUMP_STR(date_fmt, NULL);
+}
+
+
+/*
+#define LDUMP_STR2(arg,name) if (strcmp(p1[arg], p2[arg]) != 0) LDUMP_REPORT(p1[arg],p2[arg],"%s",name)
+
+static void
+lc_nl_info(int n, char *p1[], char *p2[])
+{
+	LDUMP_STR2(DAY_1, "sunday");
+	LDUMP_STR2(DAY_2, "monday");
+	LDUMP_STR2(DAY_3, "tuesday");
+	LDUMP_STR2(DAY_4, "wednesday");
+	LDUMP_STR2(DAY_5, "thursday");
+	LDUMP_STR2(DAY_6, "friday");
+	LDUMP_STR2(DAY_7, "saturday");
+
+	LDUMP_STR2(ABDAY_1, "sunday (abbrev.)");
+	LDUMP_STR2(ABDAY_2, "monday (abbrev.)");
+	LDUMP_STR2(ABDAY_3, "tuesday (abbrev.)");
+	LDUMP_STR2(ABDAY_4, "wednesday (abbrev.)");
+	LDUMP_STR2(ABDAY_5, "thursday (abbrev.)");
+	LDUMP_STR2(ABDAY_6, "friday (abbrev.)");
+	LDUMP_STR2(ABDAY_7, "saturday (abbrev.)");
+
+	LDUMP_STR2(MON_1, "january");
+	LDUMP_STR2(MON_2, "february");
+	LDUMP_STR2(MON_3, "march");
+	LDUMP_STR2(MON_4, "april");
+	LDUMP_STR2(MON_5, "may");
+	LDUMP_STR2(MON_6, "june");
+	LDUMP_STR2(MON_7, "july");
+	LDUMP_STR2(MON_8, "august");
+	LDUMP_STR2(MON_9, "september");
+	LDUMP_STR2(MON_10, "october");
+	LDUMP_STR2(MON_11, "november");
+	LDUMP_STR2(MON_12, "december");
+
+	LDUMP_STR2(ABMON_1, "january (abbrev.)");
+	LDUMP_STR2(ABMON_2, "february (abbrev.)");
+	LDUMP_STR2(ABMON_3, "march (abbrev.)");
+	LDUMP_STR2(ABMON_4, "april (abbrev.)");
+	LDUMP_STR2(ABMON_5, "may (abbrev.)");
+	LDUMP_STR2(ABMON_6, "june (abbrev.)");
+	LDUMP_STR2(ABMON_7, "july (abbrev.)");
+	LDUMP_STR2(ABMON_8, "august (abbrev.)");
+	LDUMP_STR2(ABMON_9, "september (abbrev.)");
+	LDUMP_STR2(ABMON_1, "october (abbrev.)");
+	LDUMP_STR2(ABMON_1, "november (abbrev.)");
+	LDUMP_STR2(ABMON_1, "december (abbrev.)");
+
+	LDUMP_STR2(RADIXCHAR, "separator for thousand");
+	LDUMP_STR2(YESSTR, "affirmative response for yes/no queries");
+	LDUMP_STR2(NOSTR, "negative response for yes/no queries");
+	LDUMP_STR2(CRNCYSTR, "currency symbol");
+
+	LDUMP_STR2(D_T_FMT, "string for formatting date and time");
+	LDUMP_STR2(D_FMT, "date format");
+	LDUMP_STR2(T_FMT, "time format");
+	LDUMP_STR2(AM_STR, "am string");
+	LDUMP_STR2(PM_STR, "pm string");
+
+	LDUMP_STR2(CODESET, "am or pm time format string");
+	LDUMP_STR2(ERA, "era date format string");
+	LDUMP_STR2(ERA_D_T_FMT, "era time format string");
+	LDUMP_STR2(ALT_DIGITS, "affirmative response expression");
+	LDUMP_STR2(NOEXPR, "strftime format for date(1)");
+}
+*/
+
+/* ldump <category> <locale object>: load the object and dump the requested category to stdout. */
+int
+main(int argc, char* argv[])
+{
+	_LC_locale_t *lp;
+
+	if (argc != 3)
+		usage();
+
+	lp = get_lp(argv[2]);
+
+
+	if (strcmp("lc_charmap", argv[1]) == 0)
+		lc_charmap(lp->lc_charmap);
+	else if (strcmp("lc_collate", argv[1]) == 0)
+		lc_collate(lp->lc_collate);
+	else if (strcmp("lc_monetary", argv[1]) == 0)
+		lc_monetary(lp->lc_monetary);
+	else if (strcmp("lc_numeric", argv[1]) == 0)
+		lc_numeric(lp->lc_numeric);
+	else if (strcmp("lc_messages", argv[1]) == 0)
+		lc_messages(lp->lc_messages);
+	else if (strcmp("lc_time", argv[1]) == 0)
+		lc_time(lp->lc_time);
+	else if (strcmp("lc_ctype", argv[1]) == 0)
+		lc_ctype(lp->lc_ctype);
+	else
+		usage();	/* unknown category: a user error, not an internal one, and assert() vanishes with NDEBUG */
+
+/*        lc_nl_info(lp1->no_of_items, lp1->nl_info, lp2->nl_info); */
+
+	return 0;
+}