--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/.hgignore Thu Aug 14 15:12:38 2008 +0800
@@ -0,0 +1,4 @@
+syntax: glob
+tags
+*.o
+*.so
--- a/src/lib/iconv/zh/Makefile Wed Aug 13 15:04:27 2008 +0200
+++ b/src/lib/iconv/zh/Makefile Thu Aug 14 15:12:38 2008 +0800
@@ -86,6 +86,18 @@
$(CC) $(CFLAGS) -DUCS_2LE -c -o $@ $?
$(CC) $(CFLAGS_64) -DUCS_2LE -c -o $(MACH64)/$@ $?
+GBK%UCS-2BE.o: zh_CN.gbk%UTF-8.c	# built with -DUCS_2BE; second object goes to $(MACH64)/
+ $(CC) $(CFLAGS) -DUCS_2BE -c -o $@ $?
+ $(CC) $(CFLAGS_64) -DUCS_2BE -c -o $(MACH64)/$@ $?
+
+GBK%UCS-4LE.o: zh_CN.gbk%UTF-8.c	# built with -DUCS_4LE; second object goes to $(MACH64)/
+ $(CC) $(CFLAGS) -DUCS_4LE -c -o $@ $?
+ $(CC) $(CFLAGS_64) -DUCS_4LE -c -o $(MACH64)/$@ $?
+
+GBK%UCS-4BE.o: zh_CN.gbk%UTF-8.c	# built with -DUCS_4BE; second object goes to $(MACH64)/
+ $(CC) $(CFLAGS) -DUCS_4BE -c -o $@ $?
+ $(CC) $(CFLAGS_64) -DUCS_4BE -c -o $(MACH64)/$@ $?
+
UCS-2LE%gb2312.o: UTF-8%zh_CN.euc.c
$(CC) $(CFLAGS) -DUCS_2LE -c -o $@ $?
$(CC) $(CFLAGS_64) -DUCS_2LE -c -o $(MACH64)/$@ $?
@@ -94,6 +106,18 @@
$(CC) $(CFLAGS) -DUCS_2LE -c -o $@ $?
$(CC) $(CFLAGS_64) -DUCS_2LE -c -o $(MACH64)/$@ $?
+UCS-2BE%GBK.o: UTF-8%zh_CN.gbk.c	# built with -DUCS_2BE; second object goes to $(MACH64)/
+ $(CC) $(CFLAGS) -DUCS_2BE -c -o $@ $?
+ $(CC) $(CFLAGS_64) -DUCS_2BE -c -o $(MACH64)/$@ $?
+
+UCS-4LE%GBK.o: UTF-8%zh_CN.gbk.c	# built with -DUCS_4LE; second object goes to $(MACH64)/
+ $(CC) $(CFLAGS) -DUCS_4LE -c -o $@ $?
+ $(CC) $(CFLAGS_64) -DUCS_4LE -c -o $(MACH64)/$@ $?
+
+UCS-4BE%GBK.o: UTF-8%zh_CN.gbk.c	# built with -DUCS_4BE; second object goes to $(MACH64)/
+ $(CC) $(CFLAGS) -DUCS_4BE -c -o $@ $?
+ $(CC) $(CFLAGS_64) -DUCS_4BE -c -o $(MACH64)/$@ $?
+
cns11643%UCS-2LE.o: zh_TW-euc%UTF-8.c
$(CC) $(CFLAGS) -DUCS_2LE -c -o $@ $?
$(CC) $(CFLAGS_64) -DUCS_2LE -c -o $(MACH64)/$@ $?
@@ -213,11 +237,12 @@
PROGS += zh_TW-cp937%UTF-8.so UTF-8%zh_TW-cp937.so
PROGS += zh_CN-cp935%UTF-8.so UTF-8%zh_CN-cp935.so
PROGS += gb2312%UCS-2LE.so UCS-2LE%gb2312.so
-PROGS += GBK%UCS-2LE.so UCS-2LE%GBK.so
PROGS += cns11643%UCS-2LE.so BIG5%UCS-2LE.so Big5-HKSCS%UCS-2LE.so
PROGS += UCS-2LE%cns11643.so UCS-2LE%BIG5.so UCS-2LE%Big5-HKSCS.so
PROGS += UTF-8%zh_TW-euc.so UTF-8%zh_TW-big5.so UTF-8%zh_HK.hkscs.so
PROGS += zh_TW-iso2022-CN-EXT%zh_TW-euc.so zh_TW-iso2022-CN-EXT%zh_TW-big5.so
+PROGS += GBK%UCS-2LE.so GBK%UCS-2BE.so GBK%UCS-4LE.so GBK%UCS-4BE.so
+PROGS += UCS-2LE%GBK.so UCS-2BE%GBK.so UCS-4LE%GBK.so UCS-4BE%GBK.so
install: all $(TARGETDIRS)
for i in $(PROGS); do \
--- a/src/lib/iconv/zh/UTF-8%zh_CN.gbk.c Wed Aug 13 15:04:27 2008 +0200
+++ b/src/lib/iconv/zh/UTF-8%zh_CN.gbk.c Thu Aug 14 15:12:38 2008 +0800
@@ -30,6 +30,7 @@
#include <sys/types.h>
#include <unicode_gb18030.h> /* Unicode to GBK mapping table */
#include "common_defs.h"
+#include "ucs4.h"
#define MSB 0x80 /* most significant bit */
#define ONEBYTE 0xff /* right most byte */
@@ -44,13 +45,11 @@
char keepc[6]; /* maximum # byte of UTF8 code */
short ustate;
int _errno; /* internal errno */
- boolean little_endian;
- boolean bom_written;
} _iconv_st;
enum _USTATE { U0, U1, U2, U3, U4, U5, U6, U7 };
-int get_gbk_by_unicode(char c1, char c2, int* unidx, unsigned long* gbkcode);
+int get_gbk_by_unicode(unsigned long, int*, unsigned long*);
/*
@@ -68,12 +67,6 @@
st->ustate = U0;
st->_errno = 0;
- st->little_endian = false;
- st->bom_written = false;
-#if defined(UCS_2LE)
- st->little_endian = true;
- st->bom_written = true;
-#endif
return ((void *) st);
}
@@ -90,7 +83,87 @@
free(st);
}
+#if defined(UCS_2LE) || defined (UCS_2BE) || defined (UCS_4LE) || defined (UCS_4BE)
+/*
+ * UCS-2 / UCS-4 --> GBK conversion; called from iconv()
+ * Return: sum of the counts reported by unicode_to_gbk(), or (size_t)-1 with errno set
+ */
+size_t
+_icv_iconv(_iconv_st *st, char **inbuf, size_t *inbytesleft,
+	char **outbuf, size_t *outbytesleft)
+{
+	unsigned long c1, c2, c3, c4;	/* input bytes, widened so the shifts below cannot overflow an int */
+	unsigned long unichr, gbkcode;
+	int n, unidx;
+	int uconv_num = 0;
+	if (st == NULL) {
+		errno = EBADF;
+		return ((size_t) -1);
+	}
+
+	if (inbuf == NULL || *inbuf == NULL) { /* Reset request. */
+		st->ustate = U0;
+		st->_errno = 0;
+		return ((size_t) 0);
+	}
+
+	st->_errno = 0;	/* reset internal errno */
+	errno = 0;	/* reset external errno */
+
+	while (*inbytesleft > ICV_FETCH_UCS_SIZE-1 && *outbytesleft > 0) {
+		int size = 0;	/* input bytes consumed by this character */
+		int uconv_num_internal = 0;
+
+		c1 = (unsigned char) *(*inbuf + size++);
+		c2 = (unsigned char) *(*inbuf + size++);
+#if defined(UCS_4LE) || defined (UCS_4BE)
+		c3 = (unsigned char) *(*inbuf + size++);
+		c4 = (unsigned char) *(*inbuf + size++);
+#endif
+#if defined(UCS_2LE)
+		unichr = c1 | (c2 << 8);
+#elif defined(UCS_2BE)
+		unichr = (c1 << 8) | c2;
+#elif defined(UCS_4LE)
+		unichr = c1 | (c2 << 8) | (c3 << 16) | (c4 << 24);
+#else
+		unichr = (c1 << 24) | (c2 << 16) | (c3 << 8) | c4;
+#endif
+
+		if (unichr < MSB) {	/* ASCII */
+			**outbuf = (char) unichr;
+			(*outbuf)++;
+			(*outbytesleft)--;
+		} else {
+			n = get_gbk_by_unicode(unichr, &unidx, &gbkcode);
+			if (n == -1) {	/* invalid unicode codepoint */
+				st->_errno = errno = EILSEQ;
+				return ((size_t)-1);
+			}
+			n = unicode_to_gbk(unidx, gbkcode, *outbuf, *outbytesleft, &uconv_num_internal);
+			if (n <= 0) {
+				if (errno == 0)	/* 0 is documented as "no space in outbuf" */
+					st->_errno = errno = E2BIG;
+				return ((size_t)-1);
+			}
+			(*outbuf) += n;
+			(*outbytesleft) -= n;
+			uconv_num += uconv_num_internal;
+		}
+
+		(*inbuf) += size;
+		(*inbytesleft) -= size;
+	}
+
+	if (*inbytesleft > 0) {	/* input left over: truncated character or full outbuf */
+		errno = *outbytesleft ? EINVAL : E2BIG;
+		return ((size_t)-1);
+	}
+
+	return uconv_num;
+}
+#else
/*
* Actual conversion; called from iconv()
*/
@@ -121,6 +194,7 @@
{
char c1, c2;
int n, unidx;
+ unsigned long unichr;
unsigned long gbkcode;
int uconv_num = 0;
int utf8_len;
@@ -150,14 +224,7 @@
switch (st->ustate) {
case U0: /* assuming ASCII in the beginning */
- /*
- * Code conversion for UCS-2LE to support Samba
- */
- if (st->little_endian) {
- st->ustate = U1;
- st->keepc[0] = **inbuf;
- }
- else if ((**inbuf & MSB) == 0) { /* ASCII */
+ if ((**inbuf & MSB) == 0) { /* ASCII */
**outbuf = **inbuf;
(*outbuf)++;
(*outbytesleft)--;
@@ -185,30 +252,12 @@
}
break;
case U1: /* 2 byte unicode */
- if ((**inbuf & 0xc0) == MSB || st->little_endian) {
+ if ((**inbuf & 0xc0) == MSB) {
utf8_len = 2;
st->keepc[1] = **inbuf;
- /*
- * Code conversion for UCS-2LE to support Samba
- */
- if (st->little_endian) {
- c1 = st->keepc[1];
- c2 = st->keepc[0];
-
- /*
- * It's ASCII
- */
- if (c1 == 0 && (c2 & MSB) == 0) {
- *(*outbuf)++ = c2;
- (*outbytesleft)--;
- st->ustate = U0;
- break;
- }
- } else {
- c1 = (st->keepc[0]&0x1c)>>2;
- c2 = ((st->keepc[0]&0x03)<<6) | ((st->keepc[1])&0x3f);
- }
+ c1 = (st->keepc[0]&0x1c)>>2;
+ c2 = ((st->keepc[0]&0x03)<<6) | ((st->keepc[1])&0x3f);
st->ustate = U4;
#ifdef DEBUG
@@ -253,7 +302,8 @@
}
break;
case U4:
- n = get_gbk_by_unicode(c1, c2, &unidx, &gbkcode);
+ unichr = (unsigned long) ((c1 & ONEBYTE) << 8) + (c2 & ONEBYTE);
+ n = get_gbk_by_unicode(unichr, &unidx, &gbkcode);
if ( n == -1 ) { /* unicode is either 0xFFFE or 0xFFFF */
st->_errno = errno = EILSEQ;
break;
@@ -264,7 +314,7 @@
break;
}
*/
- n = utf8_to_gbk(unidx, gbkcode, *outbuf, *outbytesleft, &uconv_num_internal);
+ n = unicode_to_gbk(unidx, gbkcode, *outbuf, *outbytesleft, &uconv_num_internal);
if (n > 0) {
(*outbuf) += n;
(*outbytesleft) -= n;
@@ -384,6 +434,7 @@
return uconv_num;
}
+#endif /* UCS_2LE || UCS_2BE || UCS_4LE || UCS_4BE */
/*
@@ -395,14 +446,9 @@
* Since binary search of the UTF8 to GBK table is necessary, might as well
* return index and GBK code matching to the unicode.
*/
-int get_gbk_by_unicode(char c1, char c2, int* unidx, unsigned long* gbkcode)
+int get_gbk_by_unicode(unsigned long unicode, int* unidx, unsigned long* gbkcode)
{
- unsigned long unicode;
-
- unicode = (unsigned long) ((c1 & ONEBYTE) << 8) + (c2 & ONEBYTE);
-
- /* 0xfffe and 0xffff should not be allowed */
- if ( unicode == 0xFFFE || unicode == 0xFFFF ) return -1;
+ if ( unicode > UCS4_MAXVAL || ext_ucs4_lsw(unicode) > UCS4_PPRC_MAXVAL ) return -1;
*unidx = binsearch(unicode, unicode_gbk_tab, UNICODEMAX);
if ((*unidx) >= 0)
@@ -410,7 +456,7 @@
else
return(1); /* match from unicode to GBK not found */
#ifdef DEBUG
- fprintf(stderr, "Unicode=%04x, idx=%5d, Big-5=%x ", unicode, *unidx, *big5code);
+ fprintf(stderr, "Unicode=%04x, idx=%5d, Big-5=%x ", unicode, *unidx, *gbkcode);
#endif
return(0);
@@ -424,7 +470,7 @@
* Return: > 0 - converted with enough space in output buffer
* = 0 - no space in outbuf
*/
-int utf8_to_gbk(int unidx, unsigned long gbkcode, char* buf, size_t buflen, int *uconv_num)
+int unicode_to_gbk(int unidx, unsigned long gbkcode, char* buf, size_t buflen, int *uconv_num)
{
unsigned long val; /* GBK value */
char c[GBK_LEN_MAX];
@@ -469,14 +515,14 @@
/* binsearch: find x in v[0] <= v[1] <= ... <= v[n-1] */
-int binsearch(unsigned long x, table_t v[], int n)
+static int binsearch(unsigned long x, table_t v[], int n)
{
int low, high, mid;
low = 0;
high = n - 1;
while (low <= high) {
- mid = (low + high) / 2;
+ mid = (high - low) / 2 + low;
if (x < v[mid].key)
high = mid - 1;
else if (x > v[mid].key)
@@ -486,3 +532,7 @@
}
return (-1); /* no match */
}
+
+/*
+vi:ts=8:ai:expandtab
+*/
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib/iconv/zh/uni_common.c Thu Aug 14 15:12:38 2008 +0800
@@ -0,0 +1,229 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright(c) 2001 Sun Microsystems, Inc.
+ * All rights reserved.
+ */
+
+/*
+ * Unicode --> UTF8 (FSS-UTF)
+ * Values above 0x10ffff are written as the UTF8_NON_ID_CHAR1..3 sequence
+ * and *uconv_num is set to 1.  st is not used.
+ * Return: > 0 - number of bytes stored in buf
+ *         = 0 - no space in buf (errno is set to E2BIG)
+ */
+static int unichr_to_utf8 (st, unichr, buf, buflen, uconv_num)
+_iconv_st *st;
+unsigned long unichr;
+char *buf;
+size_t buflen;
+int *uconv_num;
+{
+	int len;	/* bytes to emit; 0 = value has no UTF-8 form here */
+
+	if (unichr < 0x0080)
+		len = 1;
+	else if (unichr <= 0x07ff)
+		len = 2;
+	else if (unichr <= 0xffff)
+		len = 3;
+	else if (unichr <= 0x10ffff)
+		len = 4;
+	else
+		len = 0;
+
+	/* the replacement sequence written for len == 0 takes three bytes */
+	if (buflen < (size_t)(len ? len : 3)) {
+		errno = E2BIG;
+		return(0);
+	}
+
+	switch (len) {
+	case 1:
+		buf[0] = (char) unichr;
+		break;
+	case 2:
+		buf[0] = (char)(0xc0 | ((unichr >> 6) & 0x1f));
+		buf[1] = (char)(0x80 | (unichr & 0x3f));
+		break;
+	case 3:
+		buf[0] = (char)(0xe0 | ((unichr >> 12) & 0xf));
+		buf[1] = (char)(0x80 | ((unichr >> 6) & 0x3f));
+		buf[2] = (char)(0x80 | (unichr & 0x3f));
+		break;
+	case 4:
+		buf[0] = (char)(0xf0 | ((unichr >> 18) & 0x7));
+		buf[1] = (char)(0x80 | ((unichr >> 12) & 0x3f));
+		buf[2] = (char)(0x80 | ((unichr >> 6) & 0x3f));
+		buf[3] = (char)(0x80 | (unichr & 0x3f));
+		break;
+	default:
+		/* unrecognized unicode character: non-identical conversion */
+		buf[0] = (char)UTF8_NON_ID_CHAR1;
+		buf[1] = (char)UTF8_NON_ID_CHAR2;
+		buf[2] = (char)UTF8_NON_ID_CHAR3;
+		*uconv_num = 1;
+		len = 3;
+		break;
+	}
+
+	return(len);
+}
+
+/* Store unichr in buf as UCS-2LE, preceded by a BOM unless st->bom_written.
+ * Return: > 0 - bytes stored;  = 0 - no space in buf (errno set to E2BIG) */
+static int unichr_to_ucs_2le (st, unichr, buf, buflen, uconv_num)
+_iconv_st *st;
+unsigned long unichr;
+char *buf;
+size_t buflen;
+int *uconv_num;
+{
+	int size = 0;
+
+	if (unichr > 0x00ffff) {	/* does not fit in 16 bits: substitute and flag */
+		unichr = ICV_CHAR_UCS2_REPLACEMENT;
+		*uconv_num = 1;
+	}
+
+	if (buflen < (size_t)(st->bom_written ? 2 : 4)) {	/* BOM + character, or character only */
+		errno = E2BIG;	/* callers that ignore a 0 return still see the failure */
+		return 0;
+	}
+
+	if (!st->bom_written) {
+		*(buf + size++) = (uchar_t)0xff;
+		*(buf + size++) = (uchar_t)0xfe;
+		st->bom_written = true;
+	}
+
+	*(buf + size++) = (uchar_t)(unichr & 0xff);
+	*(buf + size++) = (uchar_t)((unichr >> 8) & 0xff);
+	return size;
+}
+
+/* Store unichr in buf as UCS-2BE, preceded by a BOM unless st->bom_written.
+ * Return: > 0 - bytes stored;  = 0 - no space in buf (errno set to E2BIG) */
+static int unichr_to_ucs_2be (st, unichr, buf, buflen, uconv_num)
+_iconv_st *st;
+unsigned long unichr;
+char *buf;
+size_t buflen;
+int *uconv_num;
+{
+	int size = 0;
+
+	if (unichr > 0x00ffff) {	/* does not fit in 16 bits: substitute and flag */
+		unichr = ICV_CHAR_UCS2_REPLACEMENT;
+		*uconv_num = 1;
+	}
+
+	if (buflen < (size_t)(st->bom_written ? 2 : 4)) {	/* BOM + character, or character only */
+		errno = E2BIG;	/* callers that ignore a 0 return still see the failure */
+		return 0;
+	}
+
+	if (!st->bom_written) {
+		*(buf + size++) = (uchar_t)0xfe;
+		*(buf + size++) = (uchar_t)0xff;
+		st->bom_written = true;
+	}
+
+	*(buf + size++) = (uchar_t)((unichr >> 8) & 0xff);
+	*(buf + size++) = (uchar_t)(unichr & 0xff);
+	return size;
+}
+
+/* Store unichr in buf as UCS-4LE, preceded by a BOM unless st->bom_written.
+ * Return: > 0 - bytes stored;  = 0 - no space in buf (errno set to E2BIG) */
+static int unichr_to_ucs_4le (st, unichr, buf, buflen, uconv_num)
+_iconv_st *st;
+unsigned long unichr;
+char *buf;
+size_t buflen;
+int *uconv_num;
+{
+	int size = 0;
+
+	if (unichr == 0xffffffff) {	/* presumably the "no mapping" marker -- TODO confirm */
+		unichr = ICV_CHAR_UCS2_REPLACEMENT;
+		*uconv_num = 1;
+	}
+
+	if (buflen < (size_t)(st->bom_written ? 4 : 8)) {	/* BOM + character, or character only */
+		errno = E2BIG;	/* callers that ignore a 0 return still see the failure */
+		return 0;
+	}
+
+	if (!st->bom_written) {
+		*(buf + size++) = (uchar_t)0xff;
+		*(buf + size++) = (uchar_t)0xfe;
+		*(buf + size++) = (uchar_t)0;
+		*(buf + size++) = (uchar_t)0;
+		st->bom_written = true;
+	}
+
+	*(buf + size++) = (uchar_t)(unichr & 0xff);
+	*(buf + size++) = (uchar_t)((unichr >> 8) & 0xff);
+	*(buf + size++) = (uchar_t)((unichr >> 16) & 0xff);
+	*(buf + size++) = (uchar_t)((unichr >> 24) & 0xff);
+	return size;
+}
+
+/* Store unichr in buf as UCS-4BE, preceded by a BOM unless st->bom_written.
+ * Return: > 0 - bytes stored;  = 0 - no space in buf (errno set to E2BIG) */
+static int unichr_to_ucs_4be (st, unichr, buf, buflen, uconv_num)
+_iconv_st *st;
+unsigned long unichr;
+char *buf;
+size_t buflen;
+int *uconv_num;
+{
+	int size = 0;
+
+	if (unichr == 0xffffffff) {	/* presumably the "no mapping" marker -- TODO confirm */
+		unichr = ICV_CHAR_UCS2_REPLACEMENT;
+		*uconv_num = 1;
+	}
+
+	if (buflen < (size_t)(st->bom_written ? 4 : 8)) {	/* BOM + character, or character only */
+		errno = E2BIG;	/* callers that ignore a 0 return still see the failure */
+		return 0;
+	}
+
+	if (!st->bom_written) {
+		*(buf + size++) = (uchar_t)0;
+		*(buf + size++) = (uchar_t)0;
+		*(buf + size++) = (uchar_t)0xfe;
+		*(buf + size++) = (uchar_t)0xff;
+		st->bom_written = true;
+	}
+
+	*(buf + size++) = (uchar_t)((unichr >> 24) & 0xff);
+	*(buf + size++) = (uchar_t)((unichr >> 16) & 0xff);
+	*(buf + size++) = (uchar_t)((unichr >> 8) & 0xff);
+	*(buf + size++) = (uchar_t)(unichr & 0xff);
+	return size;
+}
+
+/*
+vi:ts=8:ai:expandtab
+*/
--- a/src/lib/iconv/zh/zh_CN.gbk%UTF-8.c Wed Aug 13 15:04:27 2008 +0200
+++ b/src/lib/iconv/zh/zh_CN.gbk%UTF-8.c Thu Aug 14 15:12:38 2008 +0800
@@ -46,17 +46,28 @@
#define UTF8_NON_ID_CHAR2 0xBF
#define UTF8_NON_ID_CHAR3 0xBD
+#if defined UCS_2LE	/* output_char: the encoder for the target encoding fixed at build time */
+ #define output_char unichr_to_ucs_2le
+#elif defined UCS_2BE
+ #define output_char unichr_to_ucs_2be
+#elif defined UCS_4LE
+ #define output_char unichr_to_ucs_4le
+#elif defined UCS_4BE
+ #define output_char unichr_to_ucs_4be
+#else	/* no UCS_* macro defined: emit UTF-8 */
+ #define output_char unichr_to_utf8
+#endif
+
typedef struct _icv_state {
char keepc[GBK_LEN_MAX]; /* maximum # byte of GBK2K code */
short cstate; /* state machine id */
int _errno; /* internal errno */
- boolean little_endian;
boolean bom_written;
} _iconv_st;
enum _CSTATE { C0, C1, C2, C3 };
-
+static unsigned long gbk_to_unicode (_iconv_st *);
+/* output_char expands to the selected encoder, which is only defined at the end of this file */
+static int output_char (_iconv_st *, unsigned long, char *, size_t, int *);
/*
* Open; called from iconv_open()
@@ -73,11 +84,10 @@
st->cstate = C0;
st->_errno = 0;
- st->little_endian = false;
- st->bom_written = false;
-#if defined(UCS_2LE)
- st->little_endian = true;
+#if defined(UCS_2LE) || defined(UCS_2BE) || defined(UCS_4LE) || defined(UCS_4BE)
st->bom_written = true;
+#else
+ st->bom_written = false;
#endif
return ((void *) st);
}
@@ -162,34 +172,13 @@
st->cstate = C1;
}
} else { /* real ASCII */
- /*
- * Code conversion for UCS-2LE to support Samba
- */
- if (st->little_endian) {
- if (!st->bom_written) {
- if (*outbytesleft < 4)
- errno = E2BIG;
- else {
- *(*outbuf)++ = (uchar_t)0xff;
- *(*outbuf)++ = (uchar_t)0xfe;
- *outbytesleft -= 2;
-
- st->bom_written = true;
- }
- }
-
- if (*outbytesleft < 2)
- errno = E2BIG;
- else {
- *(*outbuf)++ = **inbuf;
- *(*outbuf)++ = (uchar_t)0x0;
- *outbytesleft -= 2;
- }
- } else {
- **outbuf = **inbuf;
- (*outbuf)++;
- (*outbytesleft)--;
- }
+ int uconv_num_internal = 0;
+ n = output_char (st, **inbuf, *outbuf,
+ *outbytesleft, &uconv_num_internal);
+ if (n > 0) {
+ (*outbuf) += n;
+ (*outbytesleft) -= n;
+ }
}
break;
case C1: /* GBK2 characters: 2nd byte */
@@ -198,8 +187,9 @@
st->keepc[1] = (**inbuf);
st->keepc[2] = st->keepc[3] = 0;
- n = gbk_to_utf8(st, *outbuf,
- *outbytesleft, &uconv_num_internal);
+
+ n = output_char (st, gbk_to_unicode (st), *outbuf,
+ *outbytesleft, &uconv_num_internal);
if (n > 0) {
(*outbuf) += n;
(*outbytesleft) -= n;
@@ -233,7 +223,9 @@
int uconv_num_internal = 0;
st->keepc[3] = **inbuf;
- n = gbk_to_utf8(st, *outbuf, *outbytesleft, &uconv_num_internal);
+
+ n = output_char (st, gbk_to_unicode (st), *outbuf,
+ *outbytesleft, &uconv_num_internal);
if ( n > 0 ) {
(*outbuf) += n;
@@ -289,7 +281,7 @@
* Return: = 0 - valid GBK2 2nd byte
* = 1 - invalid GBK2 2nd byte
*/
-int gbk_2nd_byte(inbuf)
+static int gbk_2nd_byte(inbuf)
char inbuf;
{
unsigned int buf = (unsigned int) (inbuf & ONEBYTE);
@@ -301,32 +293,21 @@
return(1);
}
-
-/*
- * GBK code --> ISO/IEC 10646-2000 (Unicode)
- * Unicode --> UTF8 (FSS-UTF)
- * (File System Safe Universal Character Set Transformation Format)
- * Return: > 0 - converted with enough space in output buffer
- * = 0 - no space in outbuf
- */
-int gbk_to_utf8(st, buf, buflen, uconv_num)
+static unsigned long gbk_to_unicode (st)
_iconv_st *st;
-char *buf;
-size_t buflen;
-int *uconv_num;
{
- unsigned long gbk_val; /* GBK value */
- int unidx; /* Unicode index */
- unsigned long uni_val; /* Unicode */
+ unsigned long gbk_val; /* GBK value */
+ int unidx; /* Unicode index */
+ unsigned long uni_val = 0xffffffff; /* Unicode */
int isgbk4 = 1;
char *keepc = st->keepc;
if ( keepc[2] == 0 && keepc[3] == 0 )
isgbk4 = 0;
- if ( ! isgbk4 )
+ if ( ! isgbk4 ) {
gbk_val = ((keepc[0]&ONEBYTE) << 8) + (keepc[1]&ONEBYTE);
- else {
+ } else {
int i;
gbk_val = keepc[0] & ONEBYTE;
@@ -342,82 +323,18 @@
if ( unidx >= 0 ) uni_val = gbk_unicode_tab[unidx].value;
}
- /*
- * Code conversion for UCS-2LE to support Samba
- */
- if (st->little_endian) {
- int size = 0;
-
- if (unidx < 0 || uni_val > 0x00ffff) {
- uni_val = ICV_CHAR_UCS2_REPLACEMENT;
- *uconv_num = 1;
- }
-
- if (!st->bom_written) {
- if (buflen < 4)
- return 0;
-
- *(buf + size++) = (uchar_t)0xff;
- *(buf + size++) = (uchar_t)0xfe;
- st->bom_written = true;
- }
-
- if (buflen < 2)
- return 0;
-
- *(buf + size++) = (uchar_t)(uni_val & 0xff);
- *(buf + size++) = (uchar_t)((uni_val >> 8) & 0xff);
-
- return size;
- }
-
- if (unidx >= 0) { /* do Unicode to UTF8 conversion */
- if (uni_val >= 0x0080 && uni_val <= 0x07ff) {
- if (buflen < 2) {
- errno = E2BIG;
- return(0);
- }
- *buf = (char)((uni_val >> 6) & 0x1f) | 0xc0;
- *(buf+1) = (char)(uni_val & 0x3f) | 0x80;
- return(2);
- }
- if (uni_val >= 0x0800 && uni_val <= 0xffff) {
- if (buflen < 3) {
- errno = E2BIG;
- return(0);
- }
- *buf = (char)((uni_val >> 12) & 0xf) | 0xe0;
- *(buf+1) = (char)((uni_val >>6) & 0x3f) | 0x80;
- *(buf+2) = (char)(uni_val & 0x3f) | 0x80;
- return(3);
- }
- }
-
- /* can't find a match in GBK --> UTF8 table or illegal UTF8 code */
- if (buflen < 3) {
- errno = E2BIG;
- return(0);
- }
- *buf = (char)UTF8_NON_ID_CHAR1;
- *(buf+1) = (char)UTF8_NON_ID_CHAR2;
- *(buf+2) = (char)UTF8_NON_ID_CHAR3;
-
- /* non-identical conversions */
- *uconv_num = 1;
-
- return(3);
+ return uni_val;
}
-
/* binsearch: find x in v[0] <= v[1] <= ... <= v[n-1] */
-int binsearch(unsigned long x, table_t v[], int n)
+static int binsearch(unsigned long x, table_t v[], int n)
{
int low, high, mid;
low = 0;
high = n - 1;
while (low <= high) {
- mid = (low + high) / 2;
+ mid = (high - low) / 2 + low;
if (x < v[mid].key)
high = mid - 1;
else if (x > v[mid].key)
@@ -427,3 +344,9 @@
}
return (-1); /* no match */
}
+
+#include "uni_common.c"
+
+/*
+vi:ts=8:ai:expandtab
+*/