src/cmd/fsexam/src/auto-detect.h
author yz157939@agc105
Fri, 25 Apr 2008 17:02:23 +0800
changeset 147 8c4ef02c14b8
permissions -rw-r--r--
replace new version of fsexam, old version 0.3.1 is not used any more
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
147
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
     1
/*
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
     2
 * CDDL HEADER START
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
     3
 *
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
     4
 * The contents of this file are subject to the terms of the
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
     5
 * Common Development and Distribution License (the "License").
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
     6
 * You may not use this file except in compliance with the License.
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
     7
 *
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
     8
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
     9
 * or http://www.opensolaris.org/os/licensing.
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    10
 * See the License for the specific language governing permissions
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    11
 * and limitations under the License.
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    12
 *
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    13
 * When distributing Covered Code, include this CDDL HEADER in each
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    14
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    15
 * If applicable, add the following below this CDDL HEADER, with the
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    16
 * fields enclosed by brackets "[]" replaced with your own identifying
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    17
 * information: Portions Copyright [yyyy] [name of copyright owner]
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    18
 *
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    19
 * CDDL HEADER END
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    20
 */
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    21
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    22
/*
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    23
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    24
 * Use is subject to license terms.
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    25
 */
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    26
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    27
/*
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    28
 * auto-detect.h
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    29
 *
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    30
 * Auto detect the filename/filecontent/string's encoding
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    31
 */
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    32
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    33
#ifndef _AUTO_DETECT_H
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    34
#define _AUTO_DETECT_H
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    35
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    36
/* NOTE(review): presumably the tolerance used when comparing EncodingPair
 * scores (a double) -- not shown in this header, confirm in auto-detect.c. */
#define INACCURACY  0.001
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    37
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    38
/*
 * Default value for the "flags" argument of the detection functions below.
 * When HAVE_AUTO_EF_H is defined, <auto_ef.h> is included and the default is
 * AE_LEVEL_2 (presumably declared by that header); otherwise the default is 0.
 */
#ifdef HAVE_AUTO_EF_H
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    39
#include <auto_ef.h>
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    40
#define DEFAULT_DETECTING_FLAG (AE_LEVEL_2)
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    41
#else
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    42
#define DEFAULT_DETECTING_FLAG 0
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    43
#endif
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    44
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    45
/* Shorthand type name for struct _EncodingPair, defined just below. */
typedef struct _EncodingPair EncodingPair;
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    46
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    47
/*
 * One detection candidate: an encoding name paired with its score.
 * The detection functions in this header return lists of these.
 */
struct _EncodingPair{
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    48
    gchar   *encoding_name;     /* name of the candidate encoding */
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    49
    double  score;              /* higher-scored pairs come first in returned lists */
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    50
};
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    51
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    52
/*====================================================================
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    53
 *  Function Name:  file_isutf8
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    54
 *
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    55
 *  Parameters:
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    56
 *      const gchar *filename: the name of file
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    57
 *      gint   flags: used by underlying library to detect encoding. 
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    58
 *              eg: auto_ef use  detect level
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    59
 *
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    60
 *  Desc:
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    61
 *      Determine whether the file's content is UTF-8 or not.
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    62
 *
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    63
 *  Return value:
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    64
 *      True if file content is UTF-8, otherwise False.
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    65
 *
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    66
 *  Author:     Yandong Yao 2006/09/06
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    67
 ========================================================================*/ 
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    68
/* Reports whether the content of the file named by filename is UTF-8. */
gboolean file_isutf8 (const gchar *filename, gint flags);
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    69
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    70
/*====================================================================
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    71
 *  Function Name:  file_encoding_detect
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    72
 *
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    73
 *  Parameters:
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    74
 *      const gchar *filename
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    75
 *      gint flags: used by underlying library to detect encoding. 
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    76
 *                  eg: auto_ef use detect level
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    77
 *
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    78
 *  Desc:
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    79
 *      Detect the file's content encoding, and return detected result.
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    80
 *
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    81
 *  Return value:
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    82
 *      Return one list of EncodingPair
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    83
 *      Encodings with a higher score appear ahead of encodings with a lower score.
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    84
 *
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    85
 *      The caller must free the returned GList with auto_encoding_free when it will not
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    86
 *      use it again.
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    87
 *
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    88
 *  Author:     Yandong Yao 2006/09/06
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    89
 ========================================================================*/ 
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    90
/* The caller owns the returned list; release it with auto_encoding_free. */
GList * file_encoding_detect (const gchar *filename, gint flags);
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    91
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    92
/*====================================================================
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    93
 *  Function Name:  str_isutf8
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    94
 *
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    95
 *  Parameters:
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    96
 *      const gchar *string: one null-terminated string.
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    97
 *      gint flags: used by underlying library to detect encoding.
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    98
 *                  eg: auto_ef use detect level
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
    99
 *
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
   100
 *  Desc:   
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
   101
 *      Determine whether one string is UTF-8 or not. Internally uses
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
   102
 *      g_utf8_validate now.
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
   103
 *
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
   104
 *  Return value:
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
   105
 *      True if is UTF-8, otherwise False.
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
   106
 *
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
   107
 *  Author:     Yandong Yao 2006/09/06
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
   108
 ========================================================================*/ 
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
   109
/* Reports whether the null-terminated string is UTF-8. */
gboolean str_isutf8 (const gchar *string, gint flags);
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
   110
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
   111
/*====================================================================
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
   112
 *  Function Name:  str_encoding_detect
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
   113
 *
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
   114
 *  Parameters:
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
   115
 *      const gchar *string: one null-terminated string
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
   116
 *      gint flags: used by underlying library to detect encoding.
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
   117
 *                  eg: auto_ef use detect level
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
   118
 *
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
   119
 *  Desc:
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
   120
 *      Detect the possible encoding of one string and return one list which 
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
   121
 *      contain the encoding name and its score. Higher score pair is ahead 
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
   122
 *      of lower score pair
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
   123
 *
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
   124
 *      The caller must free the returned GList with auto_encoding_free when it will not
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
   125
 *      use it again.
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
   126
 *
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
   127
 *  Return value:
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
   128
 *      One GList of encoding name/score pairs, higher scores first.
 *
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
   129
 *  Author:     Yandong Yao 2006/09/06
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
   130
 ========================================================================*/ 
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
   131
/* The caller owns the returned list; release it with auto_encoding_free. */
GList * str_encoding_detect (const gchar *string, gint flags);
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
   132
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
   133
/*==================================================================
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
   134
 *  Free every encoding pair in the list, then the list itself.
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
   135
 ==================================================================*/
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
   136
/* Use this to release lists returned by file_encoding_detect and str_encoding_detect. */
void auto_encoding_free (GList *list);
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
   137
8c4ef02c14b8 replace new version of fsexam, old version 0.3.1 is not used any more
yz157939@agc105
parents:
diff changeset
   138
#endif  //_AUTO_DETECT_H