root/ext/mbstring/oniguruma/enc/euc_kr.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. euckr_mbc_enc_len
  2. euckr_mbc_to_code
  3. euckr_code_to_mbc
  4. euckr_mbc_case_fold
  5. euckr_is_mbc_ambiguous
  6. euckr_is_code_ctype
  7. euckr_left_adjust_char_head
  8. euckr_is_allowed_reverse_match

   1 /**********************************************************************
   2   euc_kr.c -  Oniguruma (regular expression library)
   3 **********************************************************************/
   4 /*-
   5  * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
   6  * All rights reserved.
   7  *
   8  * Redistribution and use in source and binary forms, with or without
   9  * modification, are permitted provided that the following conditions
  10  * are met:
  11  * 1. Redistributions of source code must retain the above copyright
  12  *    notice, this list of conditions and the following disclaimer.
  13  * 2. Redistributions in binary form must reproduce the above copyright
  14  *    notice, this list of conditions and the following disclaimer in the
  15  *    documentation and/or other materials provided with the distribution.
  16  *
  17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  27  * SUCH DAMAGE.
  28  */
  29 
  30 #include "regenc.h"
  31 
  32 static const int EncLen_EUCKR[] = {
  33   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  34   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  35   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  36   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  37   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  38   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  39   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  40   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  41   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  42   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  43   1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  44   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  45   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  46   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  47   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  48   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
  49 };
  50 
  51 static int
  52 euckr_mbc_enc_len(const UChar* p)
  53 {
  54   return EncLen_EUCKR[*p];
  55 }
  56 
  57 static OnigCodePoint
  58 euckr_mbc_to_code(const UChar* p, const UChar* end)
  59 {
  60   return onigenc_mbn_mbc_to_code(ONIG_ENCODING_EUC_KR, p, end);
  61 }
  62 
  63 static int
  64 euckr_code_to_mbc(OnigCodePoint code, UChar *buf)
  65 {
  66   return onigenc_mb2_code_to_mbc(ONIG_ENCODING_EUC_KR, code, buf);
  67 }
  68 
  69 static int
  70 euckr_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end,
  71                     UChar* lower)
  72 {
  73   return onigenc_mbn_mbc_case_fold(ONIG_ENCODING_EUC_KR, flag,
  74                                    pp, end, lower);
  75 }
  76 
  77 #if 0
  78 static int
  79 euckr_is_mbc_ambiguous(OnigCaseFoldType flag,
  80                        const UChar** pp, const UChar* end)
  81 {
  82   return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_EUC_KR, flag, pp, end);
  83 }
  84 #endif
  85 
  86 static int
  87 euckr_is_code_ctype(OnigCodePoint code, unsigned int ctype)
  88 {
  89   return onigenc_mb2_is_code_ctype(ONIG_ENCODING_EUC_KR, code, ctype);
  90 }
  91 
  92 #define euckr_islead(c)    ((c) < 0xa1 || (c) == 0xff)
  93 
  94 static UChar*
  95 euckr_left_adjust_char_head(const UChar* start, const UChar* s)
  96 {
  97   /* Assumed in this encoding,
  98      mb-trail bytes don't mix with single bytes.
  99   */
 100   const UChar *p;
 101   int len;
 102 
 103   if (s <= start) return (UChar* )s;
 104   p = s;
 105 
 106   while (!euckr_islead(*p) && p > start) p--;
 107   len = enclen(ONIG_ENCODING_EUC_KR, p);
 108   if (p + len > s) return (UChar* )p;
 109   p += len;
 110   return (UChar* )(p + ((s - p) & ~1));
 111 }
 112 
 113 static int
 114 euckr_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
 115 {
 116   const UChar c = *s;
 117   if (c <= 0x7e) return TRUE;
 118   else           return FALSE;
 119 }
 120 
 121 OnigEncodingType OnigEncodingEUC_KR = {
 122   euckr_mbc_enc_len,
 123   "EUC-KR",   /* name */
 124   2,          /* max enc length */
 125   1,          /* min enc length */
 126   onigenc_is_mbc_newline_0x0a,
 127   euckr_mbc_to_code,
 128   onigenc_mb2_code_to_mbclen,
 129   euckr_code_to_mbc,
 130   euckr_mbc_case_fold,
 131   onigenc_ascii_apply_all_case_fold,
 132   onigenc_ascii_get_case_fold_codes_by_str,
 133   onigenc_minimum_property_name_to_ctype,
 134   euckr_is_code_ctype,
 135   onigenc_not_support_get_ctype_code_range,
 136   euckr_left_adjust_char_head,
 137   euckr_is_allowed_reverse_match
 138 };
 139 
 140 /* Same with OnigEncodingEUC_KR except the name */
 141 OnigEncodingType OnigEncodingEUC_CN = {
 142   euckr_mbc_enc_len,
 143   "EUC-CN",   /* name */
 144   2,          /* max enc length */
 145   1,          /* min enc length */
 146   onigenc_is_mbc_newline_0x0a,
 147   euckr_mbc_to_code,
 148   onigenc_mb2_code_to_mbclen,
 149   euckr_code_to_mbc,
 150   euckr_mbc_case_fold,
 151   onigenc_ascii_apply_all_case_fold,
 152   onigenc_ascii_get_case_fold_codes_by_str,
 153   onigenc_minimum_property_name_to_ctype,
 154   euckr_is_code_ctype,
 155   onigenc_not_support_get_ctype_code_range,
 156   euckr_left_adjust_char_head,
 157   euckr_is_allowed_reverse_match
 158 };

/* [<][>][^][v][top][bottom][index][help] */