root/ext/mbstring/oniguruma/regenc.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. onigenc_init
  2. onigenc_get_default_encoding
  3. onigenc_set_default_encoding
  4. onigenc_get_right_adjust_char_head
  5. onigenc_get_right_adjust_char_head_with_prev
  6. onigenc_get_prev_char_head
  7. onigenc_step_back
  8. onigenc_step
  9. onigenc_strlen
  10. onigenc_strlen_null
  11. onigenc_str_bytelen_null
  12. onigenc_set_default_caseconv_table
  13. onigenc_get_left_adjust_char_head
  14. onigenc_ascii_apply_all_case_fold
  15. onigenc_ascii_get_case_fold_codes_by_str
  16. ss_apply_all_case_fold
  17. onigenc_apply_all_case_fold_with_map
  18. onigenc_get_case_fold_codes_by_str_with_map
  19. onigenc_not_support_get_ctype_code_range
  20. onigenc_is_mbc_newline_0x0a
  21. onigenc_ascii_mbc_case_fold
  22. onigenc_ascii_is_mbc_ambiguous
  23. onigenc_single_byte_mbc_enc_len
  24. onigenc_single_byte_mbc_to_code
  25. onigenc_single_byte_code_to_mbclen
  26. onigenc_single_byte_code_to_mbc
  27. onigenc_single_byte_left_adjust_char_head
  28. onigenc_always_true_is_allowed_reverse_match
  29. onigenc_always_false_is_allowed_reverse_match
  30. onigenc_mbn_mbc_to_code
  31. onigenc_mbn_mbc_case_fold
  32. onigenc_mbn_is_mbc_ambiguous
  33. onigenc_mb2_code_to_mbclen
  34. onigenc_mb4_code_to_mbclen
  35. onigenc_mb2_code_to_mbc
  36. onigenc_mb4_code_to_mbc
  37. onigenc_minimum_property_name_to_ctype
  38. onigenc_mb2_is_code_ctype
  39. onigenc_mb4_is_code_ctype
  40. onigenc_with_ascii_strncmp
  41. resize_property_list
  42. onigenc_property_list_add_property
  43. onigenc_property_list_init

   1 /**********************************************************************
   2   regenc.c -  Oniguruma (regular expression library)
   3 **********************************************************************/
   4 /*-
   5  * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
   6  * All rights reserved.
   7  *
   8  * Redistribution and use in source and binary forms, with or without
   9  * modification, are permitted provided that the following conditions
  10  * are met:
  11  * 1. Redistributions of source code must retain the above copyright
  12  *    notice, this list of conditions and the following disclaimer.
  13  * 2. Redistributions in binary form must reproduce the above copyright
  14  *    notice, this list of conditions and the following disclaimer in the
  15  *    documentation and/or other materials provided with the distribution.
  16  *
  17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  27  * SUCH DAMAGE.
  28  */
  29 
  30 #include "regint.h"
  31 
/* Library-wide default character encoding, initialised from
   ONIG_ENCODING_INIT_DEFAULT. It is returned by
   onigenc_get_default_encoding() and overwritten by
   onigenc_set_default_encoding(). */
OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
  33 
/* Initialisation hook for the encoding layer. The body performs no work
   and always returns 0. */
extern int
onigenc_init(void)
{
  return 0;
}
  39 
/* Returns the current value of the global OnigEncDefaultCharEncoding. */
extern OnigEncoding
onigenc_get_default_encoding(void)
{
  return OnigEncDefaultCharEncoding;
}
  45 
/* Replaces the global default encoding with enc.
   The argument is stored as-is (no validation); the function always
   returns 0. */
extern int
onigenc_set_default_encoding(OnigEncoding enc)
{
  OnigEncDefaultCharEncoding = enc;
  return 0;
}
  52 
  53 extern UChar*
  54 onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
  55 {
  56   UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
  57   if (p < s) {
  58     p += enclen(enc, p);
  59   }
  60   return p;
  61 }
  62 
  63 extern UChar*
  64 onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
  65                                    const UChar* start, const UChar* s, const UChar** prev)
  66 {
  67   UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
  68 
  69   if (p < s) {
  70     if (prev) *prev = (const UChar* )p;
  71     p += enclen(enc, p);
  72   }
  73   else {
  74     if (prev) *prev = (const UChar* )NULL; /* Sorry */
  75   }
  76   return p;
  77 }
  78 
  79 extern UChar*
  80 onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
  81 {
  82   if (s <= start)
  83     return (UChar* )NULL;
  84 
  85   return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
  86 }
  87 
  88 extern UChar*
  89 onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, int n)
  90 {
  91   while (ONIG_IS_NOT_NULL(s) && n-- > 0) {
  92     if (s <= start)
  93       return (UChar* )NULL;
  94 
  95     s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
  96   }
  97   return (UChar* )s;
  98 }
  99 
 100 extern UChar*
 101 onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n)
 102 {
 103   UChar* q = (UChar* )p;
 104   while (n-- > 0) {
 105     q += ONIGENC_MBC_ENC_LEN(enc, q);
 106   }
 107   return (q <= end ? q : NULL);
 108 }
 109 
 110 extern int
 111 onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end)
 112 {
 113   int n = 0;
 114   UChar* q = (UChar* )p;
 115   
 116   while (q < end) {
 117     q += ONIGENC_MBC_ENC_LEN(enc, q);
 118     n++;
 119   }
 120   return n;
 121 }
 122 
 123 extern int
 124 onigenc_strlen_null(OnigEncoding enc, const UChar* s)
 125 {
 126   int n = 0;
 127   UChar* p = (UChar* )s;
 128   
 129   while (1) {
 130     if (*p == '\0') {
 131       UChar* q;
 132       int len = ONIGENC_MBC_MINLEN(enc);
 133 
 134       if (len == 1) return n;
 135       q = p + 1;
 136       while (len > 1) {
 137         if (*q != '\0') break;
 138         q++;
 139         len--;
 140       }
 141       if (len == 1) return n;
 142     }
 143     p += ONIGENC_MBC_ENC_LEN(enc, p);
 144     n++;
 145   }
 146 }
 147 
 148 extern int
 149 onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
 150 {
 151   UChar* start = (UChar* )s;
 152   UChar* p = (UChar* )s;
 153 
 154   while (1) {
 155     if (*p == '\0') {
 156       UChar* q;
 157       int len = ONIGENC_MBC_MINLEN(enc);
 158 
 159       if (len == 1) return (int )(p - start);
 160       q = p + 1;
 161       while (len > 1) {
 162         if (*q != '\0') break;
 163         q++;
 164         len--;
 165       }
 166       if (len == 1) return (int )(p - start);
 167     }
 168     p += ONIGENC_MBC_ENC_LEN(enc, p);
 169   }
 170 }
 171 
/* Byte-indexed ASCII lower-casing table (256 entries).
   Entries 0101-0132 ('A'-'Z') hold 0141-0172 ('a'-'z'); every other byte,
   including 0200-0377, maps to itself. */
const UChar OnigEncAsciiToLowerCaseTable[] = {
  '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
  '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
  '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
  '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
  '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
  '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
  '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
  '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
  '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
  '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
  '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
  '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
  '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
  '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
  '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
  '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
  '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
  '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
  '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
  '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
  '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
  '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
  '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
  '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
  '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
  '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
  '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
  '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
  '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
  '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
  '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
  '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
};
 206 
#ifdef USE_UPPER_CASE_TABLE
/* Byte-indexed ASCII upper-casing table, only built when
   USE_UPPER_CASE_TABLE is defined.
   Entries 0141-0172 ('a'-'z') hold 0101-0132 ('A'-'Z'); every other byte,
   including 0200-0377, maps to itself. */
const UChar OnigEncAsciiToUpperCaseTable[256] = {
  '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
  '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
  '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
  '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
  '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
  '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
  '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
  '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
  '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
  '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
  '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
  '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
  '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
  '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
  '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
  '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
  '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
  '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
  '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
  '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
  '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
  '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
  '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
  '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
  '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
  '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
  '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
  '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
  '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
  '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
  '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
  '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
};
#endif
 243 
/* Byte-indexed character-class table: one 16-bit mask per byte value.
   Bytes 0x80-0xff carry no class bits (0x0000).
   NOTE(review): the meaning of the individual bits is defined outside this
   file (presumably by the ONIGENC_CTYPE_* values) - confirm against the
   headers before editing any entry. */
const unsigned short OnigEncAsciiCtypeTable[256] = {
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
  0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
  0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
};
 278 
/* Byte-indexed ISO-8859-1 lower-casing table.
   Besides 'A'-'Z' -> 'a'-'z', bytes 0300-0336 map to 0340-0376, with two
   deliberate exceptions that map to themselves: 0327 and 0337. */
const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = {
  '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
  '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
  '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
  '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
  '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
  '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
  '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
  '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
  '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
  '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
  '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
  '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
  '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
  '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
  '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
  '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
  '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
  '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
  '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
  '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
  '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
  '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
  '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
  '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
  '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
  '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
  '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
  '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
  '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
  '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
  '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
  '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
 313 
#ifdef USE_UPPER_CASE_TABLE
/* Byte-indexed ISO-8859-1 upper-casing table, only built when
   USE_UPPER_CASE_TABLE is defined.
   Besides 'a'-'z' -> 'A'-'Z', bytes 0340-0376 map to 0300-0336, with two
   deliberate exceptions that map to themselves: 0367 and 0377. */
const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
  '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
  '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
  '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
  '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
  '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
  '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
  '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
  '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
  '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
  '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
  '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
  '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
  '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
  '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
  '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
  '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
  '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
  '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
  '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
  '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
  '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
  '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
  '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
  '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
  '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
  '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
  '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
  '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
  '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
  '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
  '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\367',
  '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\377',
};
#endif
 350 
/* Obsolete entry point: the table argument is ignored and the function
   has no effect. */
extern void
onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED)
{
  /* nothing */
  /* obsoleted. */
}
 357 
/* Function form of the ONIGENC_LEFT_ADJUST_CHAR_HEAD macro: forwards
   enc, start and s to it and returns its result unchanged. */
extern UChar*
onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
{
  return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
}
 363 
/* ASCII case pairs: each entry couples an upper-case letter (first member,
   0x41-0x5a) with its lower-case form (second member, 0x61-0x7a).
   onigenc_ascii_apply_all_case_fold() walks this table and reports every
   pair in both directions. */
const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = {
  { 0x41, 0x61 },
  { 0x42, 0x62 },
  { 0x43, 0x63 },
  { 0x44, 0x64 },
  { 0x45, 0x65 },
  { 0x46, 0x66 },
  { 0x47, 0x67 },
  { 0x48, 0x68 },
  { 0x49, 0x69 },
  { 0x4a, 0x6a },
  { 0x4b, 0x6b },
  { 0x4c, 0x6c },
  { 0x4d, 0x6d },
  { 0x4e, 0x6e },
  { 0x4f, 0x6f },
  { 0x50, 0x70 },
  { 0x51, 0x71 },
  { 0x52, 0x72 },
  { 0x53, 0x73 },
  { 0x54, 0x74 },
  { 0x55, 0x75 },
  { 0x56, 0x76 },
  { 0x57, 0x77 },
  { 0x58, 0x78 },
  { 0x59, 0x79 },
  { 0x5a, 0x7a }
};
 392 
 393 extern int
 394 onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
 395                                   OnigApplyAllCaseFoldFunc f, void* arg)
 396 {
 397   OnigCodePoint code;
 398   int i, r;
 399 
 400   for (i = 0;
 401        i < (int )(sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes));
 402        i++) {
 403     code = OnigAsciiLowerMap[i].to;
 404     r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg);
 405     if (r != 0) return r;
 406 
 407     code = OnigAsciiLowerMap[i].from;
 408     r = (*f)(OnigAsciiLowerMap[i].to, &code, 1, arg);
 409     if (r != 0) return r;
 410   }
 411 
 412   return 0;
 413 }
 414 
 415 extern int
 416 onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED,
 417          const OnigUChar* p, const OnigUChar* end ARG_UNUSED,
 418          OnigCaseFoldCodeItem items[])
 419 {
 420   if (0x41 <= *p && *p <= 0x5a) {
 421     items[0].byte_len = 1;
 422     items[0].code_len = 1;
 423     items[0].code[0] = (OnigCodePoint )(*p + 0x20);
 424     return 1;
 425   }
 426   else if (0x61 <= *p && *p <= 0x7a) {
 427     items[0].byte_len = 1;
 428     items[0].code_len = 1;
 429     items[0].code[0] = (OnigCodePoint )(*p - 0x20);
 430     return 1;
 431   }
 432   else
 433     return 0;
 434 }
 435 
 436 static int
 437 ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
 438                        OnigApplyAllCaseFoldFunc f, void* arg)
 439 {
 440   static OnigCodePoint ss[] = { 0x73, 0x73 };
 441 
 442   return (*f)((OnigCodePoint )0xdf, ss, 2, arg);
 443 }
 444 
 445 extern int
 446 onigenc_apply_all_case_fold_with_map(int map_size,
 447     const OnigPairCaseFoldCodes map[],
 448     int ess_tsett_flag, OnigCaseFoldType flag,
 449     OnigApplyAllCaseFoldFunc f, void* arg)
 450 {
 451   OnigCodePoint code;
 452   int i, r;
 453 
 454   r = onigenc_ascii_apply_all_case_fold(flag, f, arg);
 455   if (r != 0) return r;
 456 
 457   for (i = 0; i < map_size; i++) {
 458     code = map[i].to;
 459     r = (*f)(map[i].from, &code, 1, arg);
 460     if (r != 0) return r;
 461 
 462     code = map[i].from;
 463     r = (*f)(map[i].to, &code, 1, arg);
 464     if (r != 0) return r;
 465   }
 466 
 467   if (ess_tsett_flag != 0)
 468     return ss_apply_all_case_fold(flag, f, arg);
 469 
 470   return 0;
 471 }
 472 
 473 extern int
 474 onigenc_get_case_fold_codes_by_str_with_map(int map_size,
 475     const OnigPairCaseFoldCodes map[],
 476     int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED,
 477     const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
 478 {
 479   if (0x41 <= *p && *p <= 0x5a) {
 480     items[0].byte_len = 1;
 481     items[0].code_len = 1;
 482     items[0].code[0] = (OnigCodePoint )(*p + 0x20);
 483     if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1
 484         && (*(p+1) == 0x53 || *(p+1) == 0x73)) {
 485       /* SS */
 486       items[1].byte_len = 2;
 487       items[1].code_len = 1;
 488       items[1].code[0] = (OnigCodePoint )0xdf;
 489       return 2;
 490     }
 491     else
 492       return 1;
 493   }
 494   else if (0x61 <= *p && *p <= 0x7a) {
 495     items[0].byte_len = 1;
 496     items[0].code_len = 1;
 497     items[0].code[0] = (OnigCodePoint )(*p - 0x20);
 498     if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1
 499         && (*(p+1) == 0x73 || *(p+1) == 0x53)) {
 500       /* ss */
 501       items[1].byte_len = 2;
 502       items[1].code_len = 1;
 503       items[1].code[0] = (OnigCodePoint )0xdf;
 504       return 2;
 505     }
 506     else
 507       return 1;
 508   }
 509   else if (*p == 0xdf && ess_tsett_flag != 0) {
 510     items[0].byte_len = 1;
 511     items[0].code_len = 2;
 512     items[0].code[0] = (OnigCodePoint )'s';
 513     items[0].code[1] = (OnigCodePoint )'s';
 514 
 515     items[1].byte_len = 1;
 516     items[1].code_len = 2;
 517     items[1].code[0] = (OnigCodePoint )'S';
 518     items[1].code[1] = (OnigCodePoint )'S';
 519 
 520     items[2].byte_len = 1;
 521     items[2].code_len = 2;
 522     items[2].code[0] = (OnigCodePoint )'s';
 523     items[2].code[1] = (OnigCodePoint )'S';
 524 
 525     items[3].byte_len = 1;
 526     items[3].code_len = 2;
 527     items[3].code[0] = (OnigCodePoint )'S';
 528     items[3].code[1] = (OnigCodePoint )'s';
 529 
 530     return 4;
 531   }
 532   else {
 533     int i;
 534 
 535     for (i = 0; i < map_size; i++) {
 536       if (*p == map[i].from) {
 537         items[0].byte_len = 1;
 538         items[0].code_len = 1;
 539         items[0].code[0] = map[i].to;
 540         return 1;
 541       }
 542       else if (*p == map[i].to) {
 543         items[0].byte_len = 1;
 544         items[0].code_len = 1;
 545         items[0].code[0] = map[i].from;
 546         return 1;
 547       }
 548     }
 549   }
 550 
 551   return 0;
 552 }
 553 
 554 
/* Stub for encodings that provide no ctype code-range data: all arguments
   are ignored and ONIG_NO_SUPPORT_CONFIG is returned unconditionally. */
extern int
onigenc_not_support_get_ctype_code_range(OnigCtype ctype ARG_UNUSED,
         OnigCodePoint* sb_out ARG_UNUSED,
         const OnigCodePoint* ranges[] ARG_UNUSED)
{
  return ONIG_NO_SUPPORT_CONFIG;
}
 562 
 563 extern int
 564 onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end)
 565 {
 566   if (p < end) {
 567     if (*p == 0x0a) return 1;
 568   }
 569   return 0;
 570 }
 571 
 572 /* for single byte encodings */
 573 extern int
 574 onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p,
 575             const UChar*end ARG_UNUSED, UChar* lower)
 576 {
 577   *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p);
 578 
 579   (*p)++;
 580   return 1; /* return byte length of converted char to lower */
 581 }
 582 
#if 0
/* Disabled code (excluded from the build by "#if 0").
   Advances *pp by one byte and returns the result of
   ONIGENC_IS_ASCII_CODE_CASE_AMBIG for the byte that was skipped.
   flag and end are not used. */
extern int
onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag,
                               const UChar** pp, const UChar* end)
{
  const UChar* p = *pp;

  (*pp)++;
  return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
}
#endif
 594 
/* Encoded length of a character in a single-byte encoding: always 1.
   The pointer argument is not inspected. */
extern int
onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED)
{
  return 1;
}
 600 
/* Code point of the character at p in a single-byte encoding: the byte
   value itself. end is ignored, so p must point at a readable byte. */
extern OnigCodePoint
onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
{
  return (OnigCodePoint )(*p);
}
 606 
 607 extern int
 608 onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED)
 609 {
 610   return (code < 0x100 ? 1 : ONIGERR_INVALID_CODE_POINT_VALUE);
 611 }
 612 
/* Encodes code into buf for a single-byte encoding.
   Only the low 8 bits of code are stored (higher bits are discarded
   without an error); the return value is always 1, the number of bytes
   written. */
extern int
onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf)
{
  *buf = (UChar )(code & 0xff);
  return 1;
}
 619 
/* Left-adjust for single-byte encodings: every byte is a character head,
   so s is returned unchanged (with const cast away). start is ignored. */
extern UChar*
onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED,
                                          const UChar* s)
{
  return (UChar* )s;
}
 626 
/* "Is reverse match allowed" predicate that always answers TRUE;
   both arguments are ignored. */
extern int
onigenc_always_true_is_allowed_reverse_match(const UChar* s   ARG_UNUSED,
                                             const UChar* end ARG_UNUSED)
{
  return TRUE;
}
 633 
/* "Is reverse match allowed" predicate that always answers FALSE;
   both arguments are ignored. */
extern int
onigenc_always_false_is_allowed_reverse_match(const UChar* s   ARG_UNUSED,
                                              const UChar* end ARG_UNUSED)
{
  return FALSE;
}
 640 
 641 extern OnigCodePoint
 642 onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
 643 {
 644   int c, i, len;
 645   OnigCodePoint n;
 646 
 647   len = enclen(enc, p);
 648   n = (OnigCodePoint )(*p++);
 649   if (len == 1) return n;
 650 
 651   for (i = 1; i < len; i++) {
 652     if (p >= end) break;
 653     c = *p++;
 654     n <<= 8;  n += c;
 655   }
 656   return n;
 657 }
 658 
 659 extern int
 660 onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED,
 661                           const UChar** pp, const UChar* end ARG_UNUSED,
 662                           UChar* lower)
 663 {
 664   int len;
 665   const UChar *p = *pp;
 666 
 667   if (ONIGENC_IS_MBC_ASCII(p)) {
 668     *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
 669     (*pp)++;
 670     return 1;
 671   }
 672   else {
 673     int i;
 674 
 675     len = enclen(enc, p);
 676     for (i = 0; i < len; i++) {
 677       *lower++ = *p++;
 678     }
 679     (*pp) += len;
 680     return len; /* return byte length of converted to lower char */
 681   }
 682 }
 683 
#if 0
/* Disabled code (excluded from the build by "#if 0").
   For an ASCII byte: advances *pp by one and returns the result of
   ONIGENC_IS_ASCII_CODE_CASE_AMBIG for that byte.
   Otherwise: advances *pp by enclen(enc, p) and returns FALSE.
   flag and end are not used. */
extern int
onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag,
                             const UChar** pp, const UChar* end)
{
  const UChar* p = *pp;

  if (ONIGENC_IS_MBC_ASCII(p)) {
    (*pp)++;
    return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
  }

  (*pp) += enclen(enc, p);
  return FALSE;
}
#endif
 700 
 701 extern int
 702 onigenc_mb2_code_to_mbclen(OnigCodePoint code)
 703 {
 704   if ((code & 0xff00) != 0) return 2;
 705   else return 1;
 706 }
 707 
 708 extern int
 709 onigenc_mb4_code_to_mbclen(OnigCodePoint code)
 710 {
 711        if ((code & 0xff000000) != 0) return 4;
 712   else if ((code & 0xff0000) != 0) return 3;
 713   else if ((code & 0xff00) != 0) return 2;
 714   else return 1;
 715 }
 716 
 717 extern int
 718 onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
 719 {
 720   UChar *p = buf;
 721 
 722   if ((code & 0xff00) != 0) {
 723     *p++ = (UChar )((code >>  8) & 0xff);
 724   }
 725   *p++ = (UChar )(code & 0xff);
 726 
 727 #if 1
 728   if (enclen(enc, buf) != (p - buf))
 729     return ONIGERR_INVALID_CODE_POINT_VALUE;
 730 #endif
 731   return p - buf;
 732 }
 733 
 734 extern int
 735 onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
 736 {
 737   UChar *p = buf;
 738 
 739   if ((code & 0xff000000) != 0) {
 740     *p++ = (UChar )((code >> 24) & 0xff);
 741   }
 742   if ((code & 0xff0000) != 0 || p != buf) {
 743     *p++ = (UChar )((code >> 16) & 0xff);
 744   }
 745   if ((code & 0xff00) != 0 || p != buf) {
 746     *p++ = (UChar )((code >> 8) & 0xff);
 747   }
 748   *p++ = (UChar )(code & 0xff);
 749 
 750 #if 1
 751   if (enclen(enc, buf) != (p - buf))
 752     return ONIGERR_INVALID_CODE_POINT_VALUE;
 753 #endif
 754   return p - buf;
 755 }
 756 
 757 extern int
 758 onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
 759 {
 760   static PosixBracketEntryType PBS[] = {
 761     { (UChar* )"Alnum",  ONIGENC_CTYPE_ALNUM,  5 },
 762     { (UChar* )"Alpha",  ONIGENC_CTYPE_ALPHA,  5 },
 763     { (UChar* )"Blank",  ONIGENC_CTYPE_BLANK,  5 },
 764     { (UChar* )"Cntrl",  ONIGENC_CTYPE_CNTRL,  5 },
 765     { (UChar* )"Digit",  ONIGENC_CTYPE_DIGIT,  5 },
 766     { (UChar* )"Graph",  ONIGENC_CTYPE_GRAPH,  5 },
 767     { (UChar* )"Lower",  ONIGENC_CTYPE_LOWER,  5 },
 768     { (UChar* )"Print",  ONIGENC_CTYPE_PRINT,  5 },
 769     { (UChar* )"Punct",  ONIGENC_CTYPE_PUNCT,  5 },
 770     { (UChar* )"Space",  ONIGENC_CTYPE_SPACE,  5 },
 771     { (UChar* )"Upper",  ONIGENC_CTYPE_UPPER,  5 },
 772     { (UChar* )"XDigit", ONIGENC_CTYPE_XDIGIT, 6 },
 773     { (UChar* )"ASCII",  ONIGENC_CTYPE_ASCII,  5 },
 774     { (UChar* )"Word",   ONIGENC_CTYPE_WORD,   4 },
 775     { (UChar* )NULL, -1, 0 }
 776   };
 777 
 778   PosixBracketEntryType *pb;
 779   int len;
 780 
 781   len = onigenc_strlen(enc, p, end);
 782   for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
 783     if (len == pb->len &&
 784         onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0)
 785       return pb->ctype;
 786   }
 787 
 788   return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
 789 }
 790 
 791 extern int
 792 onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
 793                           unsigned int ctype)
 794 {
 795   if (code < 128)
 796     return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
 797   else {
 798     if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
 799       return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
 800     }
 801   }
 802 
 803   return FALSE;
 804 }
 805 
 806 extern int
 807 onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
 808                           unsigned int ctype)
 809 {
 810   if (code < 128)
 811     return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
 812   else {
 813     if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
 814       return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
 815     }
 816   }
 817 
 818   return FALSE;
 819 }
 820 
 821 extern int
 822 onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end,
 823                            const UChar* sascii /* ascii */, int n)
 824 {
 825   int x, c;
 826 
 827   while (n-- > 0) {
 828     if (p >= end) return (int )(*sascii);
 829 
 830     c = (int )ONIGENC_MBC_TO_CODE(enc, p, end);
 831     x = *sascii - c;
 832     if (x) return x;
 833 
 834     sascii++;
 835     p += enclen(enc, p);
 836   }
 837   return 0;
 838 }
 839 
 840 /* Property management */
 841 static int
 842 resize_property_list(int new_size, const OnigCodePoint*** plist, int* psize)
 843 {
 844   int size;
 845   const OnigCodePoint **list = *plist;
 846 
 847   size = sizeof(OnigCodePoint*) * new_size;
 848   if (IS_NULL(list)) {
 849     list = (const OnigCodePoint** )xmalloc(size);
 850   }
 851   else {
 852     list = (const OnigCodePoint** )xrealloc((void* )list, size);
 853   }
 854 
 855   if (IS_NULL(list)) return ONIGERR_MEMORY;
 856 
 857   *plist = list;
 858   *psize = new_size;
 859 
 860   return 0;
 861 }
 862 
 863 extern int
 864 onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop,
 865      hash_table_type **table, const OnigCodePoint*** plist, int *pnum,
 866      int *psize)
 867 {
 868 #define PROP_INIT_SIZE     16
 869 
 870   int r;
 871 
 872   if (*psize <= *pnum) {
 873     int new_size = (*psize == 0 ? PROP_INIT_SIZE : *psize * 2);
 874     r = resize_property_list(new_size, plist, psize);
 875     if (r != 0) return r;
 876   }
 877 
 878   (*plist)[*pnum] = prop;
 879 
 880   if (ONIG_IS_NULL(*table)) {
 881     *table = onig_st_init_strend_table_with_size(PROP_INIT_SIZE);
 882     if (ONIG_IS_NULL(*table)) return ONIGERR_MEMORY;
 883   }
 884 
 885   *pnum = *pnum + 1;
 886   onig_st_insert_strend(*table, name, name + strlen((char* )name),
 887                         (hash_data_type )(*pnum + ONIGENC_MAX_STD_CTYPE));
 888   return 0;
 889 }
 890 
/* Runs the initialisation callback f between THREAD_ATOMIC_START and
   THREAD_ATOMIC_END and returns f's result unchanged.
   NOTE(review): the atomic markers are macros defined outside this file;
   what protection they actually provide depends on the build
   configuration - confirm before relying on it. */
extern int
onigenc_property_list_init(int (*f)(void))
{
  int r;

  THREAD_ATOMIC_START;

  r = f();

  THREAD_ATOMIC_END;
  return r;
}

/* [<][>][^][v][top][bottom][index][help] */