This source file includes the following definitions.
- prop_lookup
- php_unicode_is_prop
- case_lookup
- php_turkish_toupper
- php_turkish_tolower
- php_unicode_toupper
- php_unicode_tolower
- php_unicode_totitle
- php_unicode_convert_case
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33 #ifdef HAVE_CONFIG_H
34 #include "config.h"
35 #endif
36
37 #include "php.h"
38 #include "php_ini.h"
39
40 #if HAVE_MBSTRING
41
42
43 #include "mbstring.h"
44 #include "php_unicode.h"
45 #include "unicode_data.h"
46
47 ZEND_EXTERN_MODULE_GLOBALS(mbstring)
48
49
50
51
/*
 * Single-bit masks indexed by bit position: masks32[i] has only bit i set.
 * php_unicode_is_prop() uses this table to test individual bits of its
 * property masks. The table is read-only, hence const.
 */
static const unsigned long masks32[32] = {
	0x00000001UL, 0x00000002UL, 0x00000004UL, 0x00000008UL,
	0x00000010UL, 0x00000020UL, 0x00000040UL, 0x00000080UL,
	0x00000100UL, 0x00000200UL, 0x00000400UL, 0x00000800UL,
	0x00001000UL, 0x00002000UL, 0x00004000UL, 0x00008000UL,
	0x00010000UL, 0x00020000UL, 0x00040000UL, 0x00080000UL,
	0x00100000UL, 0x00200000UL, 0x00400000UL, 0x00800000UL,
	0x01000000UL, 0x02000000UL, 0x04000000UL, 0x08000000UL,
	0x10000000UL, 0x20000000UL, 0x40000000UL, 0x80000000UL
};
60
61
62 static int prop_lookup(unsigned long code, unsigned long n)
63 {
64 long l, r, m;
65
66
67
68
69
70
71 if ((l = _ucprop_offsets[n]) == 0xffff)
72 return 0;
73
74
75
76
77
78 for (m = 1; n + m < _ucprop_size && _ucprop_offsets[n + m] == 0xffff; m++)
79 ;
80
81 r = _ucprop_offsets[n + m] - 1;
82
83 while (l <= r) {
84
85
86
87
88 m = (l + r) >> 1;
89 m -= (m & 1);
90 if (code > _ucprop_ranges[m + 1])
91 l = m + 2;
92 else if (code < _ucprop_ranges[m])
93 r = m - 2;
94 else if (code >= _ucprop_ranges[m] && code <= _ucprop_ranges[m + 1])
95 return 1;
96 }
97 return 0;
98
99 }
100
101 MBSTRING_API int php_unicode_is_prop(unsigned long code, unsigned long mask1,
102 unsigned long mask2)
103 {
104 unsigned long i;
105
106 if (mask1 == 0 && mask2 == 0)
107 return 0;
108
109 for (i = 0; mask1 && i < 32; i++) {
110 if ((mask1 & masks32[i]) && prop_lookup(code, i))
111 return 1;
112 }
113
114 for (i = 32; mask2 && i < _ucprop_size; i++) {
115 if ((mask2 & masks32[i & 31]) && prop_lookup(code, i))
116 return 1;
117 }
118
119 return 0;
120 }
121
122 static unsigned long case_lookup(unsigned long code, long l, long r, int field)
123 {
124 long m;
125
126
127
128
129 while (l <= r) {
130
131
132
133
134 m = (l + r) >> 1;
135 m -= (m % 3);
136 if (code > _uccase_map[m])
137 l = m + 3;
138 else if (code < _uccase_map[m])
139 r = m - 3;
140 else if (code == _uccase_map[m])
141 return _uccase_map[m + field];
142 }
143
144 return code;
145 }
146
147 MBSTRING_API unsigned long php_turkish_toupper(unsigned long code, long l, long r, int field)
148 {
149 if (code == 0x0069L) {
150 return 0x0130L;
151 }
152 return case_lookup(code, l, r, field);
153 }
154
155 MBSTRING_API unsigned long php_turkish_tolower(unsigned long code, long l, long r, int field)
156 {
157 if (code == 0x0049L) {
158 return 0x0131L;
159 }
160 return case_lookup(code, l, r, field);
161 }
162
163 MBSTRING_API unsigned long php_unicode_toupper(unsigned long code, enum mbfl_no_encoding enc)
164 {
165 int field;
166 long l, r;
167
168 if (php_unicode_is_upper(code))
169 return code;
170
171 if (php_unicode_is_lower(code)) {
172
173
174
175 field = 2;
176 l = _uccase_len[0];
177 r = (l + _uccase_len[1]) - 3;
178
179 if (enc == mbfl_no_encoding_8859_9) {
180 return php_turkish_toupper(code, l, r, field);
181 }
182
183 } else {
184
185
186
187 field = 1;
188 l = _uccase_len[0] + _uccase_len[1];
189 r = _uccase_size - 3;
190 }
191 return case_lookup(code, l, r, field);
192 }
193
194 MBSTRING_API unsigned long php_unicode_tolower(unsigned long code, enum mbfl_no_encoding enc)
195 {
196 int field;
197 long l, r;
198
199 if (php_unicode_is_lower(code))
200 return code;
201
202 if (php_unicode_is_upper(code)) {
203
204
205
206 field = 1;
207 l = 0;
208 r = _uccase_len[0] - 3;
209
210 if (enc == mbfl_no_encoding_8859_9) {
211 return php_turkish_tolower(code, l, r, field);
212 }
213
214 } else {
215
216
217
218 field = 2;
219 l = _uccase_len[0] + _uccase_len[1];
220 r = _uccase_size - 3;
221 }
222 return case_lookup(code, l, r, field);
223 }
224
225 MBSTRING_API unsigned long php_unicode_totitle(unsigned long code, enum mbfl_no_encoding enc)
226 {
227 int field;
228 long l, r;
229
230 if (php_unicode_is_title(code))
231 return code;
232
233
234
235
236 field = 2;
237
238 if (php_unicode_is_upper(code)) {
239
240
241
242 l = 0;
243 r = _uccase_len[0] - 3;
244 } else {
245
246
247
248 l = _uccase_len[0];
249 r = (l + _uccase_len[1]) - 3;
250 }
251 return case_lookup(code, l, r, field);
252
253 }
254
255
/*
 * Assemble a 32-bit value from the four big-endian bytes at `ptr`.
 *
 * Each byte is widened to unsigned long before it is shifted. Without the
 * cast the byte is promoted to (signed) int, so a first byte >= 0x80
 * shifted left by 24 overflows int (undefined behaviour) and, in practice,
 * yields a negative value that sign-extends when converted to a 64-bit
 * unsigned long.
 */
#define BE_ARY_TO_UINT32(ptr) (\
	((unsigned long)((unsigned char*)(ptr))[0]<<24) |\
	((unsigned long)((unsigned char*)(ptr))[1]<<16) |\
	((unsigned long)((unsigned char*)(ptr))[2]<< 8) |\
	((unsigned long)((unsigned char*)(ptr))[3]    ) )
261
/*
 * Store the low 32 bits of `val` at `ptr` as four big-endian bytes.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement
 * (safe in an unbraced if/else). `val` is evaluated exactly once, before
 * `ptr`, and the temporaries use names unlikely to collide with an
 * identifier appearing in the caller's arguments.
 */
#define UINT32_TO_BE_ARY(ptr,val) do { \
	unsigned int be_val_ = (val); \
	unsigned char *be_dst_ = (unsigned char*)(ptr); \
	be_dst_[0] = (be_val_>>24) & 0xff; \
	be_dst_[1] = (be_val_>>16) & 0xff; \
	be_dst_[2] = (be_val_>> 8) & 0xff; \
	be_dst_[3] = (be_val_    ) & 0xff; \
} while (0)
269
270 MBSTRING_API char *php_unicode_convert_case(int case_mode, const char *srcstr, size_t srclen, size_t *ret_len,
271 const char *src_encoding)
272 {
273 char *unicode, *newstr;
274 size_t unicode_len;
275 unsigned char *unicode_ptr;
276 size_t i;
277 enum mbfl_no_encoding _src_encoding = mbfl_name2no_encoding(src_encoding);
278
279 if (_src_encoding == mbfl_no_encoding_invalid) {
280 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", src_encoding);
281 return NULL;
282 }
283
284 unicode = php_mb_convert_encoding(srcstr, srclen, "UCS-4BE", src_encoding, &unicode_len);
285 if (unicode == NULL)
286 return NULL;
287
288 unicode_ptr = (unsigned char *)unicode;
289
290 switch(case_mode) {
291 case PHP_UNICODE_CASE_UPPER:
292 for (i = 0; i < unicode_len; i+=4) {
293 UINT32_TO_BE_ARY(&unicode_ptr[i],
294 php_unicode_toupper(BE_ARY_TO_UINT32(&unicode_ptr[i]), _src_encoding));
295 }
296 break;
297
298 case PHP_UNICODE_CASE_LOWER:
299 for (i = 0; i < unicode_len; i+=4) {
300 UINT32_TO_BE_ARY(&unicode_ptr[i],
301 php_unicode_tolower(BE_ARY_TO_UINT32(&unicode_ptr[i]), _src_encoding));
302 }
303 break;
304
305 case PHP_UNICODE_CASE_TITLE: {
306 int mode = 0;
307
308 for (i = 0; i < unicode_len; i+=4) {
309 int res = php_unicode_is_prop(
310 BE_ARY_TO_UINT32(&unicode_ptr[i]),
311 UC_MN|UC_ME|UC_CF|UC_LM|UC_SK|UC_LU|UC_LL|UC_LT|UC_PO|UC_OS, 0);
312 if (mode) {
313 if (res) {
314 UINT32_TO_BE_ARY(&unicode_ptr[i],
315 php_unicode_tolower(BE_ARY_TO_UINT32(&unicode_ptr[i]), _src_encoding));
316 } else {
317 mode = 0;
318 }
319 } else {
320 if (res) {
321 mode = 1;
322 UINT32_TO_BE_ARY(&unicode_ptr[i],
323 php_unicode_totitle(BE_ARY_TO_UINT32(&unicode_ptr[i]), _src_encoding));
324 }
325 }
326 }
327 } break;
328
329 }
330
331 newstr = php_mb_convert_encoding(unicode, unicode_len, src_encoding, "UCS-4BE", ret_len);
332 efree(unicode);
333
334 return newstr;
335 }
336
337
338 #endif
339
340
341
342
343
344
345
346
347