1/*
2   +----------------------------------------------------------------------+
3   | PHP Version 7                                                        |
4   +----------------------------------------------------------------------+
5   | This source file is subject to version 3.01 of the PHP license,      |
6   | that is bundled with this package in the file LICENSE, and is        |
7   | available through the world-wide-web at the following url:           |
8   | http://www.php.net/license/3_01.txt                                  |
9   | If you did not receive a copy of the PHP license and are unable to   |
10   | obtain it through the world-wide-web, please send a note to          |
11   | license@php.net so we can mail you a copy immediately.               |
12   +----------------------------------------------------------------------+
13   | Authors: Kirti Velankar <kirtig@yahoo-inc.com>                       |
14   +----------------------------------------------------------------------+
15*/
16
17/* $Id$ */
18
19#ifdef HAVE_CONFIG_H
20#include "config.h"
21#endif
22
23#include <unicode/ustring.h>
24#include <unicode/udata.h>
25#include <unicode/putil.h>
26#include <unicode/ures.h>
27
28#include "php_intl.h"
29#include "locale.h"
30#include "locale_class.h"
31#include "locale_methods.h"
32#include "intl_convert.h"
33#include "intl_data.h"
34
35#include <zend_API.h>
36#include <zend.h>
37#include <php.h>
38#include "main/php_ini.h"
39#include "zend_smart_str.h"
40
41ZEND_EXTERN_MODULE_GLOBALS( intl )
42
/* Separator, prefix and selector strings used with locale identifiers. */
#define SEPARATOR "_"       /* inserted between subtags by the compose helpers below */
#define SEPARATOR1 "-"      /* NOTE(review): not referenced in the visible part of this file */
#define DELIMITER "-_"      /* NOTE(review): not referenced in the visible part of this file */
#define EXTLANG_PREFIX "a"  /* NOTE(review): not referenced in the visible part of this file */
#define PRIVATE_PREFIX "x"  /* appended by add_prefix() ahead of private-use subtags */
#define DISP_NAME "name"    /* tag_name that selects the full display name */

/* Number of numbered keys ("variant0", "variant1", ...) that
 * append_multiple_key_values() probes for each kind of subtag. */
#define MAX_NO_VARIANT  15
#define MAX_NO_EXTLANG  3
#define MAX_NO_PRIVATE  15
#define MAX_NO_LOOKUP_LANG_TAG  100

/* Returned by append_key_value() when the key is absent from the input array. */
#define LOC_NOT_FOUND 1

/* Sizes required for the strings "variant15" , "extlang3", "private12" etc. */
#define VARIANT_KEYNAME_LEN  11
#define EXTLANG_KEYNAME_LEN  10
#define PRIVATE_KEYNAME_LEN  11
62
/* Grandfathered language tags, based on the IANA registry at the time this
 * code was written. The list is NULL-terminated.
 *
 * ORDER MATTERS: the leading entries line up index-for-index with
 * LOC_PREFERRED_GRANDFATHERED; only those have a preferred replacement.
 */
static const char * const LOC_GRANDFATHERED[] = {
    /* entries with a preferred replacement */
    "art-lojban",
    "i-klingon",
    "i-lux",
    "i-navajo",
    "no-bok",
    "no-nyn",
    /* entries without a preferred replacement */
    "cel-gaulish",
    "en-GB-oed",
    "i-ami",
    "i-bnn",
    "i-default",
    "i-enochian",
    "i-mingo",
    "i-pwn",
    "i-tao",
    "i-tay",
    "i-tsu",
    "sgn-BE-fr",
    "sgn-BE-nl",
    "sgn-CH-de",
    "zh-cmn",
    "zh-cmn-Hans",
    "zh-cmn-Hant",
    "zh-gan",
    "zh-guoyu",
    "zh-hakka",
    "zh-min",
    "zh-min-nan",
    "zh-wuu",
    "zh-xiang",
    "zh-yue",
    NULL
};
78
/* Preferred replacements for grandfathered tags, based on the IANA registry
 * at the time this code was written.
 *
 * Entry i here is the preferred value for LOC_GRANDFATHERED[i]; the two
 * tables must be kept in step. LOC_PREFERRED_GRANDFATHERED_LEN is the number
 * of real entries (the trailing NULL sentinel is not counted).
 */
static const int LOC_PREFERRED_GRANDFATHERED_LEN = 6;
static const char * const LOC_PREFERRED_GRANDFATHERED[] = {
    "jbo",  /* art-lojban */
    "tlh",  /* i-klingon  */
    "lb",   /* i-lux      */
    "nv",   /* i-navajo   */
    "nb",   /* no-bok     */
    "nn",   /* no-nyn     */
    NULL
};
90
/* Character / prefix classification helpers for locale identifiers.
 *
 * Every macro parameter is parenthesized so that arbitrary expressions
 * (e.g. `cond ? a : b`, `p + 1`) can be passed safely. The macros still
 * evaluate their argument more than once, so do not pass expressions with
 * side effects (`*p++`).
 */

/* returns TRUE (1) if a is an ID separator ('_' or '-'), FALSE (0) otherwise */
#define isIDSeparator(a) ((a) == '_' || (a) == '-')
/* returns TRUE if a is the keyword separator '@' */
#define isKeywordSeparator(a) ((a) == '@')
/* returns TRUE if a is the terminating NUL */
#define isEndOfTag(a) ((a) == '\0')

/* returns TRUE if a is one of the special prefix letters x/X/i/I */
#define isPrefixLetter(a) (((a) == 'x') || ((a) == 'X') || ((a) == 'i') || ((a) == 'I'))

/* returns TRUE if one of the special prefixes is here (s=string)
  'x-' or 'i-' */
#define isIDPrefix(s) (isPrefixLetter((s)[0]) && isIDSeparator((s)[1]))
/* returns TRUE if the string s starts with the keyword separator */
#define isKeywordPrefix(s) (isKeywordSeparator((s)[0]))

/* Dot terminates it because of POSIX form  where dot precedes the codepage
 * except for variant */
#define isTerminator(a) (((a) == 0) || ((a) == '.') || ((a) == '@'))
106
/* {{{ findOffset
 * Linear search of the NULL-terminated string table 'list' for an entry
 * equal (strcmp) to 'key'.
 * Returns the zero-based index of the first match, or -1 if there is none. */
static int16_t findOffset(const char* const* list, const char* key)
{
    const char* const* cursor;

    for (cursor = list; *cursor != NULL; cursor++) {
        if (strcmp(key, *cursor) != 0) {
            continue;
        }
        /* distance from the start of the table is the index */
        return (int16_t)(cursor - list);
    }

    return -1;
}
/*}}}*/
123
124static char* getPreferredTag(const char* gf_tag)
125{
126    char* result = NULL;
127    int grOffset = 0;
128
129    grOffset = findOffset( LOC_GRANDFATHERED ,gf_tag);
130    if(grOffset < 0) {
131        return NULL;
132    }
133    if( grOffset < LOC_PREFERRED_GRANDFATHERED_LEN ){
134        /* return preferred tag */
135        result = estrdup( LOC_PREFERRED_GRANDFATHERED[grOffset] );
136    } else {
137        /* Return correct grandfathered language tag */
138        result = estrdup( LOC_GRANDFATHERED[grOffset] );
139    }
140    return result;
141}
142
/* {{{
* Reverse tokenizer used for lookup: scans 'str' backwards, starting just
* before index 'savedPos', for the nearest ID separator ('_' or '-').
*
* If the character two places before that separator is also a separator
* (i.e. a one-character singleton sits in between), the earlier separator
* is reported instead.
*
* Returns the separator position, or -1 when no separator was found or the
* computed position is 0 (invalid input such as '-x-xyz' or '-sl_Latn').
*/
static int getStrrtokenPos(char* str, int savedPos)
{
    int pos = savedPos - 1;

    /* walk left until a separator or the start of the string */
    while( pos >= 0 && !isIDSeparator(str[pos]) ){
        pos--;
    }

    if( pos < 0 ){
        return -1;
    }

    if( pos >= 2 && isIDSeparator(str[pos - 2]) ){
        /* singleton in front of this separator: skip over it */
        pos -= 2;
    }

    return ( pos < 1 ) ? -1 : pos;
}
/* }}} */
172
/* {{{
* Forward search for a singleton (one-character subtag) in 'str'.
*
* Returns:
*   0     if the first separator found is at index 1, i.e. the string has
*         the form x-avy or a-prv1;
*   i+1   for the first separator at index i whose neighbour two places
*         further on (index i+2) is also a separator, i.e. the index of
*         the single character enclosed by the two separators;
*   -1    if str is NULL, empty, or contains no singleton.
*
* The look-ahead is bounded by the string length: when a separator is the
* last character, index i+2 lies beyond the terminating NUL and must not
* be read.
*/
static int getSingletonPos(const char* str)
{
    int result = -1;
    int i;
    int len;

    if( str == NULL ){
        return -1;
    }

    len = (int)strlen(str);
    for( i = 0; i < len; i++ ){
        if( !isIDSeparator(str[i]) ){
            continue;
        }
        if( i == 1 ){
            /* string is of the form x-avy or a-prv1 */
            result = 0;
            break;
        }
        /* delimiter found; check for singleton, staying inside the string */
        if( i + 2 < len && isIDSeparator(str[i + 2]) ){
            result = i + 1;
            break;
        }
    }

    return result;
}
/* }}} */
206
207/* {{{ proto static string Locale::getDefault(  )
208   Get default locale */
209/* }}} */
210/* {{{ proto static string locale_get_default( )
211   Get default locale */
PHP_NAMED_FUNCTION(zif_locale_get_default)
{
    /* Hand whatever intl_locale_get_default() reports back to the caller as a string. */
    const char *current_default = intl_locale_get_default();

    RETURN_STRING( current_default );
}
216
217/* }}} */
218
219/* {{{ proto static string Locale::setDefault( string $locale )
220   Set default locale */
221/* }}} */
222/* {{{ proto static string locale_set_default( string $locale )
223   Set default locale */
224PHP_NAMED_FUNCTION(zif_locale_set_default)
225{
226    zend_string* locale_name;
227    zend_string *ini_name;
228    char *default_locale = NULL;
229
230    if(zend_parse_parameters( ZEND_NUM_ARGS(),  "S", &locale_name) == FAILURE)
231    {
232        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
233                "locale_set_default: unable to parse input params", 0 );
234
235        RETURN_FALSE;
236    }
237
238    if (ZSTR_LEN(locale_name) == 0) {
239        default_locale = (char *)uloc_getDefault();
240        locale_name = zend_string_init(default_locale, strlen(default_locale), 0);
241    }
242
243    ini_name = zend_string_init(LOCALE_INI_NAME, sizeof(LOCALE_INI_NAME) - 1, 0);
244    zend_alter_ini_entry(ini_name, locale_name, PHP_INI_USER, PHP_INI_STAGE_RUNTIME);
245    zend_string_release(ini_name);
246    if (default_locale != NULL) {
247        zend_string_release(locale_name);
248    }
249
250    RETURN_TRUE;
251}
252/* }}} */
253
254/* {{{
255* Gets the value from ICU
256* common code shared by get_primary_language,get_script or get_region or get_variant
257* result = 0 if error, 1 if successful , -1 if no value
258*/
259static char* get_icu_value_internal( const char* loc_name , char* tag_name, int* result , int fromParseLocale)
260{
261    char*       tag_value   = NULL;
262    int32_t         tag_value_len   = 512;
263
264    int     singletonPos    = 0;
265    char*           mod_loc_name    = NULL;
266    int         grOffset    = 0;
267
268    int32_t         buflen          = 512;
269    UErrorCode      status          = U_ZERO_ERROR;
270
271
272    if( strcmp(tag_name, LOC_CANONICALIZE_TAG) != 0 ){
273        /* Handle  grandfathered languages */
274        grOffset =  findOffset( LOC_GRANDFATHERED , loc_name );
275        if( grOffset >= 0 ){
276            if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
277                return estrdup(loc_name);
278            } else {
279                /* Since Grandfathered , no value , do nothing , retutn NULL */
280                return NULL;
281            }
282        }
283
284    if( fromParseLocale==1 ){
285        /* Handle singletons */
286        if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
287            if( strlen(loc_name)>1 && (isIDPrefix(loc_name) == 1) ){
288                return estrdup(loc_name);
289            }
290        }
291
292        singletonPos = getSingletonPos( loc_name );
293        if( singletonPos == 0){
294            /* singleton at start of script, region , variant etc.
295             * or invalid singleton at start of language */
296            return NULL;
297        } else if( singletonPos > 0 ){
298            /* singleton at some position except at start
299             * strip off the singleton and rest of the loc_name */
300            mod_loc_name = estrndup ( loc_name , singletonPos-1);
301        }
302    } /* end of if fromParse */
303
304    } /* end of if != LOC_CANONICAL_TAG */
305
306    if( mod_loc_name == NULL){
307        mod_loc_name = estrdup(loc_name );
308    }
309
310    /* Proceed to ICU */
311    do{
312        tag_value = erealloc( tag_value , buflen  );
313        tag_value_len = buflen;
314
315        if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
316            buflen = uloc_getScript ( mod_loc_name ,tag_value , tag_value_len , &status);
317        }
318        if( strcmp(tag_name , LOC_LANG_TAG )==0 ){
319            buflen = uloc_getLanguage ( mod_loc_name ,tag_value , tag_value_len , &status);
320        }
321        if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
322            buflen = uloc_getCountry ( mod_loc_name ,tag_value , tag_value_len , &status);
323        }
324        if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
325            buflen = uloc_getVariant ( mod_loc_name ,tag_value , tag_value_len , &status);
326        }
327        if( strcmp(tag_name , LOC_CANONICALIZE_TAG)==0 ){
328            buflen = uloc_canonicalize ( mod_loc_name ,tag_value , tag_value_len , &status);
329        }
330
331        if( U_FAILURE( status ) ) {
332            if( status == U_BUFFER_OVERFLOW_ERROR ) {
333                status = U_ZERO_ERROR;
334                continue;
335            }
336
337            /* Error in retriving data */
338            *result = 0;
339            if( tag_value ){
340                efree( tag_value );
341            }
342            if( mod_loc_name ){
343                efree( mod_loc_name);
344            }
345            return NULL;
346        }
347    } while( buflen > tag_value_len );
348
349    if(  buflen ==0 ){
350        /* No value found */
351        *result = -1;
352        if( tag_value ){
353            efree( tag_value );
354        }
355        if( mod_loc_name ){
356            efree( mod_loc_name);
357        }
358        return NULL;
359    } else {
360        *result = 1;
361    }
362
363    if( mod_loc_name ){
364        efree( mod_loc_name);
365    }
366    return tag_value;
367}
368/* }}} */
369
370/* {{{
371* Gets the value from ICU , called when PHP userspace function is called
372* common code shared by get_primary_language,get_script or get_region or get_variant
373*/
374static void get_icu_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
375{
376
377    const char* loc_name            = NULL;
378    size_t         loc_name_len     = 0;
379
380    char*       tag_value       = NULL;
381    char*       empty_result    = "";
382
383    int         result          = 0;
384    char*       msg             = NULL;
385
386    UErrorCode  status              = U_ZERO_ERROR;
387
388    intl_error_reset( NULL );
389
390    if(zend_parse_parameters( ZEND_NUM_ARGS(), "s",
391    &loc_name ,&loc_name_len ) == FAILURE) {
392        spprintf(&msg , 0, "locale_get_%s : unable to parse input params", tag_name );
393        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,  msg , 1 );
394        efree(msg);
395
396        RETURN_FALSE;
397    }
398
399    if(loc_name_len == 0) {
400        loc_name = intl_locale_get_default();
401    }
402
403    /* Call ICU get */
404    tag_value = get_icu_value_internal( loc_name , tag_name , &result ,0);
405
406    /* No value found */
407    if( result == -1 ) {
408        if( tag_value){
409            efree( tag_value);
410        }
411        RETURN_STRING( empty_result);
412    }
413
414    /* value found */
415    if( tag_value){
416        RETVAL_STRING( tag_value );
417        //???
418        efree(tag_value);
419        return;
420    }
421
422    /* Error encountered while fetching the value */
423    if( result ==0) {
424        spprintf(&msg , 0, "locale_get_%s : unable to get locale %s", tag_name , tag_name );
425        intl_error_set( NULL, status, msg , 1 );
426        efree(msg);
427        RETURN_NULL();
428    }
429
430}
431/* }}} */
432
433/* {{{ proto static string Locale::getScript($locale)
434 * gets the script for the $locale
435 }}} */
436/* {{{ proto static string locale_get_script($locale)
437 * gets the script for the $locale
438 */
439PHP_FUNCTION( locale_get_script )
440{
441    get_icu_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
442}
443/* }}} */
444
445/* {{{ proto static string Locale::getRegion($locale)
446 * gets the region for the $locale
447 }}} */
448/* {{{ proto static string locale_get_region($locale)
449 * gets the region for the $locale
450 */
451PHP_FUNCTION( locale_get_region )
452{
453    get_icu_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
454}
455/* }}} */
456
457/* {{{ proto static string Locale::getPrimaryLanguage($locale)
458 * gets the primary language for the $locale
459 }}} */
460/* {{{ proto static string locale_get_primary_language($locale)
461 * gets the primary language for the $locale
462 */
463PHP_FUNCTION(locale_get_primary_language )
464{
465    get_icu_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
466}
467/* }}} */
468
469
470/* {{{
471 * common code shared by display_xyz functions to  get the value from ICU
472 }}} */
473static void get_icu_disp_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
474{
475    const char* loc_name            = NULL;
476    size_t         loc_name_len     = 0;
477
478    const char* disp_loc_name       = NULL;
479    size_t      disp_loc_name_len   = 0;
480    int         free_loc_name       = 0;
481
482    UChar*      disp_name       = NULL;
483    int32_t     disp_name_len   = 0;
484
485    char*       mod_loc_name        = NULL;
486
487    int32_t     buflen              = 512;
488    UErrorCode  status              = U_ZERO_ERROR;
489
490    zend_string* u8str;
491
492    char*       msg                 = NULL;
493    int         grOffset        = 0;
494
495    intl_error_reset( NULL );
496
497    if(zend_parse_parameters( ZEND_NUM_ARGS(), "s|s",
498        &loc_name, &loc_name_len ,
499        &disp_loc_name ,&disp_loc_name_len ) == FAILURE)
500    {
501        spprintf(&msg , 0, "locale_get_display_%s : unable to parse input params", tag_name );
502        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,  msg , 1 );
503        efree(msg);
504        RETURN_FALSE;
505    }
506
507    if(loc_name_len > ULOC_FULLNAME_CAPACITY) {
508        /* See bug 67397: overlong locale names cause trouble in uloc_getDisplayName */
509        spprintf(&msg , 0, "locale_get_display_%s : name too long", tag_name );
510        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,  msg , 1 );
511        efree(msg);
512        RETURN_FALSE;
513    }
514
515    if(loc_name_len == 0) {
516        loc_name = intl_locale_get_default();
517    }
518
519    if( strcmp(tag_name, DISP_NAME) != 0 ){
520        /* Handle grandfathered languages */
521        grOffset = findOffset( LOC_GRANDFATHERED , loc_name );
522        if( grOffset >= 0 ){
523            if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
524                mod_loc_name = getPreferredTag( loc_name );
525            } else {
526                /* Since Grandfathered, no value, do nothing, retutn NULL */
527                RETURN_FALSE;
528            }
529        }
530    } /* end of if != LOC_CANONICAL_TAG */
531
532    if( mod_loc_name==NULL ){
533        mod_loc_name = estrdup( loc_name );
534    }
535
536    /* Check if disp_loc_name passed , if not use default locale */
537    if( !disp_loc_name){
538        disp_loc_name = estrdup(intl_locale_get_default());
539        free_loc_name = 1;
540    }
541
542    /* Get the disp_value for the given locale */
543    do{
544        disp_name = erealloc( disp_name , buflen * sizeof(UChar)  );
545        disp_name_len = buflen;
546
547        if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
548            buflen = uloc_getDisplayLanguage ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
549        } else if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
550            buflen = uloc_getDisplayScript ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
551        } else if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
552            buflen = uloc_getDisplayCountry ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
553        } else if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
554            buflen = uloc_getDisplayVariant ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
555        } else if( strcmp(tag_name , DISP_NAME)==0 ){
556            buflen = uloc_getDisplayName ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
557        }
558
559        /* U_STRING_NOT_TERMINATED_WARNING is admissible here; don't look for it */
560        if( U_FAILURE( status ) )
561        {
562            if( status == U_BUFFER_OVERFLOW_ERROR )
563            {
564                status = U_ZERO_ERROR;
565                continue;
566            }
567
568            spprintf(&msg, 0, "locale_get_display_%s : unable to get locale %s", tag_name , tag_name );
569            intl_error_set( NULL, status, msg , 1 );
570            efree(msg);
571            if( disp_name){
572                efree( disp_name );
573            }
574            if( mod_loc_name){
575                efree( mod_loc_name );
576            }
577            if (free_loc_name) {
578                efree((void *)disp_loc_name);
579                disp_loc_name = NULL;
580            }
581            RETURN_FALSE;
582        }
583    } while( buflen > disp_name_len );
584
585    if( mod_loc_name){
586        efree( mod_loc_name );
587    }
588    if (free_loc_name) {
589        efree((void *)disp_loc_name);
590        disp_loc_name = NULL;
591    }
592    /* Convert display locale name from UTF-16 to UTF-8. */
593    u8str = intl_convert_utf16_to_utf8(disp_name, buflen, &status );
594    efree( disp_name );
595    if( !u8str )
596    {
597        spprintf(&msg, 0, "locale_get_display_%s :error converting display name for %s to UTF-8", tag_name , tag_name );
598        intl_error_set( NULL, status, msg , 1 );
599        efree(msg);
600        RETURN_FALSE;
601    }
602
603    RETVAL_NEW_STR( u8str );
604}
605/* }}} */
606
607/* {{{ proto static string Locale::getDisplayName($locale[, $in_locale = null])
608* gets the name for the $locale in $in_locale or default_locale
609 }}} */
610/* {{{ proto static string get_display_name($locale[, $in_locale = null])
611* gets the name for the $locale in $in_locale or default_locale
612*/
613PHP_FUNCTION(locale_get_display_name)
614{
615    get_icu_disp_value_src_php( DISP_NAME , INTERNAL_FUNCTION_PARAM_PASSTHRU );
616}
617/* }}} */
618
619/* {{{ proto static string Locale::getDisplayLanguage($locale[, $in_locale = null])
620* gets the language for the $locale in $in_locale or default_locale
621 }}} */
622/* {{{ proto static string get_display_language($locale[, $in_locale = null])
623* gets the language for the $locale in $in_locale or default_locale
624*/
625PHP_FUNCTION(locale_get_display_language)
626{
627    get_icu_disp_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
628}
629/* }}} */
630
631/* {{{ proto static string Locale::getDisplayScript($locale, $in_locale = null)
632* gets the script for the $locale in $in_locale or default_locale
633 }}} */
634/* {{{ proto static string get_display_script($locale, $in_locale = null)
635* gets the script for the $locale in $in_locale or default_locale
636*/
637PHP_FUNCTION(locale_get_display_script)
638{
639    get_icu_disp_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
640}
641/* }}} */
642
643/* {{{ proto static string Locale::getDisplayRegion($locale, $in_locale = null)
644* gets the region for the $locale in $in_locale or default_locale
645 }}} */
646/* {{{ proto static string get_display_region($locale, $in_locale = null)
647* gets the region for the $locale in $in_locale or default_locale
648*/
649PHP_FUNCTION(locale_get_display_region)
650{
651    get_icu_disp_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
652}
653/* }}} */
654
655/* {{{
656* proto static string Locale::getDisplayVariant($locale, $in_locale = null)
657* gets the variant for the $locale in $in_locale or default_locale
658 }}} */
659/* {{{
660* proto static string get_display_variant($locale, $in_locale = null)
661* gets the variant for the $locale in $in_locale or default_locale
662*/
663PHP_FUNCTION(locale_get_display_variant)
664{
665    get_icu_disp_value_src_php( LOC_VARIANT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
666}
667/* }}} */
668
669 /* {{{ proto static array getKeywords(string $locale) {
670 * return an associative array containing keyword-value
671 * pairs for this locale. The keys are keys to the array (doh!)
672 * }}}*/
673 /* {{{ proto static array locale_get_keywords(string $locale) {
674 * return an associative array containing keyword-value
675 * pairs for this locale. The keys are keys to the array (doh!)
676 */
677PHP_FUNCTION( locale_get_keywords )
678{
679    UEnumeration*   e        = NULL;
680    UErrorCode      status   = U_ZERO_ERROR;
681
682    const char*     kw_key        = NULL;
683    int32_t         kw_key_len    = 0;
684
685    const char*         loc_name        = NULL;
686    size_t              loc_name_len    = 0;
687
688/*
689    ICU expects the buffer to be allocated  before calling the function
690    and so the buffer size has been explicitly specified
691    ICU uloc.h #define  ULOC_KEYWORD_AND_VALUES_CAPACITY   100
692    hence the kw_value buffer size is 100
693*/
694    zend_string *kw_value_str;
695    int32_t     kw_value_len = 100;
696
697    intl_error_reset( NULL );
698
699    if(zend_parse_parameters( ZEND_NUM_ARGS(), "s",
700        &loc_name, &loc_name_len ) == FAILURE)
701    {
702        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
703             "locale_get_keywords: unable to parse input params", 0 );
704
705        RETURN_FALSE;
706    }
707
708    if(loc_name_len == 0) {
709        loc_name = intl_locale_get_default();
710    }
711
712    /* Get the keywords */
713    e = uloc_openKeywords( loc_name, &status );
714    if( e != NULL )
715    {
716        /* Traverse it, filling the return array. */
717        array_init( return_value );
718
719        while( ( kw_key = uenum_next( e, &kw_key_len, &status ) ) != NULL ){
720            kw_value_len = 100;
721            kw_value_str = zend_string_alloc(kw_value_len, 0);
722
723            /* Get the keyword value for each keyword */
724            kw_value_len=uloc_getKeywordValue( loc_name, kw_key, ZSTR_VAL(kw_value_str), kw_value_len, &status );
725            if (status == U_BUFFER_OVERFLOW_ERROR) {
726                status = U_ZERO_ERROR;
727                kw_value_str = zend_string_extend(kw_value_str, kw_value_len, 0);
728                kw_value_len=uloc_getKeywordValue( loc_name,kw_key, ZSTR_VAL(kw_value_str), kw_value_len+1, &status );
729            } else if(!U_FAILURE(status)) {
730                kw_value_str = zend_string_truncate(kw_value_str, kw_value_len, 0);
731            }
732            if (U_FAILURE(status)) {
733                intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "locale_get_keywords: Error encountered while getting the keyword  value for the  keyword", 0 );
734                if( kw_value_str){
735                    zend_string_free( kw_value_str );
736                }
737                zval_dtor(return_value);
738                RETURN_FALSE;
739            }
740
741            add_assoc_str( return_value, (char *)kw_key, kw_value_str);
742        } /* end of while */
743
744    } /* end of if e!=NULL */
745
746    uenum_close( e );
747}
748/* }}} */
749
 /* {{{ proto static string Locale::canonicalize($locale)
 * @return string the canonicalized locale
 * }}} */
 /* {{{ proto static string locale_canonicalize(string $locale)
 * @param string $locale    The locale string to canonicalize
 */
756PHP_FUNCTION(locale_canonicalize)
757{
758    get_icu_value_src_php( LOC_CANONICALIZE_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
759}
760/* }}} */
761
762/* {{{ append_key_value
763* Internal function which is called from locale_compose
764* gets the value for the key_name and appends to the loc_name
765* returns 1 if successful , -1 if not found ,
766* 0 if array element is not a string , -2 if buffer-overflow
767*/
768static int append_key_value(smart_str* loc_name, HashTable* hash_arr, char* key_name)
769{
770    zval *ele_value;
771
772    if ((ele_value = zend_hash_str_find(hash_arr , key_name, strlen(key_name))) != NULL ) {
773        if(Z_TYPE_P(ele_value)!= IS_STRING ){
774            /* element value is not a string */
775            return FAILURE;
776        }
777        if(strcmp(key_name, LOC_LANG_TAG) != 0 &&
778           strcmp(key_name, LOC_GRANDFATHERED_LANG_TAG)!=0 ) {
779            /* not lang or grandfathered tag */
780            smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
781        }
782        smart_str_appendl(loc_name, Z_STRVAL_P(ele_value) , Z_STRLEN_P(ele_value));
783        return SUCCESS;
784    }
785
786    return LOC_NOT_FOUND;
787}
788/* }}} */
789
790/* {{{ append_prefix , appends the prefix needed
791* e.g. private adds 'x'
792*/
793static void add_prefix(smart_str* loc_name, char* key_name)
794{
795    if( strncmp(key_name , LOC_PRIVATE_TAG , 7) == 0 ){
796        smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
797        smart_str_appendl(loc_name, PRIVATE_PREFIX , sizeof(PRIVATE_PREFIX)-1);
798    }
799}
800/* }}} */
801
802/* {{{ append_multiple_key_values
803* Internal function which is called from locale_compose
804* gets the multiple values for the key_name and appends to the loc_name
805* used for 'variant','extlang','private'
806* returns 1 if successful , -1 if not found ,
807* 0 if array element is not a string , -2 if buffer-overflow
808*/
809static int append_multiple_key_values(smart_str* loc_name, HashTable* hash_arr, char* key_name)
810{
811    zval    *ele_value;
812    int     i       = 0;
813    int     isFirstSubtag   = 0;
814    int     max_value   = 0;
815
816    /* Variant/ Extlang/Private etc. */
817    if ((ele_value = zend_hash_str_find( hash_arr , key_name , strlen(key_name))) != NULL) {
818        if( Z_TYPE_P(ele_value) == IS_STRING ){
819            add_prefix( loc_name , key_name);
820
821            smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
822            smart_str_appendl(loc_name, Z_STRVAL_P(ele_value) , Z_STRLEN_P(ele_value));
823            return SUCCESS;
824        } else if(Z_TYPE_P(ele_value) == IS_ARRAY ) {
825            HashTable *arr = HASH_OF(ele_value);
826            zval *data;
827
828            ZEND_HASH_FOREACH_VAL(arr, data) {
829                if(Z_TYPE_P(data) != IS_STRING) {
830                    return FAILURE;
831                }
832                if (isFirstSubtag++ == 0){
833                    add_prefix(loc_name , key_name);
834                }
835                smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
836                smart_str_appendl(loc_name, Z_STRVAL_P(data) , Z_STRLEN_P(data));
837            } ZEND_HASH_FOREACH_END();
838            return SUCCESS;
839        } else {
840            return FAILURE;
841        }
842    } else {
843        char cur_key_name[31];
844        /* Decide the max_value: the max. no. of elements allowed */
845        if( strcmp(key_name , LOC_VARIANT_TAG) ==0 ){
846            max_value  = MAX_NO_VARIANT;
847        }
848        if( strcmp(key_name , LOC_EXTLANG_TAG) ==0 ){
849            max_value  = MAX_NO_EXTLANG;
850        }
851        if( strcmp(key_name , LOC_PRIVATE_TAG) ==0 ){
852            max_value  = MAX_NO_PRIVATE;
853        }
854
855        /* Multiple variant values as variant0, variant1 ,variant2 */
856        isFirstSubtag = 0;
857        for( i=0 ; i< max_value; i++ ){
858            snprintf( cur_key_name , 30, "%s%d", key_name , i);
859            if ((ele_value = zend_hash_str_find( hash_arr , cur_key_name , strlen(cur_key_name))) != NULL) {
860                if( Z_TYPE_P(ele_value)!= IS_STRING ){
861                    /* variant is not a string */
862                    return FAILURE;
863                }
864                /* Add the contents */
865                if (isFirstSubtag++ == 0){
866                    add_prefix(loc_name , cur_key_name);
867                }
868                smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
869                smart_str_appendl(loc_name, Z_STRVAL_P(ele_value) , Z_STRLEN_P(ele_value));
870            }
871        } /* end of for */
872    } /* end of else */
873
874    return SUCCESS;
875}
876/* }}} */
877
878/*{{{
879* If applicable sets error message and aborts locale_compose gracefully
880* returns 0  if locale_compose needs to be aborted
881* otherwise returns 1
882*/
883static int handleAppendResult( int result, smart_str* loc_name)
884{
885    intl_error_reset( NULL );
886    if( result == FAILURE) {
887        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
888             "locale_compose: parameter array element is not a string", 0 );
889        smart_str_free(loc_name);
890        return 0;
891    }
892    return 1;
893}
894/* }}} */
895
/* Terminates the smart_str with smart_str_0() and hands its zend_string to
 * RETURN_NEW_STR. Wrapped in do { } while (0) so the two statements stay
 * together when the macro is the body of an unbraced if/else.
 * Invoke it with a trailing semicolon. */
#define RETURN_SMART_STR(str) do { smart_str_0((str)); RETURN_NEW_STR((str)->s); } while (0)
897/* {{{ proto static string Locale::composeLocale($array)
898* Creates a locale by combining the parts of locale-ID passed
899* }}} */
900/* {{{ proto static string compose_locale($array)
901* Creates a locale by combining the parts of locale-ID passed
902* }}} */
903PHP_FUNCTION(locale_compose)
904{
905    smart_str       loc_name_s = {0};
906    smart_str *loc_name = &loc_name_s;
907    zval*           arr = NULL;
908    HashTable*      hash_arr = NULL;
909    int             result = 0;
910
911    intl_error_reset( NULL );
912
913    if(zend_parse_parameters( ZEND_NUM_ARGS(), "a",
914        &arr) == FAILURE)
915    {
916        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
917             "locale_compose: unable to parse input params", 0 );
918        RETURN_FALSE;
919    }
920
921    hash_arr = HASH_OF( arr );
922
923    if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 )
924        RETURN_FALSE;
925
926    /* Check for grandfathered first */
927    result = append_key_value(loc_name, hash_arr,  LOC_GRANDFATHERED_LANG_TAG);
928    if( result == SUCCESS){
929        RETURN_SMART_STR(loc_name);
930    }
931    if( !handleAppendResult( result, loc_name)){
932        RETURN_FALSE;
933    }
934
935    /* Not grandfathered */
936    result = append_key_value(loc_name, hash_arr , LOC_LANG_TAG);
937    if( result == LOC_NOT_FOUND ){
938        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
939        "locale_compose: parameter array does not contain 'language' tag.", 0 );
940        smart_str_free(loc_name);
941        RETURN_FALSE;
942    }
943    if( !handleAppendResult( result, loc_name)){
944        RETURN_FALSE;
945    }
946
947    /* Extlang */
948    result = append_multiple_key_values(loc_name, hash_arr , LOC_EXTLANG_TAG);
949    if( !handleAppendResult( result, loc_name)){
950        RETURN_FALSE;
951    }
952
953    /* Script */
954    result = append_key_value(loc_name, hash_arr , LOC_SCRIPT_TAG);
955    if( !handleAppendResult( result, loc_name)){
956        RETURN_FALSE;
957    }
958
959    /* Region */
960    result = append_key_value( loc_name, hash_arr , LOC_REGION_TAG);
961    if( !handleAppendResult( result, loc_name)){
962        RETURN_FALSE;
963    }
964
965    /* Variant */
966    result = append_multiple_key_values( loc_name, hash_arr , LOC_VARIANT_TAG);
967    if( !handleAppendResult( result, loc_name)){
968        RETURN_FALSE;
969    }
970
971    /* Private */
972    result = append_multiple_key_values( loc_name, hash_arr , LOC_PRIVATE_TAG);
973    if( !handleAppendResult( result, loc_name)){
974        RETURN_FALSE;
975    }
976
977    RETURN_SMART_STR(loc_name);
978}
979/* }}} */
980
981
/*{{{
* Parses the locale and returns the private subtags if existing,
* else returns NULL
* e.g. for locale='en_US-x-prv1-prv2-prv3'
* returns a newly allocated copy of the string 'prv1-prv2-prv3'
* The returned string is allocated with estrndup; the caller must efree it.
*/
static char* get_private_subtags(const char* loc_name)
{
    char*       result       = NULL;
    int         singletonPos = 0;
    int         len          = 0;
    const char* mod_loc_name = loc_name;

    if( !loc_name || loc_name[0] == '\0' ){
        return NULL;
    }

    len = strlen(mod_loc_name);
    while( (singletonPos = getSingletonPos(mod_loc_name)) != -1 ){
        char singleton = mod_loc_name[singletonPos];

        if( singleton == 'x' || singleton == 'X' ){
            /* private subtag start found; when loc_name ends with '-x-'
             * there is nothing after it and NULL is returned */
            if( singletonPos + 2 != len ){
                result = estrndup( mod_loc_name + singletonPos + 2 , len - (singletonPos + 2) );
            }
            break;
        }

        if( singletonPos + 1 >= len ){
            /* String end */
            break;
        }

        /* singleton found but not a private subtag, hence check further
         * in the string for the private subtag */
        mod_loc_name += singletonPos + 1;
        len = strlen(mod_loc_name);
    } /* end of while */

    return result;
}
/* }}} */
1030
1031/* {{{ code used by locale_parse
1032*/
1033static int add_array_entry(const char* loc_name, zval* hash_arr, char* key_name)
1034{
1035    char*   key_value   = NULL;
1036    char*   cur_key_name    = NULL;
1037    char*   token           = NULL;
1038    char*   last_ptr    = NULL;
1039
1040    int result      = 0;
1041    int     cur_result      = 0;
1042    int     cnt         = 0;
1043
1044
1045    if( strcmp(key_name , LOC_PRIVATE_TAG)==0 ){
1046        key_value = get_private_subtags( loc_name );
1047        result = 1;
1048    } else {
1049        key_value = get_icu_value_internal( loc_name , key_name , &result,1 );
1050    }
1051    if( (strcmp(key_name , LOC_PRIVATE_TAG)==0) ||
1052        ( strcmp(key_name , LOC_VARIANT_TAG)==0) ){
1053        if( result > 0 && key_value){
1054            /* Tokenize on the "_" or "-"  */
1055            token = php_strtok_r( key_value , DELIMITER ,&last_ptr);
1056            if( cur_key_name ){
1057                efree( cur_key_name);
1058            }
1059            cur_key_name = (char*)ecalloc( 25,  25);
1060            sprintf( cur_key_name , "%s%d", key_name , cnt++);
1061            add_assoc_string( hash_arr, cur_key_name , token);
1062            /* tokenize on the "_" or "-" and stop  at singleton if any */
1063            while( (token = php_strtok_r(NULL , DELIMITER , &last_ptr)) && (strlen(token)>1) ){
1064                sprintf( cur_key_name , "%s%d", key_name , cnt++);
1065                add_assoc_string( hash_arr, cur_key_name , token);
1066            }
1067/*
1068            if( strcmp(key_name, LOC_PRIVATE_TAG) == 0 ){
1069            }
1070*/
1071        }
1072    } else {
1073        if( result == 1 ){
1074            add_assoc_string( hash_arr, key_name , key_value);
1075            cur_result = 1;
1076        }
1077    }
1078
1079    if( cur_key_name ){
1080        efree( cur_key_name);
1081    }
1082    /*if( key_name != LOC_PRIVATE_TAG && key_value){*/
1083    if( key_value){
1084        efree(key_value);
1085    }
1086    return cur_result;
1087}
1088/* }}} */
1089
1090/* {{{ proto static array Locale::parseLocale($locale)
1091* parses a locale-id into an array the different parts of it
1092 }}} */
1093/* {{{ proto static array parse_locale($locale)
1094* parses a locale-id into an array the different parts of it
1095*/
1096PHP_FUNCTION(locale_parse)
1097{
1098    const char* loc_name        = NULL;
1099    size_t         loc_name_len    = 0;
1100    int         grOffset        = 0;
1101
1102    intl_error_reset( NULL );
1103
1104    if(zend_parse_parameters( ZEND_NUM_ARGS(), "s",
1105        &loc_name, &loc_name_len ) == FAILURE)
1106    {
1107        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1108             "locale_parse: unable to parse input params", 0 );
1109
1110        RETURN_FALSE;
1111    }
1112
1113    if(loc_name_len == 0) {
1114        loc_name = intl_locale_get_default();
1115    }
1116
1117    array_init( return_value );
1118
1119    grOffset =  findOffset( LOC_GRANDFATHERED , loc_name );
1120    if( grOffset >= 0 ){
1121        add_assoc_string( return_value , LOC_GRANDFATHERED_LANG_TAG, (char *)loc_name);
1122    }
1123    else{
1124        /* Not grandfathered */
1125        add_array_entry( loc_name , return_value , LOC_LANG_TAG);
1126        add_array_entry( loc_name , return_value , LOC_SCRIPT_TAG);
1127        add_array_entry( loc_name , return_value , LOC_REGION_TAG);
1128        add_array_entry( loc_name , return_value , LOC_VARIANT_TAG);
1129        add_array_entry( loc_name , return_value , LOC_PRIVATE_TAG);
1130    }
1131}
1132/* }}} */
1133
1134/* {{{ proto static array Locale::getAllVariants($locale)
1135* gets an array containing the list of variants, or null
1136 }}} */
1137/* {{{ proto static array locale_get_all_variants($locale)
1138* gets an array containing the list of variants, or null
1139*/
1140PHP_FUNCTION(locale_get_all_variants)
1141{
1142    const char*     loc_name        = NULL;
1143    size_t          loc_name_len    = 0;
1144
1145    int result      = 0;
1146    char*   token       = NULL;
1147    char*   variant     = NULL;
1148    char*   saved_ptr   = NULL;
1149
1150    intl_error_reset( NULL );
1151
1152    if(zend_parse_parameters( ZEND_NUM_ARGS(), "s",
1153    &loc_name, &loc_name_len ) == FAILURE)
1154    {
1155        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1156         "locale_parse: unable to parse input params", 0 );
1157
1158        RETURN_FALSE;
1159    }
1160
1161    if(loc_name_len == 0) {
1162        loc_name = intl_locale_get_default();
1163    }
1164
1165
1166    array_init( return_value );
1167
1168    /* If the locale is grandfathered, stop, no variants */
1169    if( findOffset( LOC_GRANDFATHERED , loc_name ) >=  0 ){
1170        /* ("Grandfathered Tag. No variants."); */
1171    }
1172    else {
1173    /* Call ICU variant */
1174        variant = get_icu_value_internal( loc_name , LOC_VARIANT_TAG , &result ,0);
1175        if( result > 0 && variant){
1176            /* Tokenize on the "_" or "-" */
1177            token = php_strtok_r( variant , DELIMITER , &saved_ptr);
1178            add_next_index_stringl( return_value, token , strlen(token));
1179            /* tokenize on the "_" or "-" and stop  at singleton if any */
1180            while( (token = php_strtok_r(NULL , DELIMITER, &saved_ptr)) && (strlen(token)>1) ){
1181                add_next_index_stringl( return_value, token , strlen(token));
1182            }
1183        }
1184        if( variant ){
1185            efree( variant );
1186        }
1187    }
1188
1189
1190}
1191/* }}} */
1192
/*{{{
* Converts to lower case and also replaces all hyphens with the underscore
*
* str    : NUL-terminated input; may be NULL
* retstr : output buffer, must hold at least strlen(str)+1 bytes
*
* returns 0 without touching retstr when str is NULL or empty,
* otherwise writes the converted, NUL-terminated string and returns 1
*/
static int strToMatch(const char* str ,char *retstr)
{
    if( (!str) || str[0] == '\0'){
        return 0;
    }

    for( ; *str != '\0'; str++, retstr++ ){
        if( *str == '-' ){
            *retstr = '_';
        } else {
            /* tolower() is only defined for values representable as
             * unsigned char (or EOF), so never pass a plain, possibly
             * negative, char */
            *retstr = (char)tolower( (unsigned char)*str );
        }
    }
    *retstr = '\0';

    return 1;
}
/* }}} */
1225
1226/* {{{ proto static boolean Locale::filterMatches(string $langtag, string $locale[, bool $canonicalize])
1227* Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm
1228*/
1229/* }}} */
1230/* {{{ proto boolean locale_filter_matches(string $langtag, string $locale[, bool $canonicalize])
1231* Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm
1232*/
1233PHP_FUNCTION(locale_filter_matches)
1234{
1235    char*           lang_tag        = NULL;
1236    size_t          lang_tag_len    = 0;
1237    const char*     loc_range       = NULL;
1238    size_t          loc_range_len   = 0;
1239
1240    int     result      = 0;
1241    char*       token       = 0;
1242    char*       chrcheck    = NULL;
1243
1244    char*           can_lang_tag    = NULL;
1245    char*           can_loc_range   = NULL;
1246
1247    char*           cur_lang_tag    = NULL;
1248    char*           cur_loc_range   = NULL;
1249
1250    zend_bool   boolCanonical   = 0;
1251    UErrorCode  status      = U_ZERO_ERROR;
1252
1253    intl_error_reset( NULL );
1254
1255    if(zend_parse_parameters( ZEND_NUM_ARGS(), "ss|b",
1256        &lang_tag, &lang_tag_len , &loc_range , &loc_range_len ,
1257        &boolCanonical) == FAILURE)
1258    {
1259        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1260        "locale_filter_matches: unable to parse input params", 0 );
1261
1262        RETURN_FALSE;
1263    }
1264
1265    if(loc_range_len == 0) {
1266        loc_range = intl_locale_get_default();
1267    }
1268
1269    if( strcmp(loc_range,"*")==0){
1270        RETURN_TRUE;
1271    }
1272
1273    if( boolCanonical ){
1274        /* canonicalize loc_range */
1275        can_loc_range=get_icu_value_internal( loc_range , LOC_CANONICALIZE_TAG , &result , 0);
1276        if( result ==0) {
1277            intl_error_set( NULL, status,
1278                "locale_filter_matches : unable to canonicalize loc_range" , 0 );
1279            RETURN_FALSE;
1280        }
1281
1282        /* canonicalize lang_tag */
1283        can_lang_tag = get_icu_value_internal( lang_tag , LOC_CANONICALIZE_TAG , &result ,  0);
1284        if( result ==0) {
1285            intl_error_set( NULL, status,
1286                "locale_filter_matches : unable to canonicalize lang_tag" , 0 );
1287            RETURN_FALSE;
1288        }
1289
1290        /* Convert to lower case for case-insensitive comparison */
1291        cur_lang_tag = ecalloc( 1, strlen(can_lang_tag) + 1);
1292
1293        /* Convert to lower case for case-insensitive comparison */
1294        result = strToMatch( can_lang_tag , cur_lang_tag);
1295        if( result == 0) {
1296            efree( cur_lang_tag );
1297            efree( can_lang_tag );
1298            RETURN_FALSE;
1299        }
1300
1301        cur_loc_range = ecalloc( 1, strlen(can_loc_range) + 1);
1302        result = strToMatch( can_loc_range , cur_loc_range );
1303        if( result == 0) {
1304            efree( cur_lang_tag );
1305            efree( can_lang_tag );
1306            efree( cur_loc_range );
1307            efree( can_loc_range );
1308            RETURN_FALSE;
1309        }
1310
1311        /* check if prefix */
1312        token   = strstr( cur_lang_tag , cur_loc_range );
1313
1314        if( token && (token==cur_lang_tag) ){
1315            /* check if the char. after match is SEPARATOR */
1316            chrcheck = token + (strlen(cur_loc_range));
1317            if( isIDSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
1318                if( cur_lang_tag){
1319                    efree( cur_lang_tag );
1320                }
1321                if( cur_loc_range){
1322                    efree( cur_loc_range );
1323                }
1324                if( can_lang_tag){
1325                    efree( can_lang_tag );
1326                }
1327                if( can_loc_range){
1328                    efree( can_loc_range );
1329                }
1330                RETURN_TRUE;
1331            }
1332        }
1333
1334        /* No prefix as loc_range */
1335        if( cur_lang_tag){
1336            efree( cur_lang_tag );
1337        }
1338        if( cur_loc_range){
1339            efree( cur_loc_range );
1340        }
1341        if( can_lang_tag){
1342            efree( can_lang_tag );
1343        }
1344        if( can_loc_range){
1345            efree( can_loc_range );
1346        }
1347        RETURN_FALSE;
1348
1349    } /* end of if isCanonical */
1350    else{
1351        /* Convert to lower case for case-insensitive comparison */
1352        cur_lang_tag = ecalloc( 1, strlen(lang_tag ) + 1);
1353
1354        result = strToMatch( lang_tag , cur_lang_tag);
1355        if( result == 0) {
1356            efree( cur_lang_tag );
1357            RETURN_FALSE;
1358        }
1359        cur_loc_range = ecalloc( 1, strlen(loc_range ) + 1);
1360        result = strToMatch( loc_range , cur_loc_range );
1361        if( result == 0) {
1362            efree( cur_lang_tag );
1363            efree( cur_loc_range );
1364            RETURN_FALSE;
1365        }
1366
1367        /* check if prefix */
1368        token   = strstr( cur_lang_tag , cur_loc_range );
1369
1370        if( token && (token==cur_lang_tag) ){
1371            /* check if the char. after match is SEPARATOR */
1372            chrcheck = token + (strlen(cur_loc_range));
1373            if( isIDSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
1374                if( cur_lang_tag){
1375                    efree( cur_lang_tag );
1376                }
1377                if( cur_loc_range){
1378                    efree( cur_loc_range );
1379                }
1380                RETURN_TRUE;
1381            }
1382        }
1383
1384        /* No prefix as loc_range */
1385        if( cur_lang_tag){
1386            efree( cur_lang_tag );
1387        }
1388        if( cur_loc_range){
1389            efree( cur_loc_range );
1390        }
1391        RETURN_FALSE;
1392
1393    }
1394}
1395/* }}} */
1396
/* Frees the even-indexed slot of each of the first arr_size pairs stored in
 * arr (odd-indexed slots are left alone), then frees arr itself. */
static void array_cleanup( char* arr[] , int arr_size)
{
    char** slot = arr;
    char** stop = arr + arr_size * 2;

    /* step over the array one pair at a time */
    for( ; slot < stop; slot += 2 ){
        if( *slot ){
            efree( *slot );
        }
    }
    efree(arr);
}
1407
/* Releases the caller's cur_arr (cur_arr_len pairs) and returns value.
 * Wrapped in do/while(0) so the macro expands to exactly one statement and
 * stays correct when used as the body of an unbraced if/else. */
#define LOOKUP_CLEAN_RETURN(value)  do { array_cleanup(cur_arr, cur_arr_len); return (value); } while (0)
1409/* {{{
1410* returns the lookup result to lookup_loc_range_src_php
1411* internal function
1412*/
1413static zend_string* lookup_loc_range(const char* loc_range, HashTable* hash_arr, int canonicalize )
1414{
1415    int i = 0;
1416    int cur_arr_len = 0;
1417    int result = 0;
1418
1419    char* lang_tag = NULL;
1420    zval* ele_value = NULL;
1421    char** cur_arr = NULL;
1422
1423    char* cur_loc_range = NULL;
1424    char* can_loc_range = NULL;
1425    int saved_pos = 0;
1426
1427    zend_string* return_value = NULL;
1428
1429    cur_arr = ecalloc(zend_hash_num_elements(hash_arr)*2, sizeof(char *));
1430    ZEND_HASH_FOREACH_VAL(hash_arr, ele_value) {
1431    /* convert the array to lowercase , also replace hyphens with the underscore and store it in cur_arr */
1432        if(Z_TYPE_P(ele_value)!= IS_STRING) {
1433            /* element value is not a string */
1434            intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: locale array element is not a string", 0);
1435            LOOKUP_CLEAN_RETURN(NULL);
1436        }
1437        cur_arr[cur_arr_len*2] = estrndup(Z_STRVAL_P(ele_value), Z_STRLEN_P(ele_value));
1438        result = strToMatch(Z_STRVAL_P(ele_value), cur_arr[cur_arr_len*2]);
1439        if(result == 0) {
1440            intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag", 0);
1441            LOOKUP_CLEAN_RETURN(NULL);
1442        }
1443        cur_arr[cur_arr_len*2+1] = Z_STRVAL_P(ele_value);
1444        cur_arr_len++ ;
1445    } ZEND_HASH_FOREACH_END(); /* end of for */
1446
1447    /* Canonicalize array elements */
1448    if(canonicalize) {
1449        for(i=0; i<cur_arr_len; i++) {
1450            lang_tag = get_icu_value_internal(cur_arr[i*2], LOC_CANONICALIZE_TAG, &result, 0);
1451            if(result != 1 || lang_tag == NULL || !lang_tag[0]) {
1452                if(lang_tag) {
1453                    efree(lang_tag);
1454                }
1455                intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0);
1456                LOOKUP_CLEAN_RETURN(NULL);
1457            }
1458            cur_arr[i*2] = erealloc(cur_arr[i*2], strlen(lang_tag)+1);
1459            result = strToMatch(lang_tag, cur_arr[i*2]);
1460            efree(lang_tag);
1461            if(result == 0) {
1462                intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0);
1463                LOOKUP_CLEAN_RETURN(NULL);
1464            }
1465        }
1466
1467    }
1468
1469    if(canonicalize) {
1470        /* Canonicalize the loc_range */
1471        can_loc_range = get_icu_value_internal(loc_range, LOC_CANONICALIZE_TAG, &result , 0);
1472        if( result != 1 || can_loc_range == NULL || !can_loc_range[0]) {
1473            /* Error */
1474            intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize loc_range" , 0 );
1475            if(can_loc_range) {
1476                efree(can_loc_range);
1477            }
1478            LOOKUP_CLEAN_RETURN(NULL);
1479        } else {
1480            loc_range = can_loc_range;
1481        }
1482    }
1483
1484    cur_loc_range = ecalloc(1, strlen(loc_range)+1);
1485    /* convert to lower and replace hyphens */
1486    result = strToMatch(loc_range, cur_loc_range);
1487    if(can_loc_range) {
1488        efree(can_loc_range);
1489    }
1490    if(result == 0) {
1491        intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0);
1492        LOOKUP_CLEAN_RETURN(NULL);
1493    }
1494
1495    /* Lookup for the lang_tag match */
1496    saved_pos = strlen(cur_loc_range);
1497    while(saved_pos > 0) {
1498        for(i=0; i< cur_arr_len; i++){
1499            if(cur_arr[i*2] != NULL && strlen(cur_arr[i*2]) == saved_pos && strncmp(cur_loc_range, cur_arr[i*2], saved_pos) == 0) {
1500                /* Match found */
1501                char *str = canonicalize ? cur_arr[i*2] : cur_arr[i*2+1];
1502                return_value = zend_string_init(str, strlen(str), 0);
1503                efree(cur_loc_range);
1504                LOOKUP_CLEAN_RETURN(return_value);
1505            }
1506        }
1507        saved_pos = getStrrtokenPos(cur_loc_range, saved_pos);
1508    }
1509
1510    /* Match not found */
1511    efree(cur_loc_range);
1512    LOOKUP_CLEAN_RETURN(NULL);
1513}
1514/* }}} */
1515
1516/* {{{ proto string Locale::lookup(array $langtag, string $locale[, bool $canonicalize[, string $default = null]])
1517* Searchs the items in $langtag for the best match to the language
1518* range
1519*/
1520/* }}} */
1521/* {{{ proto string locale_lookup(array $langtag, string $locale[, bool $canonicalize[, string $default = null]])
1522* Searchs the items in $langtag for the best match to the language
1523* range
1524*/
1525PHP_FUNCTION(locale_lookup)
1526{
1527    zend_string*    fallback_loc_str    = NULL;
1528    const char*     loc_range           = NULL;
1529    size_t          loc_range_len       = 0;
1530
1531    zval*       arr             = NULL;
1532    HashTable*  hash_arr        = NULL;
1533    zend_bool   boolCanonical   = 0;
1534    zend_string*    result_str  = NULL;
1535
1536    intl_error_reset( NULL );
1537
1538    if(zend_parse_parameters( ZEND_NUM_ARGS(), "as|bS", &arr, &loc_range, &loc_range_len,
1539        &boolCanonical, &fallback_loc_str) == FAILURE) {
1540        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "locale_lookup: unable to parse input params", 0 );
1541        RETURN_FALSE;
1542    }
1543
1544    if(loc_range_len == 0) {
1545        loc_range = intl_locale_get_default();
1546    }
1547
1548    hash_arr = HASH_OF(arr);
1549
1550    if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 ) {
1551        RETURN_EMPTY_STRING();
1552    }
1553
1554    result_str = lookup_loc_range(loc_range, hash_arr, boolCanonical);
1555    if(result_str == NULL || ZSTR_VAL(result_str)[0] == '\0') {
1556        if( fallback_loc_str ) {
1557            result_str = zend_string_copy(fallback_loc_str);
1558        } else {
1559            RETURN_EMPTY_STRING();
1560        }
1561    }
1562
1563    RETURN_STR(result_str);
1564}
1565/* }}} */
1566
1567/* {{{ proto string Locale::acceptFromHttp(string $http_accept)
1568* Tries to find out best available locale based on HTTP �Accept-Language� header
1569*/
1570/* }}} */
1571/* {{{ proto string locale_accept_from_http(string $http_accept)
1572* Tries to find out best available locale based on HTTP �Accept-Language� header
1573*/
1574PHP_FUNCTION(locale_accept_from_http)
1575{
1576    UEnumeration *available;
1577    char *http_accept = NULL;
1578    size_t http_accept_len;
1579    UErrorCode status = 0;
1580    int len;
1581    char resultLocale[INTL_MAX_LOCALE_LEN+1];
1582    UAcceptResult outResult;
1583
1584    if(zend_parse_parameters( ZEND_NUM_ARGS(), "s", &http_accept, &http_accept_len) == FAILURE)
1585    {
1586        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1587        "locale_accept_from_http: unable to parse input parameters", 0 );
1588        RETURN_FALSE;
1589    }
1590
1591    available = ures_openAvailableLocales(NULL, &status);
1592    INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to retrieve locale list");
1593    len = uloc_acceptLanguageFromHTTP(resultLocale, INTL_MAX_LOCALE_LEN,
1594                        &outResult, http_accept, available, &status);
1595    uenum_close(available);
1596    INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to find acceptable locale");
1597    if (len < 0 || outResult == ULOC_ACCEPT_FAILED) {
1598        RETURN_FALSE;
1599    }
1600    RETURN_STRINGL(resultLocale, len);
1601}
1602/* }}} */
1603
1604/*
1605 * Local variables:
1606 * tab-width: 4
1607 * c-basic-offset: 4
1608 * End:
1609 * vim600: noet sw=4 ts=4 fdm=marker
 * vim<600: noet sw=4 ts=4
1612*/
1613