1/*
2   +----------------------------------------------------------------------+
3   | PHP Version 5                                                        |
4   +----------------------------------------------------------------------+
5   | This source file is subject to version 3.01 of the PHP license,      |
6   | that is bundled with this package in the file LICENSE, and is        |
7   | available through the world-wide-web at the following url:           |
8   | http://www.php.net/license/3_01.txt                                  |
9   | If you did not receive a copy of the PHP license and are unable to   |
10   | obtain it through the world-wide-web, please send a note to          |
11   | license@php.net so we can mail you a copy immediately.               |
12   +----------------------------------------------------------------------+
13   | Authors: Kirti Velankar <kirtig@yahoo-inc.com>                       |
14   +----------------------------------------------------------------------+
15*/
16
17/* $Id$ */
18
19#ifdef HAVE_CONFIG_H
20#include "config.h"
21#endif
22
23#include <unicode/ustring.h>
24#include <unicode/udata.h>
25#include <unicode/putil.h>
26#include <unicode/ures.h>
27
28#include "php_intl.h"
29#include "locale.h"
30#include "locale_class.h"
31#include "locale_methods.h"
32#include "intl_convert.h"
33#include "intl_data.h"
34
35#include <zend_API.h>
36#include <zend.h>
37#include <php.h>
38#include "main/php_ini.h"
39#include "ext/standard/php_smart_str.h"
40
41ZEND_EXTERN_MODULE_GLOBALS( intl )
42
/* String fragments used when composing locale IDs and selecting display values */
/* Inserted between subtags by the append_* helpers */
#define SEPARATOR "_"
#define SEPARATOR1 "-"
#define DELIMITER "-_"
#define EXTLANG_PREFIX "a"
/* Emitted by add_prefix() ahead of private-use subtags */
#define PRIVATE_PREFIX "x"
/* tag_name passed by locale_get_display_name to request the full display name */
#define DISP_NAME "name"

/* Upper bounds on the numbered keys ("variant0", "variant1", ...) probed by
 * append_multiple_key_values() */
#define MAX_NO_VARIANT  15
#define MAX_NO_EXTLANG  3
#define MAX_NO_PRIVATE  15
#define MAX_NO_LOOKUP_LANG_TAG  100

/* Returned by append_key_value() when the requested key is absent from the array */
#define LOC_NOT_FOUND 1

/* Sizes required for the strings "variant15" , "extlang3", "private12" etc. */
#define VARIANT_KEYNAME_LEN  11
#define EXTLANG_KEYNAME_LEN  10
#define PRIVATE_KEYNAME_LEN  11
62
/* Grandfathered language tags, based on the IANA registry at the time this
 * code was written.
 *
 * Order is significant: the leading entries line up index-for-index with
 * LOC_PREFERRED_GRANDFATHERED, which getPreferredTag() relies on.
 * The list is NULL-terminated so that findOffset() can walk it.
 */
static const char * const LOC_GRANDFATHERED[] = {
    /* entries that have a preferred replacement */
    "art-lojban",
    "i-klingon",
    "i-lux",
    "i-navajo",
    "no-bok",
    "no-nyn",
    /* entries without a preferred replacement */
    "cel-gaulish",
    "en-GB-oed",
    "i-ami",
    "i-bnn",
    "i-default",
    "i-enochian",
    "i-mingo",
    "i-pwn",
    "i-tao",
    "i-tay",
    "i-tsu",
    "sgn-BE-fr",
    "sgn-BE-nl",
    "sgn-CH-de",
    "zh-cmn",
    "zh-cmn-Hans",
    "zh-cmn-Hant",
    "zh-gan",
    "zh-guoyu",
    "zh-hakka",
    "zh-min",
    "zh-min-nan",
    "zh-wuu",
    "zh-xiang",
    "zh-yue",
    NULL
};
78
/* Based on IANA registry at the time of writing this code
*  This array lists the preferred values for the grandfathered tags if applicable.
*  It is kept in sync with the leading entries of LOC_GRANDFATHERED:
*  entry N here is the preferred value for entry N there.
*  The array is NULL-terminated.
*/
static const char * const   LOC_PREFERRED_GRANDFATHERED[]  = {
    "jbo",          "tlh",          "lb",
    "nv",           "nb",           "nn",
    NULL
};
/* Number of preferred values, excluding the NULL terminator. Derived from the
 * array so the two cannot drift apart when an entry is added or removed. */
static const int        LOC_PREFERRED_GRANDFATHERED_LEN =
    (int)(sizeof(LOC_PREFERRED_GRANDFATHERED) / sizeof(LOC_PREFERRED_GRANDFATHERED[0])) - 1;
90
/* Character / string predicates used while scanning locale IDs.
 * Every argument is parenthesized so that compound expressions (e.g. a
 * conditional expression or pointer arithmetic) can be passed safely.
 * Note: arguments may still be evaluated more than once, so do not pass
 * expressions with side effects. */

/*returns TRUE if a is an ID separator FALSE otherwise*/
#define isIDSeparator(a) ((a) == '_' || (a) == '-')
#define isKeywordSeparator(a) ((a) == '@')
#define isEndOfTag(a) ((a) == '\0')

#define isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/*returns TRUE if one of the special prefixes is here (s=string)
  'x-' or 'i-' */
#define isIDPrefix(s) (isPrefixLetter((s)[0])&&isIDSeparator((s)[1]))
#define isKeywordPrefix(s) ( isKeywordSeparator((s)[0]) )

/* Dot terminates it because of POSIX form  where dot precedes the codepage
 * except for variant */
#define isTerminator(a)  (((a)==0)||((a)=='.')||((a)=='@'))
106
/* {{{ findOffset
 * Scans the NULL-terminated string array 'list' for an entry that compares
 * equal to 'key' (strcmp).
 * returns the zero-based index of the first match, or -1 if there is none */
static int16_t findOffset(const char* const* list, const char* key)
{
    int idx;

    for (idx = 0; list[idx] != NULL; idx++) {
        if (strcmp(key, list[idx]) == 0) {
            return (int16_t)idx;
        }
    }

    return -1;
}
122/*}}}*/
123
124static char* getPreferredTag(const char* gf_tag)
125{
126    char* result = NULL;
127    int grOffset = 0;
128
129    grOffset = findOffset( LOC_GRANDFATHERED ,gf_tag);
130    if(grOffset < 0) {
131        return NULL;
132    }
133    if( grOffset < LOC_PREFERRED_GRANDFATHERED_LEN ){
134        /* return preferred tag */
135        result = estrdup( LOC_PREFERRED_GRANDFATHERED[grOffset] );
136    } else {
137        /* Return correct grandfathered language tag */
138        result = estrdup( LOC_GRANDFATHERED[grOffset] );
139    }
140    return result;
141}
142
/* {{{ getStrrtokenPos
* Reverse-direction counterpart of a strtok step, used for lookup.
* Searches 'str' backwards, starting just before 'savedPos', for the nearest
* ID separator. If the character two positions before that separator is also
* a separator (i.e. a one-letter singleton sits in between), the earlier
* separator is reported instead.
* returns the separator position, or -1 if there is no usable token
* (no separator at all, or the position found is 0)
*/
static int getStrrtokenPos(char* str, int savedPos)
{
    int pos = savedPos - 1;

    /* walk left until a separator is hit or the string is exhausted */
    while( pos >= 0 && !isIDSeparator(str[pos]) ){
        pos--;
    }
    if( pos < 0 ){
        return -1;
    }

    /* step over a singleton so the token before it is reported */
    if( pos >= 2 && isIDSeparator(str[pos-2]) ){
        pos -= 2;
    }

    /* position 0 means an invalid locale such as '-x-xyz' or '-sl_Latn' */
    return (pos < 1) ? -1 : pos;
}
171/* }}} */
172
/* {{{ getSingletonPos
* Forward search for a singleton (one-character subtag) in 'str'.
* returns 0 if the first separator is at index 1, i.e. the string itself
*           starts with a singleton ("x-avy", "a-prv1")
* returns the index of the singleton character when a separator is followed
*           two positions later by another separator ("en-a-ext" gives 3)
* returns -1 if there is no singleton, or if 'str' is NULL or empty
*/
static int getSingletonPos(const char* str)
{
    int result =-1;
    int i=0;
    int len = 0;

    if( str && ((len=strlen(str))>0) ){
        for( i=0; i<len ; i++){
            if( !isIDSeparator(*(str+i)) ){
                continue;
            }
            if( i==1){
                /* string is of the form x-avy or a-prv1 */
                result =0;
                break;
            }
            /* delimiter found; check for singleton.
             * Only look two characters ahead while that index is still inside
             * the string: when the separator is the last character, str[i+2]
             * lies beyond the terminating NUL and must not be read. */
            if( i+2 < len && isIDSeparator(*(str+i+2)) ){
                /* a singleton; report the position right after this separator */
                result = i+1;
                break;
            }
        }/* end of for */

    }
    return result;
}
205/* }}} */
206
207/* {{{ proto static string Locale::getDefault(  )
208   Get default locale */
209/* }}} */
210/* {{{ proto static string locale_get_default( )
211   Get default locale */
212PHP_NAMED_FUNCTION(zif_locale_get_default)
213{
214    RETURN_STRING( intl_locale_get_default( TSRMLS_C ) );
215}
216
217/* }}} */
218
219/* {{{ proto static string Locale::setDefault( string $locale )
220   Set default locale */
221/* }}} */
222/* {{{ proto static string locale_set_default( string $locale )
223   Set default locale */
224PHP_NAMED_FUNCTION(zif_locale_set_default)
225{
226    zend_string* locale_name;
227    zend_string *ini_name;
228    char *default_locale = NULL;
229
230    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC,  "S", &locale_name) == FAILURE)
231    {
232        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
233                "locale_set_default: unable to parse input params", 0 TSRMLS_CC );
234
235        RETURN_FALSE;
236    }
237
238    if (locale_name->len == 0) {
239        default_locale = (char *)uloc_getDefault();
240        locale_name = zend_string_init(default_locale, strlen(default_locale), 0);
241    }
242
243    ini_name = zend_string_init(LOCALE_INI_NAME, sizeof(LOCALE_INI_NAME) - 1, 0);
244    zend_alter_ini_entry(ini_name, locale_name, PHP_INI_USER, PHP_INI_STAGE_RUNTIME);
245    zend_string_release(ini_name);
246    if (default_locale != NULL) {
247        zend_string_release(locale_name);
248    }
249
250    RETURN_TRUE;
251}
252/* }}} */
253
254/* {{{
255* Gets the value from ICU
256* common code shared by get_primary_language,get_script or get_region or get_variant
257* result = 0 if error, 1 if successful , -1 if no value
258*/
259static char* get_icu_value_internal( const char* loc_name , char* tag_name, int* result , int fromParseLocale)
260{
261    char*       tag_value   = NULL;
262    int32_t         tag_value_len   = 512;
263
264    int     singletonPos    = 0;
265    char*           mod_loc_name    = NULL;
266    int         grOffset    = 0;
267
268    int32_t         buflen          = 512;
269    UErrorCode      status          = U_ZERO_ERROR;
270
271
272    if( strcmp(tag_name, LOC_CANONICALIZE_TAG) != 0 ){
273        /* Handle  grandfathered languages */
274        grOffset =  findOffset( LOC_GRANDFATHERED , loc_name );
275        if( grOffset >= 0 ){
276            if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
277                return estrdup(loc_name);
278            } else {
279                /* Since Grandfathered , no value , do nothing , retutn NULL */
280                return NULL;
281            }
282        }
283
284    if( fromParseLocale==1 ){
285        /* Handle singletons */
286        if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
287            if( strlen(loc_name)>1 && (isIDPrefix(loc_name) == 1) ){
288                return estrdup(loc_name);
289            }
290        }
291
292        singletonPos = getSingletonPos( loc_name );
293        if( singletonPos == 0){
294            /* singleton at start of script, region , variant etc.
295             * or invalid singleton at start of language */
296            return NULL;
297        } else if( singletonPos > 0 ){
298            /* singleton at some position except at start
299             * strip off the singleton and rest of the loc_name */
300            mod_loc_name = estrndup ( loc_name , singletonPos-1);
301        }
302    } /* end of if fromParse */
303
304    } /* end of if != LOC_CANONICAL_TAG */
305
306    if( mod_loc_name == NULL){
307        mod_loc_name = estrdup(loc_name );
308    }
309
310    /* Proceed to ICU */
311    do{
312        tag_value = erealloc( tag_value , buflen  );
313        tag_value_len = buflen;
314
315        if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
316            buflen = uloc_getScript ( mod_loc_name ,tag_value , tag_value_len , &status);
317        }
318        if( strcmp(tag_name , LOC_LANG_TAG )==0 ){
319            buflen = uloc_getLanguage ( mod_loc_name ,tag_value , tag_value_len , &status);
320        }
321        if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
322            buflen = uloc_getCountry ( mod_loc_name ,tag_value , tag_value_len , &status);
323        }
324        if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
325            buflen = uloc_getVariant ( mod_loc_name ,tag_value , tag_value_len , &status);
326        }
327        if( strcmp(tag_name , LOC_CANONICALIZE_TAG)==0 ){
328            buflen = uloc_canonicalize ( mod_loc_name ,tag_value , tag_value_len , &status);
329        }
330
331        if( U_FAILURE( status ) ) {
332            if( status == U_BUFFER_OVERFLOW_ERROR ) {
333                status = U_ZERO_ERROR;
334                continue;
335            }
336
337            /* Error in retriving data */
338            *result = 0;
339            if( tag_value ){
340                efree( tag_value );
341            }
342            if( mod_loc_name ){
343                efree( mod_loc_name);
344            }
345            return NULL;
346        }
347    } while( buflen > tag_value_len );
348
349    if(  buflen ==0 ){
350        /* No value found */
351        *result = -1;
352        if( tag_value ){
353            efree( tag_value );
354        }
355        if( mod_loc_name ){
356            efree( mod_loc_name);
357        }
358        return NULL;
359    } else {
360        *result = 1;
361    }
362
363    if( mod_loc_name ){
364        efree( mod_loc_name);
365    }
366    return tag_value;
367}
368/* }}} */
369
370/* {{{
371* Gets the value from ICU , called when PHP userspace function is called
372* common code shared by get_primary_language,get_script or get_region or get_variant
373*/
374static void get_icu_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
375{
376
377    const char* loc_name            = NULL;
378    size_t         loc_name_len     = 0;
379
380    char*       tag_value       = NULL;
381    char*       empty_result    = "";
382
383    int         result          = 0;
384    char*       msg             = NULL;
385
386    UErrorCode  status              = U_ZERO_ERROR;
387
388    intl_error_reset( NULL TSRMLS_CC );
389
390    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
391    &loc_name ,&loc_name_len ) == FAILURE) {
392        spprintf(&msg , 0, "locale_get_%s : unable to parse input params", tag_name );
393        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,  msg , 1 TSRMLS_CC );
394        efree(msg);
395
396        RETURN_FALSE;
397    }
398
399    if(loc_name_len == 0) {
400        loc_name = intl_locale_get_default(TSRMLS_C);
401    }
402
403    /* Call ICU get */
404    tag_value = get_icu_value_internal( loc_name , tag_name , &result ,0);
405
406    /* No value found */
407    if( result == -1 ) {
408        if( tag_value){
409            efree( tag_value);
410        }
411        RETURN_STRING( empty_result);
412    }
413
414    /* value found */
415    if( tag_value){
416        RETVAL_STRING( tag_value );
417        //???
418        efree(tag_value);
419        return;
420    }
421
422    /* Error encountered while fetching the value */
423    if( result ==0) {
424        spprintf(&msg , 0, "locale_get_%s : unable to get locale %s", tag_name , tag_name );
425        intl_error_set( NULL, status, msg , 1 TSRMLS_CC );
426        efree(msg);
427        RETURN_NULL();
428    }
429
430}
431/* }}} */
432
433/* {{{ proto static string Locale::getScript($locale)
434 * gets the script for the $locale
435 }}} */
/* {{{ proto static string locale_get_script($locale)
 * gets the script for the $locale
 * Thin wrapper: all argument parsing and result handling is done by
 * get_icu_value_src_php(), called here with LOC_SCRIPT_TAG.
 */
PHP_FUNCTION( locale_get_script )
{
    get_icu_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
443/* }}} */
444
445/* {{{ proto static string Locale::getRegion($locale)
446 * gets the region for the $locale
447 }}} */
/* {{{ proto static string locale_get_region($locale)
 * gets the region for the $locale
 * Thin wrapper: all argument parsing and result handling is done by
 * get_icu_value_src_php(), called here with LOC_REGION_TAG.
 */
PHP_FUNCTION( locale_get_region )
{
    get_icu_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
455/* }}} */
456
457/* {{{ proto static string Locale::getPrimaryLanguage($locale)
458 * gets the primary language for the $locale
459 }}} */
/* {{{ proto static string locale_get_primary_language($locale)
 * gets the primary language for the $locale
 * Thin wrapper: all argument parsing and result handling is done by
 * get_icu_value_src_php(), called here with LOC_LANG_TAG.
 */
PHP_FUNCTION(locale_get_primary_language )
{
    get_icu_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
467/* }}} */
468
469
470/* {{{
471 * common code shared by display_xyz functions to  get the value from ICU
472 }}} */
473static void get_icu_disp_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
474{
475    const char* loc_name            = NULL;
476    size_t         loc_name_len     = 0;
477
478    const char* disp_loc_name       = NULL;
479    size_t         disp_loc_name_len   = 0;
480    int         free_loc_name       = 0;
481
482    UChar*      disp_name       = NULL;
483    int32_t     disp_name_len   = 0;
484
485    char*       mod_loc_name        = NULL;
486
487    int32_t     buflen              = 512;
488    UErrorCode  status              = U_ZERO_ERROR;
489
490    char*       utf8value       = NULL;
491    int         utf8value_len       = 0;
492
493    char*       msg                 = NULL;
494    int         grOffset        = 0;
495
496    intl_error_reset( NULL TSRMLS_CC );
497
498    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s|s",
499        &loc_name, &loc_name_len ,
500        &disp_loc_name ,&disp_loc_name_len ) == FAILURE)
501    {
502        spprintf(&msg , 0, "locale_get_display_%s : unable to parse input params", tag_name );
503        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,  msg , 1 TSRMLS_CC );
504        efree(msg);
505        RETURN_FALSE;
506    }
507
508    if(loc_name_len > ULOC_FULLNAME_CAPACITY) {
509        /* See bug 67397: overlong locale names cause trouble in uloc_getDisplayName */
510        spprintf(&msg , 0, "locale_get_display_%s : name too long", tag_name );
511        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,  msg , 1 TSRMLS_CC );
512        efree(msg);
513        RETURN_FALSE;
514    }
515
516    if(loc_name_len == 0) {
517        loc_name = intl_locale_get_default(TSRMLS_C);
518    }
519
520    if( strcmp(tag_name, DISP_NAME) != 0 ){
521        /* Handle grandfathered languages */
522        grOffset = findOffset( LOC_GRANDFATHERED , loc_name );
523        if( grOffset >= 0 ){
524            if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
525                mod_loc_name = getPreferredTag( loc_name );
526            } else {
527                /* Since Grandfathered, no value, do nothing, retutn NULL */
528                RETURN_FALSE;
529            }
530        }
531    } /* end of if != LOC_CANONICAL_TAG */
532
533    if( mod_loc_name==NULL ){
534        mod_loc_name = estrdup( loc_name );
535    }
536
537    /* Check if disp_loc_name passed , if not use default locale */
538    if( !disp_loc_name){
539        disp_loc_name = estrdup(intl_locale_get_default(TSRMLS_C));
540        free_loc_name = 1;
541    }
542
543    /* Get the disp_value for the given locale */
544    do{
545        disp_name = erealloc( disp_name , buflen * sizeof(UChar)  );
546        disp_name_len = buflen;
547
548        if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
549            buflen = uloc_getDisplayLanguage ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
550        } else if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
551            buflen = uloc_getDisplayScript ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
552        } else if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
553            buflen = uloc_getDisplayCountry ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
554        } else if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
555            buflen = uloc_getDisplayVariant ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
556        } else if( strcmp(tag_name , DISP_NAME)==0 ){
557            buflen = uloc_getDisplayName ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
558        }
559
560        /* U_STRING_NOT_TERMINATED_WARNING is admissible here; don't look for it */
561        if( U_FAILURE( status ) )
562        {
563            if( status == U_BUFFER_OVERFLOW_ERROR )
564            {
565                status = U_ZERO_ERROR;
566                continue;
567            }
568
569            spprintf(&msg, 0, "locale_get_display_%s : unable to get locale %s", tag_name , tag_name );
570            intl_error_set( NULL, status, msg , 1 TSRMLS_CC );
571            efree(msg);
572            if( disp_name){
573                efree( disp_name );
574            }
575            if( mod_loc_name){
576                efree( mod_loc_name );
577            }
578            if (free_loc_name) {
579                efree((void *)disp_loc_name);
580                disp_loc_name = NULL;
581            }
582            RETURN_FALSE;
583        }
584    } while( buflen > disp_name_len );
585
586    if( mod_loc_name){
587        efree( mod_loc_name );
588    }
589    if (free_loc_name) {
590        efree((void *)disp_loc_name);
591        disp_loc_name = NULL;
592    }
593    /* Convert display locale name from UTF-16 to UTF-8. */
594    intl_convert_utf16_to_utf8( &utf8value, &utf8value_len, disp_name, buflen, &status );
595    efree( disp_name );
596    if( U_FAILURE( status ) )
597    {
598        spprintf(&msg, 0, "locale_get_display_%s :error converting display name for %s to UTF-8", tag_name , tag_name );
599        intl_error_set( NULL, status, msg , 1 TSRMLS_CC );
600        efree(msg);
601        RETURN_FALSE;
602    }
603
604    RETVAL_STRINGL( utf8value, utf8value_len );
605    //????
606    efree(utf8value);
607
608}
609/* }}} */
610
611/* {{{ proto static string Locale::getDisplayName($locale[, $in_locale = null])
612* gets the name for the $locale in $in_locale or default_locale
613 }}} */
/* {{{ proto static string locale_get_display_name($locale[, $in_locale = null])
* gets the name for the $locale in $in_locale or default_locale
* Thin wrapper: all argument parsing and result handling is done by
* get_icu_disp_value_src_php(), called here with DISP_NAME.
*/
PHP_FUNCTION(locale_get_display_name)
{
    get_icu_disp_value_src_php( DISP_NAME , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
621/* }}} */
622
623/* {{{ proto static string Locale::getDisplayLanguage($locale[, $in_locale = null])
624* gets the language for the $locale in $in_locale or default_locale
625 }}} */
/* {{{ proto static string locale_get_display_language($locale[, $in_locale = null])
* gets the language for the $locale in $in_locale or default_locale
* Thin wrapper: all argument parsing and result handling is done by
* get_icu_disp_value_src_php(), called here with LOC_LANG_TAG.
*/
PHP_FUNCTION(locale_get_display_language)
{
    get_icu_disp_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
633/* }}} */
634
635/* {{{ proto static string Locale::getDisplayScript($locale, $in_locale = null)
636* gets the script for the $locale in $in_locale or default_locale
637 }}} */
/* {{{ proto static string locale_get_display_script($locale, $in_locale = null)
* gets the script for the $locale in $in_locale or default_locale
* Thin wrapper: all argument parsing and result handling is done by
* get_icu_disp_value_src_php(), called here with LOC_SCRIPT_TAG.
*/
PHP_FUNCTION(locale_get_display_script)
{
    get_icu_disp_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
645/* }}} */
646
647/* {{{ proto static string Locale::getDisplayRegion($locale, $in_locale = null)
648* gets the region for the $locale in $in_locale or default_locale
649 }}} */
/* {{{ proto static string locale_get_display_region($locale, $in_locale = null)
* gets the region for the $locale in $in_locale or default_locale
* Thin wrapper: all argument parsing and result handling is done by
* get_icu_disp_value_src_php(), called here with LOC_REGION_TAG.
*/
PHP_FUNCTION(locale_get_display_region)
{
    get_icu_disp_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
657/* }}} */
658
659/* {{{
660* proto static string Locale::getDisplayVariant($locale, $in_locale = null)
661* gets the variant for the $locale in $in_locale or default_locale
662 }}} */
/* {{{
* proto static string locale_get_display_variant($locale, $in_locale = null)
* gets the variant for the $locale in $in_locale or default_locale
* Thin wrapper: all argument parsing and result handling is done by
* get_icu_disp_value_src_php(), called here with LOC_VARIANT_TAG.
*/
PHP_FUNCTION(locale_get_display_variant)
{
    get_icu_disp_value_src_php( LOC_VARIANT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
671/* }}} */
672
673 /* {{{ proto static array getKeywords(string $locale) {
674 * return an associative array containing keyword-value
675 * pairs for this locale. The keys are keys to the array (doh!)
676 * }}}*/
677 /* {{{ proto static array locale_get_keywords(string $locale) {
678 * return an associative array containing keyword-value
679 * pairs for this locale. The keys are keys to the array (doh!)
680 */
681PHP_FUNCTION( locale_get_keywords )
682{
683    UEnumeration*   e        = NULL;
684    UErrorCode      status   = U_ZERO_ERROR;
685
686    const char*     kw_key        = NULL;
687    int32_t         kw_key_len    = 0;
688
689    const char*         loc_name        = NULL;
690    size_t              loc_name_len    = 0;
691
692/*
693    ICU expects the buffer to be allocated  before calling the function
694    and so the buffer size has been explicitly specified
695    ICU uloc.h #define  ULOC_KEYWORD_AND_VALUES_CAPACITY   100
696    hence the kw_value buffer size is 100
697*/
698    char*       kw_value        = NULL;
699    int32_t     kw_value_len    = 100;
700
701    intl_error_reset( NULL TSRMLS_CC );
702
703    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
704        &loc_name, &loc_name_len ) == FAILURE)
705    {
706        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
707             "locale_get_keywords: unable to parse input params", 0 TSRMLS_CC );
708
709        RETURN_FALSE;
710    }
711
712    if(loc_name_len == 0) {
713        loc_name = intl_locale_get_default(TSRMLS_C);
714    }
715
716    /* Get the keywords */
717    e = uloc_openKeywords( loc_name, &status );
718    if( e != NULL )
719    {
720        /* Traverse it, filling the return array. */
721        array_init( return_value );
722
723        while( ( kw_key = uenum_next( e, &kw_key_len, &status ) ) != NULL ){
724            kw_value = ecalloc( 1 , kw_value_len  );
725
726            /* Get the keyword value for each keyword */
727            kw_value_len=uloc_getKeywordValue( loc_name,kw_key, kw_value, kw_value_len ,  &status );
728            if (status == U_BUFFER_OVERFLOW_ERROR) {
729                status = U_ZERO_ERROR;
730                kw_value = erealloc( kw_value , kw_value_len+1);
731                kw_value_len=uloc_getKeywordValue( loc_name,kw_key, kw_value, kw_value_len+1 ,  &status );
732            } else if(!U_FAILURE(status)) {
733                kw_value = erealloc( kw_value , kw_value_len+1);
734            }
735            if (U_FAILURE(status)) {
736                    intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "locale_get_keywords: Error encountered while getting the keyword  value for the  keyword", 0 TSRMLS_CC );
737                if( kw_value){
738                    efree( kw_value );
739                }
740                zval_dtor(return_value);
741                RETURN_FALSE;
742            }
743
744            // TODO: avoid reallocation ???
745            add_assoc_stringl( return_value, (char *)kw_key, kw_value , kw_value_len);
746            efree(kw_value);
747        } /* end of while */
748
749    } /* end of if e!=NULL */
750
751    uenum_close( e );
752}
753/* }}} */
754
755 /* {{{ proto static string Locale::canonicalize($locale)
756 * @return string the canonicalized locale
757 * }}} */
 /* {{{ proto static string locale_canonicalize(string $locale)
 * @param string $locale    The locale string to canonicalize
 * Thin wrapper: all argument parsing and result handling is done by
 * get_icu_value_src_php(), called here with LOC_CANONICALIZE_TAG.
 */
PHP_FUNCTION(locale_canonicalize)
{
    get_icu_value_src_php( LOC_CANONICALIZE_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
765/* }}} */
766
767/* {{{ append_key_value
768* Internal function which is called from locale_compose
769* gets the value for the key_name and appends to the loc_name
770* returns 1 if successful , -1 if not found ,
771* 0 if array element is not a string , -2 if buffer-overflow
772*/
773static int append_key_value(smart_str* loc_name, HashTable* hash_arr, char* key_name)
774{
775    zval *ele_value;
776
777    if ((ele_value = zend_hash_str_find(hash_arr , key_name, strlen(key_name))) != NULL ) {
778        if(Z_TYPE_P(ele_value)!= IS_STRING ){
779            /* element value is not a string */
780            return FAILURE;
781        }
782        if(strcmp(key_name, LOC_LANG_TAG) != 0 &&
783           strcmp(key_name, LOC_GRANDFATHERED_LANG_TAG)!=0 ) {
784            /* not lang or grandfathered tag */
785            smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
786        }
787        smart_str_appendl(loc_name, Z_STRVAL_P(ele_value) , Z_STRLEN_P(ele_value));
788        return SUCCESS;
789    }
790
791    return LOC_NOT_FOUND;
792}
793/* }}} */
794
795/* {{{ append_prefix , appends the prefix needed
796* e.g. private adds 'x'
797*/
798static void add_prefix(smart_str* loc_name, char* key_name)
799{
800    if( strncmp(key_name , LOC_PRIVATE_TAG , 7) == 0 ){
801        smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
802        smart_str_appendl(loc_name, PRIVATE_PREFIX , sizeof(PRIVATE_PREFIX)-1);
803    }
804}
805/* }}} */
806
807/* {{{ append_multiple_key_values
808* Internal function which is called from locale_compose
809* gets the multiple values for the key_name and appends to the loc_name
810* used for 'variant','extlang','private'
811* returns 1 if successful , -1 if not found ,
812* 0 if array element is not a string , -2 if buffer-overflow
813*/
814static int append_multiple_key_values(smart_str* loc_name, HashTable* hash_arr, char* key_name TSRMLS_DC)
815{
816    zval    *ele_value;
817    int     i       = 0;
818    int     isFirstSubtag   = 0;
819    int     max_value   = 0;
820
821    /* Variant/ Extlang/Private etc. */
822    if ((ele_value = zend_hash_str_find( hash_arr , key_name , strlen(key_name))) != NULL) {
823        if( Z_TYPE_P(ele_value) == IS_STRING ){
824            add_prefix( loc_name , key_name);
825
826            smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
827            smart_str_appendl(loc_name, Z_STRVAL_P(ele_value) , Z_STRLEN_P(ele_value));
828            return SUCCESS;
829        } else if(Z_TYPE_P(ele_value) == IS_ARRAY ) {
830            HashTable *arr = HASH_OF(ele_value);
831            zval *data;
832
833            ZEND_HASH_FOREACH_VAL(arr, data) {
834                if(Z_TYPE_P(data) != IS_STRING) {
835                    return FAILURE;
836                }
837                if (isFirstSubtag++ == 0){
838                    add_prefix(loc_name , key_name);
839                }
840                smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
841                smart_str_appendl(loc_name, Z_STRVAL_P(data) , Z_STRLEN_P(data));
842            } ZEND_HASH_FOREACH_END();
843            return SUCCESS;
844        } else {
845            return FAILURE;
846        }
847    } else {
848        char cur_key_name[31];
849        /* Decide the max_value: the max. no. of elements allowed */
850        if( strcmp(key_name , LOC_VARIANT_TAG) ==0 ){
851            max_value  = MAX_NO_VARIANT;
852        }
853        if( strcmp(key_name , LOC_EXTLANG_TAG) ==0 ){
854            max_value  = MAX_NO_EXTLANG;
855        }
856        if( strcmp(key_name , LOC_PRIVATE_TAG) ==0 ){
857            max_value  = MAX_NO_PRIVATE;
858        }
859
860        /* Multiple variant values as variant0, variant1 ,variant2 */
861        isFirstSubtag = 0;
862        for( i=0 ; i< max_value; i++ ){
863            snprintf( cur_key_name , 30, "%s%d", key_name , i);
864            if ((ele_value = zend_hash_str_find( hash_arr , cur_key_name , strlen(cur_key_name))) != NULL) {
865                if( Z_TYPE_P(ele_value)!= IS_STRING ){
866                    /* variant is not a string */
867                    return FAILURE;
868                }
869                /* Add the contents */
870                if (isFirstSubtag++ == 0){
871                    add_prefix(loc_name , cur_key_name);
872                }
873                smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
874                smart_str_appendl(loc_name, Z_STRVAL_P(ele_value) , Z_STRLEN_P(ele_value));
875            }
876        } /* end of for */
877    } /* end of else */
878
879    return SUCCESS;
880}
881/* }}} */
882
883/*{{{
884* If applicable sets error message and aborts locale_compose gracefully
885* returns 0  if locale_compose needs to be aborted
886* otherwise returns 1
887*/
888static int handleAppendResult( int result, smart_str* loc_name TSRMLS_DC)
889{
890    intl_error_reset( NULL TSRMLS_CC );
891    if( result == FAILURE) {
892        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
893             "locale_compose: parameter array element is not a string", 0 TSRMLS_CC );
894        smart_str_free(loc_name);
895        return 0;
896    }
897    return 1;
898}
899/* }}} */
900
/* NUL-terminates the smart_str and returns its string from the current PHP
 * function. Wrapped in do { } while (0) so that the two statements stay
 * together when the macro is used as the body of an unbraced if/else. */
#define RETURN_SMART_STR(str) do { smart_str_0((str)); RETURN_STR((str)->s); } while (0)
902/* {{{ proto static string Locale::composeLocale($array)
903* Creates a locale by combining the parts of locale-ID passed
904* }}} */
905/* {{{ proto static string compose_locale($array)
906* Creates a locale by combining the parts of locale-ID passed
907* }}} */
908PHP_FUNCTION(locale_compose)
909{
910    smart_str       loc_name_s = {0};
911    smart_str *loc_name = &loc_name_s;
912    zval*           arr = NULL;
913    HashTable*      hash_arr = NULL;
914    int             result = 0;
915
916    intl_error_reset( NULL TSRMLS_CC );
917
918    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "a",
919        &arr) == FAILURE)
920    {
921        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
922             "locale_compose: unable to parse input params", 0 TSRMLS_CC );
923        RETURN_FALSE;
924    }
925
926    hash_arr = HASH_OF( arr );
927
928    if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 )
929        RETURN_FALSE;
930
931    /* Check for grandfathered first */
932    result = append_key_value(loc_name, hash_arr,  LOC_GRANDFATHERED_LANG_TAG);
933    if( result == SUCCESS){
934        RETURN_SMART_STR(loc_name);
935    }
936    if( !handleAppendResult( result, loc_name TSRMLS_CC)){
937        RETURN_FALSE;
938    }
939
940    /* Not grandfathered */
941    result = append_key_value(loc_name, hash_arr , LOC_LANG_TAG);
942    if( result == LOC_NOT_FOUND ){
943        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
944        "locale_compose: parameter array does not contain 'language' tag.", 0 TSRMLS_CC );
945        smart_str_free(loc_name);
946        RETURN_FALSE;
947    }
948    if( !handleAppendResult( result, loc_name TSRMLS_CC)){
949        RETURN_FALSE;
950    }
951
952    /* Extlang */
953    result = append_multiple_key_values(loc_name, hash_arr , LOC_EXTLANG_TAG TSRMLS_CC);
954    if( !handleAppendResult( result, loc_name TSRMLS_CC)){
955        RETURN_FALSE;
956    }
957
958    /* Script */
959    result = append_key_value(loc_name, hash_arr , LOC_SCRIPT_TAG);
960    if( !handleAppendResult( result, loc_name TSRMLS_CC)){
961        RETURN_FALSE;
962    }
963
964    /* Region */
965    result = append_key_value( loc_name, hash_arr , LOC_REGION_TAG);
966    if( !handleAppendResult( result, loc_name TSRMLS_CC)){
967        RETURN_FALSE;
968    }
969
970    /* Variant */
971    result = append_multiple_key_values( loc_name, hash_arr , LOC_VARIANT_TAG TSRMLS_CC);
972    if( !handleAppendResult( result, loc_name TSRMLS_CC)){
973        RETURN_FALSE;
974    }
975
976    /* Private */
977    result = append_multiple_key_values( loc_name, hash_arr , LOC_PRIVATE_TAG TSRMLS_CC);
978    if( !handleAppendResult( result, loc_name TSRMLS_CC)){
979        RETURN_FALSE;
980    }
981
982    RETURN_SMART_STR(loc_name);
983}
984/* }}} */
985
986
/*{{{
* Parses the locale and returns the private-use subtags, if any.
* e.g. for locale='en_US-x-prv1-prv2-prv3' the result is a newly
* allocated copy of 'prv1-prv2-prv3'.
*
* Returns NULL when loc_name is NULL or empty, when no 'x'/'X' singleton is
* found, or when nothing follows the singleton and its separator.
* A non-NULL result is allocated with estrndup and must be released by the
* caller with efree.
*/
static char* get_private_subtags(const char* loc_name)
{
    const char* cursor       = loc_name;
    int         singletonPos = 0;
    int         len          = 0;

    if( loc_name == NULL || loc_name[0] == '\0' ){
        return NULL;
    }

    len = (int)strlen(cursor);
    while( (singletonPos = getSingletonPos(cursor)) != -1 ){
        const char singleton = cursor[singletonPos];

        if( singleton == 'x' || singleton == 'X' ){
            /* Private subtag start found. The payload begins after the
             * singleton and its separator; ">=" (rather than "==") makes
             * sure a negative length can never reach estrndup. */
            if( singletonPos + 2 >= len ){
                return NULL;
            }
            return estrndup( cursor + singletonPos + 2, len - (singletonPos + 2) );
        }

        if( singletonPos + 1 >= len ){
            /* Singleton is the last character: nothing left to scan. */
            break;
        }

        /* Some other singleton: continue the search just after it. */
        cursor += singletonPos + 1;
        len = (int)strlen(cursor);
    }

    return NULL;
}
1034/* }}} */
1035
1036/* {{{ code used by locale_parse
1037*/
1038static int add_array_entry(const char* loc_name, zval* hash_arr, char* key_name TSRMLS_DC)
1039{
1040    char*   key_value   = NULL;
1041    char*   cur_key_name    = NULL;
1042    char*   token           = NULL;
1043    char*   last_ptr    = NULL;
1044
1045    int result      = 0;
1046    int     cur_result      = 0;
1047    int     cnt         = 0;
1048
1049
1050    if( strcmp(key_name , LOC_PRIVATE_TAG)==0 ){
1051        key_value = get_private_subtags( loc_name );
1052        result = 1;
1053    } else {
1054        key_value = get_icu_value_internal( loc_name , key_name , &result,1 );
1055    }
1056    if( (strcmp(key_name , LOC_PRIVATE_TAG)==0) ||
1057        ( strcmp(key_name , LOC_VARIANT_TAG)==0) ){
1058        if( result > 0 && key_value){
1059            /* Tokenize on the "_" or "-"  */
1060            token = php_strtok_r( key_value , DELIMITER ,&last_ptr);
1061            if( cur_key_name ){
1062                efree( cur_key_name);
1063            }
1064            cur_key_name = (char*)ecalloc( 25,  25);
1065            sprintf( cur_key_name , "%s%d", key_name , cnt++);
1066            add_assoc_string( hash_arr, cur_key_name , token);
1067            /* tokenize on the "_" or "-" and stop  at singleton if any */
1068            while( (token = php_strtok_r(NULL , DELIMITER , &last_ptr)) && (strlen(token)>1) ){
1069                sprintf( cur_key_name , "%s%d", key_name , cnt++);
1070                add_assoc_string( hash_arr, cur_key_name , token);
1071            }
1072/*
1073            if( strcmp(key_name, LOC_PRIVATE_TAG) == 0 ){
1074            }
1075*/
1076        }
1077    } else {
1078        if( result == 1 ){
1079            add_assoc_string( hash_arr, key_name , key_value);
1080            cur_result = 1;
1081        }
1082    }
1083
1084    if( cur_key_name ){
1085        efree( cur_key_name);
1086    }
1087    /*if( key_name != LOC_PRIVATE_TAG && key_value){*/
1088    if( key_value){
1089        efree(key_value);
1090    }
1091    return cur_result;
1092}
1093/* }}} */
1094
1095/* {{{ proto static array Locale::parseLocale($locale)
1096* parses a locale-id into an array the different parts of it
1097 }}} */
1098/* {{{ proto static array parse_locale($locale)
1099* parses a locale-id into an array the different parts of it
1100*/
1101PHP_FUNCTION(locale_parse)
1102{
1103    const char* loc_name        = NULL;
1104    size_t         loc_name_len    = 0;
1105    int         grOffset        = 0;
1106
1107    intl_error_reset( NULL TSRMLS_CC );
1108
1109    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
1110        &loc_name, &loc_name_len ) == FAILURE)
1111    {
1112        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1113             "locale_parse: unable to parse input params", 0 TSRMLS_CC );
1114
1115        RETURN_FALSE;
1116    }
1117
1118    if(loc_name_len == 0) {
1119        loc_name = intl_locale_get_default(TSRMLS_C);
1120    }
1121
1122    array_init( return_value );
1123
1124    grOffset =  findOffset( LOC_GRANDFATHERED , loc_name );
1125    if( grOffset >= 0 ){
1126        add_assoc_string( return_value , LOC_GRANDFATHERED_LANG_TAG, (char *)loc_name);
1127    }
1128    else{
1129        /* Not grandfathered */
1130        add_array_entry( loc_name , return_value , LOC_LANG_TAG TSRMLS_CC);
1131        add_array_entry( loc_name , return_value , LOC_SCRIPT_TAG TSRMLS_CC);
1132        add_array_entry( loc_name , return_value , LOC_REGION_TAG TSRMLS_CC);
1133        add_array_entry( loc_name , return_value , LOC_VARIANT_TAG TSRMLS_CC);
1134        add_array_entry( loc_name , return_value , LOC_PRIVATE_TAG TSRMLS_CC);
1135    }
1136}
1137/* }}} */
1138
1139/* {{{ proto static array Locale::getAllVariants($locale)
1140* gets an array containing the list of variants, or null
1141 }}} */
1142/* {{{ proto static array locale_get_all_variants($locale)
1143* gets an array containing the list of variants, or null
1144*/
1145PHP_FUNCTION(locale_get_all_variants)
1146{
1147    const char*     loc_name        = NULL;
1148    size_t          loc_name_len    = 0;
1149
1150    int result      = 0;
1151    char*   token       = NULL;
1152    char*   variant     = NULL;
1153    char*   saved_ptr   = NULL;
1154
1155    intl_error_reset( NULL TSRMLS_CC );
1156
1157    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
1158    &loc_name, &loc_name_len ) == FAILURE)
1159    {
1160        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1161         "locale_parse: unable to parse input params", 0 TSRMLS_CC );
1162
1163        RETURN_FALSE;
1164    }
1165
1166    if(loc_name_len == 0) {
1167        loc_name = intl_locale_get_default(TSRMLS_C);
1168    }
1169
1170
1171    array_init( return_value );
1172
1173    /* If the locale is grandfathered, stop, no variants */
1174    if( findOffset( LOC_GRANDFATHERED , loc_name ) >=  0 ){
1175        /* ("Grandfathered Tag. No variants."); */
1176    }
1177    else {
1178    /* Call ICU variant */
1179        variant = get_icu_value_internal( loc_name , LOC_VARIANT_TAG , &result ,0);
1180        if( result > 0 && variant){
1181            /* Tokenize on the "_" or "-" */
1182            token = php_strtok_r( variant , DELIMITER , &saved_ptr);
1183            add_next_index_stringl( return_value, token , strlen(token));
1184            /* tokenize on the "_" or "-" and stop  at singleton if any */
1185            while( (token = php_strtok_r(NULL , DELIMITER, &saved_ptr)) && (strlen(token)>1) ){
1186                add_next_index_stringl( return_value, token , strlen(token));
1187            }
1188        }
1189        if( variant ){
1190            efree( variant );
1191        }
1192    }
1193
1194
1195}
1196/* }}} */
1197
/*{{{
* Writes a normalized copy of str into retstr: every character is converted
* to lower case and every hyphen is replaced with an underscore. The copy is
* NUL-terminated.
*
* retstr must have room for strlen(str)+1 bytes.
* Returns 1 on success. Returns 0, leaving retstr untouched, when str is NULL
* or empty.
*/
static int strToMatch(const char* str ,char *retstr)
{
    if( str == NULL || str[0] == '\0' ){
        return 0;
    }

    for( ; *str != '\0'; str++, retstr++ ){
        if( *str == '-' ){
            *retstr = '_';
        } else {
            /* tolower() is only defined for unsigned char values and EOF;
             * the cast keeps bytes >= 0x80 out of undefined behaviour. */
            *retstr = (char)tolower( (unsigned char)*str );
        }
    }
    *retstr = '\0';

    return 1;
}
1231/* }}} */
1232
1233/* {{{ proto static boolean Locale::filterMatches(string $langtag, string $locale[, bool $canonicalize])
1234* Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm
1235*/
1236/* }}} */
1237/* {{{ proto boolean locale_filter_matches(string $langtag, string $locale[, bool $canonicalize])
1238* Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm
1239*/
1240PHP_FUNCTION(locale_filter_matches)
1241{
1242    char*           lang_tag        = NULL;
1243    size_t          lang_tag_len    = 0;
1244    const char*     loc_range       = NULL;
1245    size_t          loc_range_len   = 0;
1246
1247    int     result      = 0;
1248    char*       token       = 0;
1249    char*       chrcheck    = NULL;
1250
1251    char*           can_lang_tag    = NULL;
1252    char*           can_loc_range   = NULL;
1253
1254    char*           cur_lang_tag    = NULL;
1255    char*           cur_loc_range   = NULL;
1256
1257    zend_bool   boolCanonical   = 0;
1258    UErrorCode  status      = U_ZERO_ERROR;
1259
1260    intl_error_reset( NULL TSRMLS_CC );
1261
1262    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "ss|b",
1263        &lang_tag, &lang_tag_len , &loc_range , &loc_range_len ,
1264        &boolCanonical) == FAILURE)
1265    {
1266        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1267        "locale_filter_matches: unable to parse input params", 0 TSRMLS_CC );
1268
1269        RETURN_FALSE;
1270    }
1271
1272    if(loc_range_len == 0) {
1273        loc_range = intl_locale_get_default(TSRMLS_C);
1274    }
1275
1276    if( strcmp(loc_range,"*")==0){
1277        RETURN_TRUE;
1278    }
1279
1280    if( boolCanonical ){
1281        /* canonicalize loc_range */
1282        can_loc_range=get_icu_value_internal( loc_range , LOC_CANONICALIZE_TAG , &result , 0);
1283        if( result ==0) {
1284            intl_error_set( NULL, status,
1285                "locale_filter_matches : unable to canonicalize loc_range" , 0 TSRMLS_CC );
1286            RETURN_FALSE;
1287        }
1288
1289        /* canonicalize lang_tag */
1290        can_lang_tag = get_icu_value_internal( lang_tag , LOC_CANONICALIZE_TAG , &result ,  0);
1291        if( result ==0) {
1292            intl_error_set( NULL, status,
1293                "locale_filter_matches : unable to canonicalize lang_tag" , 0 TSRMLS_CC );
1294            RETURN_FALSE;
1295        }
1296
1297        /* Convert to lower case for case-insensitive comparison */
1298        cur_lang_tag = ecalloc( 1, strlen(can_lang_tag) + 1);
1299
1300        /* Convert to lower case for case-insensitive comparison */
1301        result = strToMatch( can_lang_tag , cur_lang_tag);
1302        if( result == 0) {
1303            efree( cur_lang_tag );
1304            efree( can_lang_tag );
1305            RETURN_FALSE;
1306        }
1307
1308        cur_loc_range = ecalloc( 1, strlen(can_loc_range) + 1);
1309        result = strToMatch( can_loc_range , cur_loc_range );
1310        if( result == 0) {
1311            efree( cur_lang_tag );
1312            efree( can_lang_tag );
1313            efree( cur_loc_range );
1314            efree( can_loc_range );
1315            RETURN_FALSE;
1316        }
1317
1318        /* check if prefix */
1319        token   = strstr( cur_lang_tag , cur_loc_range );
1320
1321        if( token && (token==cur_lang_tag) ){
1322            /* check if the char. after match is SEPARATOR */
1323            chrcheck = token + (strlen(cur_loc_range));
1324            if( isIDSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
1325                if( cur_lang_tag){
1326                    efree( cur_lang_tag );
1327                }
1328                if( cur_loc_range){
1329                    efree( cur_loc_range );
1330                }
1331                if( can_lang_tag){
1332                    efree( can_lang_tag );
1333                }
1334                if( can_loc_range){
1335                    efree( can_loc_range );
1336                }
1337                RETURN_TRUE;
1338            }
1339        }
1340
1341        /* No prefix as loc_range */
1342        if( cur_lang_tag){
1343            efree( cur_lang_tag );
1344        }
1345        if( cur_loc_range){
1346            efree( cur_loc_range );
1347        }
1348        if( can_lang_tag){
1349            efree( can_lang_tag );
1350        }
1351        if( can_loc_range){
1352            efree( can_loc_range );
1353        }
1354        RETURN_FALSE;
1355
1356    } /* end of if isCanonical */
1357    else{
1358        /* Convert to lower case for case-insensitive comparison */
1359        cur_lang_tag = ecalloc( 1, strlen(lang_tag ) + 1);
1360
1361        result = strToMatch( lang_tag , cur_lang_tag);
1362        if( result == 0) {
1363            efree( cur_lang_tag );
1364            RETURN_FALSE;
1365        }
1366        cur_loc_range = ecalloc( 1, strlen(loc_range ) + 1);
1367        result = strToMatch( loc_range , cur_loc_range );
1368        if( result == 0) {
1369            efree( cur_lang_tag );
1370            efree( cur_loc_range );
1371            RETURN_FALSE;
1372        }
1373
1374        /* check if prefix */
1375        token   = strstr( cur_lang_tag , cur_loc_range );
1376
1377        if( token && (token==cur_lang_tag) ){
1378            /* check if the char. after match is SEPARATOR */
1379            chrcheck = token + (strlen(cur_loc_range));
1380            if( isIDSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
1381                if( cur_lang_tag){
1382                    efree( cur_lang_tag );
1383                }
1384                if( cur_loc_range){
1385                    efree( cur_loc_range );
1386                }
1387                RETURN_TRUE;
1388            }
1389        }
1390
1391        /* No prefix as loc_range */
1392        if( cur_lang_tag){
1393            efree( cur_lang_tag );
1394        }
1395        if( cur_loc_range){
1396            efree( cur_loc_range );
1397        }
1398        RETURN_FALSE;
1399
1400    }
1401}
1402/* }}} */
1403
1404static void array_cleanup( char* arr[] , int arr_size)
1405{
1406    int i=0;
1407    for( i=0; i< arr_size; i++ ){
1408        if( arr[i*2] ){
1409            efree( arr[i*2]);
1410        }
1411    }
1412    efree(arr);
1413}
1414
1415#define LOOKUP_CLEAN_RETURN(value)  array_cleanup(cur_arr, cur_arr_len); return (value)
1416/* {{{
1417* returns the lookup result to lookup_loc_range_src_php
1418* internal function
1419*/
1420static char* lookup_loc_range(const char* loc_range, HashTable* hash_arr, int canonicalize  TSRMLS_DC)
1421{
1422    int i = 0;
1423    int cur_arr_len = 0;
1424    int result = 0;
1425
1426    char* lang_tag = NULL;
1427    zval* ele_value = NULL;
1428    char** cur_arr = NULL;
1429
1430    char* cur_loc_range = NULL;
1431    char* can_loc_range = NULL;
1432    int saved_pos = 0;
1433
1434    char* return_value = NULL;
1435
1436    cur_arr = ecalloc(zend_hash_num_elements(hash_arr)*2, sizeof(char *));
1437    ZEND_HASH_FOREACH_VAL(hash_arr, ele_value) {
1438    /* convert the array to lowercase , also replace hyphens with the underscore and store it in cur_arr */
1439        if(Z_TYPE_P(ele_value)!= IS_STRING) {
1440            /* element value is not a string */
1441            intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: locale array element is not a string", 0 TSRMLS_CC);
1442            LOOKUP_CLEAN_RETURN(NULL);
1443        }
1444        cur_arr[cur_arr_len*2] = estrndup(Z_STRVAL_P(ele_value), Z_STRLEN_P(ele_value));
1445        result = strToMatch(Z_STRVAL_P(ele_value), cur_arr[cur_arr_len*2]);
1446        if(result == 0) {
1447            intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag", 0 TSRMLS_CC);
1448            LOOKUP_CLEAN_RETURN(NULL);
1449        }
1450        cur_arr[cur_arr_len*2+1] = Z_STRVAL_P(ele_value);
1451        cur_arr_len++ ;
1452    } ZEND_HASH_FOREACH_END(); /* end of for */
1453
1454    /* Canonicalize array elements */
1455    if(canonicalize) {
1456        for(i=0; i<cur_arr_len; i++) {
1457            lang_tag = get_icu_value_internal(cur_arr[i*2], LOC_CANONICALIZE_TAG, &result, 0);
1458            if(result != 1 || lang_tag == NULL || !lang_tag[0]) {
1459                if(lang_tag) {
1460                    efree(lang_tag);
1461                }
1462                intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0 TSRMLS_CC);
1463                LOOKUP_CLEAN_RETURN(NULL);
1464            }
1465            cur_arr[i*2] = erealloc(cur_arr[i*2], strlen(lang_tag)+1);
1466            result = strToMatch(lang_tag, cur_arr[i*2]);
1467            efree(lang_tag);
1468            if(result == 0) {
1469                intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0 TSRMLS_CC);
1470                LOOKUP_CLEAN_RETURN(NULL);
1471            }
1472        }
1473
1474    }
1475
1476    if(canonicalize) {
1477        /* Canonicalize the loc_range */
1478        can_loc_range = get_icu_value_internal(loc_range, LOC_CANONICALIZE_TAG, &result , 0);
1479        if( result != 1 || can_loc_range == NULL || !can_loc_range[0]) {
1480            /* Error */
1481            intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize loc_range" , 0 TSRMLS_CC );
1482            if(can_loc_range) {
1483                efree(can_loc_range);
1484            }
1485            LOOKUP_CLEAN_RETURN(NULL);
1486        } else {
1487            loc_range = can_loc_range;
1488        }
1489    }
1490
1491    cur_loc_range = ecalloc(1, strlen(loc_range)+1);
1492    /* convert to lower and replace hyphens */
1493    result = strToMatch(loc_range, cur_loc_range);
1494    if(can_loc_range) {
1495        efree(can_loc_range);
1496    }
1497    if(result == 0) {
1498        intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0 TSRMLS_CC);
1499        LOOKUP_CLEAN_RETURN(NULL);
1500    }
1501
1502    /* Lookup for the lang_tag match */
1503    saved_pos = strlen(cur_loc_range);
1504    while(saved_pos > 0) {
1505        for(i=0; i< cur_arr_len; i++){
1506            if(cur_arr[i*2] != NULL && strlen(cur_arr[i*2]) == saved_pos && strncmp(cur_loc_range, cur_arr[i*2], saved_pos) == 0) {
1507                /* Match found */
1508                return_value = estrdup(canonicalize?cur_arr[i*2]:cur_arr[i*2+1]);
1509                efree(cur_loc_range);
1510                LOOKUP_CLEAN_RETURN(return_value);
1511            }
1512        }
1513        saved_pos = getStrrtokenPos(cur_loc_range, saved_pos);
1514    }
1515
1516    /* Match not found */
1517    efree(cur_loc_range);
1518    LOOKUP_CLEAN_RETURN(NULL);
1519}
1520/* }}} */
1521
/* {{{ proto string Locale::lookup(array $langtag, string $locale[, bool $canonicalize[, string $default = null]])
* Searches the items in $langtag for the best match to the language
* range
*/
/* }}} */
/* {{{ proto string locale_lookup(array $langtag, string $locale[, bool $canonicalize[, string $default = null]])
* Searches the items in $langtag for the best match to the language
* range
*/
1531PHP_FUNCTION(locale_lookup)
1532{
1533    char*       fallback_loc        = NULL;
1534    size_t          fallback_loc_len    = 0;
1535    const char*     loc_range           = NULL;
1536    size_t          loc_range_len       = 0;
1537
1538    zval*       arr             = NULL;
1539    HashTable*  hash_arr        = NULL;
1540    zend_bool   boolCanonical   = 0;
1541    char*       result          =NULL;
1542
1543    intl_error_reset( NULL TSRMLS_CC );
1544
1545    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "as|bs", &arr, &loc_range, &loc_range_len,
1546        &boolCanonical, &fallback_loc, &fallback_loc_len) == FAILURE) {
1547        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "locale_lookup: unable to parse input params", 0 TSRMLS_CC );
1548        RETURN_FALSE;
1549    }
1550
1551    if(loc_range_len == 0) {
1552        loc_range = intl_locale_get_default(TSRMLS_C);
1553    }
1554
1555    hash_arr = HASH_OF(arr);
1556
1557    if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 ) {
1558        RETURN_EMPTY_STRING();
1559    }
1560
1561    result = lookup_loc_range(loc_range, hash_arr, boolCanonical TSRMLS_CC);
1562    if(result == NULL || result[0] == '\0') {
1563        if( fallback_loc ) {
1564            result = estrndup(fallback_loc, fallback_loc_len);
1565        } else {
1566            RETURN_EMPTY_STRING();
1567        }
1568    }
1569
1570    RETVAL_STRINGL(result, strlen(result));
1571    //????
1572    efree(result);
1573}
1574/* }}} */
1575
/* {{{ proto string Locale::acceptFromHttp(string $http_accept)
* Tries to find out best available locale based on HTTP "Accept-Language" header
*/
/* }}} */
/* {{{ proto string locale_accept_from_http(string $http_accept)
* Tries to find out best available locale based on HTTP "Accept-Language" header
*/
1583PHP_FUNCTION(locale_accept_from_http)
1584{
1585    UEnumeration *available;
1586    char *http_accept = NULL;
1587    size_t http_accept_len;
1588    UErrorCode status = 0;
1589    int len;
1590    char resultLocale[INTL_MAX_LOCALE_LEN+1];
1591    UAcceptResult outResult;
1592
1593    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s", &http_accept, &http_accept_len) == FAILURE)
1594    {
1595        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1596        "locale_accept_from_http: unable to parse input parameters", 0 TSRMLS_CC );
1597        RETURN_FALSE;
1598    }
1599
1600    available = ures_openAvailableLocales(NULL, &status);
1601    INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to retrieve locale list");
1602    len = uloc_acceptLanguageFromHTTP(resultLocale, INTL_MAX_LOCALE_LEN,
1603                        &outResult, http_accept, available, &status);
1604    uenum_close(available);
1605    INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to find acceptable locale");
1606    if (len < 0 || outResult == ULOC_ACCEPT_FAILED) {
1607        RETURN_FALSE;
1608    }
1609    RETURN_STRINGL(resultLocale, len);
1610}
1611/* }}} */
1612
1613/*
1614 * Local variables:
1615 * tab-width: 4
1616 * c-basic-offset: 4
1617 * End:
1618 * vim600: noet sw=4 ts=4 fdm=marker
1619 * vim<600: noet sw=4 ts=4
1620 *can_loc_len
1621*/
1622