1/*
2   +----------------------------------------------------------------------+
3   | PHP Version 5                                                        |
4   +----------------------------------------------------------------------+
5   | This source file is subject to version 3.01 of the PHP license,      |
6   | that is bundled with this package in the file LICENSE, and is        |
7   | available through the world-wide-web at the following url:           |
8   | http://www.php.net/license/3_01.txt                                  |
9   | If you did not receive a copy of the PHP license and are unable to   |
10   | obtain it through the world-wide-web, please send a note to          |
11   | license@php.net so we can mail you a copy immediately.               |
12   +----------------------------------------------------------------------+
13   | Authors: Kirti Velankar <kirtig@yahoo-inc.com>                       |
14   +----------------------------------------------------------------------+
15*/
16
17/* $Id$ */
18
19#ifdef HAVE_CONFIG_H
20#include "config.h"
21#endif
22
23#include <unicode/ustring.h>
24#include <unicode/udata.h>
25#include <unicode/putil.h>
26#include <unicode/ures.h>
27
28#include "php_intl.h"
29#include "locale.h"
30#include "locale_class.h"
31#include "locale_methods.h"
32#include "intl_convert.h"
33#include "intl_data.h"
34
35#include <zend_API.h>
36#include <zend.h>
37#include <php.h>
38#include "main/php_ini.h"
39#include "ext/standard/php_smart_str.h"
40
41ZEND_EXTERN_MODULE_GLOBALS( intl )
42
/* String constants used while composing locale IDs and selecting ICU getters */
#define SEPARATOR "_"       /* inserted between subtags by the append_* helpers */
#define SEPARATOR1 "-"
#define DELIMITER "-_"
#define EXTLANG_PREFIX "a"
#define PRIVATE_PREFIX "x"  /* emitted by add_prefix() ahead of private-use subtags */
#define DISP_NAME "name"    /* pseudo tag name that selects uloc_getDisplayName() */

/* Upper bounds on the numbered keys ("variant0", "variant1", ...) that
 * append_multiple_key_values() probes for each kind of tag */
#define MAX_NO_VARIANT  15
#define MAX_NO_EXTLANG  3
#define MAX_NO_PRIVATE  15
#define MAX_NO_LOOKUP_LANG_TAG  100

/* Returned by append_key_value() when the requested key is absent from the array */
#define LOC_NOT_FOUND 1

/* Sizes required for the strings "variant15" , "extlang3", "private12" etc. */
#define VARIANT_KEYNAME_LEN  11
#define EXTLANG_KEYNAME_LEN  10
#define PRIVATE_KEYNAME_LEN  11
62
/* Grandfathered language tags, based on the IANA registry at the time of
 * writing this code.
 *
 * The list is NULL-terminated so it can be scanned with findOffset().
 * getPreferredTag() treats the first LOC_PREFERRED_GRANDFATHERED_LEN entries
 * as the ones that have a replacement in LOC_PREFERRED_GRANDFATHERED at the
 * same index, so entries must not be reordered independently of that array.
 */
static const char * const LOC_GRANDFATHERED[] = {
    "art-lojban",       "i-klingon",        "i-lux",            "i-navajo",     "no-bok",       "no-nyn",
    "cel-gaulish",      "en-GB-oed",        "i-ami",
    "i-bnn",        "i-default",        "i-enochian",
    "i-mingo",      "i-pwn",        "i-tao",
    "i-tay",        "i-tsu",        "sgn-BE-fr",
    "sgn-BE-nl",        "sgn-CH-de",        "zh-cmn",
    "zh-cmn-Hans",      "zh-cmn-Hant",      "zh-gan" ,
    "zh-guoyu",         "zh-hakka",         "zh-min",
    "zh-min-nan",       "zh-wuu",       "zh-xiang",
    "zh-yue",       NULL
};
78
/* Preferred replacements for grandfathered tags, based on the IANA registry
 * at the time of writing this code.
 *
 * This array is kept in sync with LOC_GRANDFATHERED: the entry at index i is
 * the preferred value for LOC_GRANDFATHERED[i]. Only the first
 * LOC_PREFERRED_GRANDFATHERED_LEN grandfathered tags have a preferred value;
 * the length constant must match the number of non-NULL entries below.
 */
static const int        LOC_PREFERRED_GRANDFATHERED_LEN = 6;
static const char * const   LOC_PREFERRED_GRANDFATHERED[]  = {
    "jbo",          "tlh",          "lb",
    "nv",           "nb",           "nn",
    NULL
};
90
/* Character/prefix classification helpers for locale IDs.
 * Every macro argument is parenthesized so that compound expressions
 * (e.g. "cond ? a : b" or "ptr + n") expand with the intended precedence.
 * Arguments are evaluated more than once: do not pass expressions with
 * side effects. */

/* returns TRUE if a is an ID separator ('_' or '-'), FALSE otherwise */
#define isIDSeparator(a) (((a) == '_') || ((a) == '-'))
/* returns TRUE if a starts the keyword section of a locale ID */
#define isKeywordSeparator(a) ((a) == '@')
/* returns TRUE if a is the terminating NUL of the tag */
#define isEndOfTag(a) ((a) == '\0')

/* returns TRUE if a is one of the special prefix letters x/X/i/I */
#define isPrefixLetter(a) (((a) == 'x') || ((a) == 'X') || ((a) == 'i') || ((a) == 'I'))

/* returns TRUE if one of the special prefixes is here (s=string)
  'x-' or 'i-' */
#define isIDPrefix(s) (isPrefixLetter((s)[0]) && isIDSeparator((s)[1]))
#define isKeywordPrefix(s) (isKeywordSeparator((s)[0]))

/* Dot terminates it because of POSIX form  where dot precedes the codepage
 * except for variant */
#define isTerminator(a) (((a) == 0) || ((a) == '.') || ((a) == '@'))
106
/* {{{ findOffset
 * Scans the NULL-terminated string array 'list' for the first entry that
 * compares equal (strcmp, i.e. case-sensitively) to 'key'.
 * returns the zero-based index of that entry, or -1 if not present */
static int16_t findOffset(const char* const* list, const char* key)
{
    int idx;

    for (idx = 0; list[idx] != NULL; idx++) {
        if (strcmp(key, list[idx]) == 0) {
            return (int16_t)idx;
        }
    }

    return -1;
}
/*}}}*/
123
124static char* getPreferredTag(const char* gf_tag)
125{
126    char* result = NULL;
127    int grOffset = 0;
128
129    grOffset = findOffset( LOC_GRANDFATHERED ,gf_tag);
130    if(grOffset < 0) {
131        return NULL;
132    }
133    if( grOffset < LOC_PREFERRED_GRANDFATHERED_LEN ){
134        /* return preferred tag */
135        result = estrdup( LOC_PREFERRED_GRANDFATHERED[grOffset] );
136    } else {
137        /* Return correct grandfathered language tag */
138        result = estrdup( LOC_GRANDFATHERED[grOffset] );
139    }
140    return result;
141}
142
/* {{{
* Reverse-direction counterpart of strtok_r used by the lookup code:
* walks backwards from savedPos-1 to the nearest ID separator.
* If a separator also sits two characters earlier (i.e. the subtag in front
* is a one-character singleton), the earlier separator position is used.
* returns that position, or -1 if there is no token or the position
* would be the very start of the string
*/
static int getStrrtokenPos(char* str, int savedPos)
{
    int pos = savedPos;

    while( --pos >= 0 ){
        if( !isIDSeparator(str[pos]) ){
            continue;
        }
        /* delimiter found; step over a preceding singleton if there is one */
        if( pos >= 2 && isIDSeparator(str[pos-2]) ){
            pos -= 2;
        }
        /* Position 0 means an invalid locale such as '-x-xyz' or '-sl_Latn' */
        return ( pos < 1 ) ? -1 : pos;
    }

    return -1;
}
/* }}} */
172
/* {{{
* Forward search for a singleton (one-character subtag) in str.
* returns 0 if the first separator is at index 1 (string of the form
*   x-avy or a-prv1),
* returns the index of the singleton character itself when a separator is
*   followed two characters later by another separator,
* returns -1 if no singleton is present or str is NULL/empty
*/
static int getSingletonPos(const char* str)
{
    int i;
    int len;

    if( str == NULL ){
        return -1;
    }

    len = (int)strlen(str);
    for( i = 0; i < len; i++ ){
        if( !isIDSeparator(str[i]) ){
            continue;
        }
        if( i == 1 ){
            /* string is of the form x-avy or a-prv1 */
            return 0;
        }
        /* Only look two characters ahead when that index is inside the
         * string; a trailing separator (e.g. "en-") would otherwise read
         * past the terminating NUL. */
        if( i + 2 < len && isIDSeparator(str[i+2]) ){
            /* str[i+1] is a singleton enclosed by two separators */
            return i + 1;
        }
    }

    return -1;
}
/* }}} */
206
207/* {{{ proto static string Locale::getDefault(  )
208   Get default locale */
209/* }}} */
210/* {{{ proto static string locale_get_default( )
211   Get default locale */
212PHP_NAMED_FUNCTION(zif_locale_get_default)
213{
214    RETURN_STRING( intl_locale_get_default( TSRMLS_C ), TRUE );
215}
216
217/* }}} */
218
219/* {{{ proto static string Locale::setDefault( string $locale )
220   Set default locale */
221/* }}} */
222/* {{{ proto static string locale_set_default( string $locale )
223   Set default locale */
224PHP_NAMED_FUNCTION(zif_locale_set_default)
225{
226    char* locale_name = NULL;
227    int   len=0;
228
229    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC,  "s",
230        &locale_name ,&len ) == FAILURE)
231    {
232        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
233                "locale_set_default: unable to parse input params", 0 TSRMLS_CC );
234
235        RETURN_FALSE;
236    }
237
238    if(len == 0) {
239        locale_name =  (char *)uloc_getDefault() ;
240        len = strlen(locale_name);
241    }
242
243    zend_alter_ini_entry(LOCALE_INI_NAME, sizeof(LOCALE_INI_NAME), locale_name, len, PHP_INI_USER, PHP_INI_STAGE_RUNTIME);
244
245    RETURN_TRUE;
246}
247/* }}} */
248
249/* {{{
250* Gets the value from ICU
251* common code shared by get_primary_language,get_script or get_region or get_variant
252* result = 0 if error, 1 if successful , -1 if no value
253*/
254static char* get_icu_value_internal( const char* loc_name , char* tag_name, int* result , int fromParseLocale)
255{
256    char*       tag_value   = NULL;
257    int32_t         tag_value_len   = 512;
258
259    int     singletonPos    = 0;
260    char*           mod_loc_name    = NULL;
261    int         grOffset    = 0;
262
263    int32_t         buflen          = 512;
264    UErrorCode      status          = U_ZERO_ERROR;
265
266
267    if( strcmp(tag_name, LOC_CANONICALIZE_TAG) != 0 ){
268        /* Handle  grandfathered languages */
269        grOffset =  findOffset( LOC_GRANDFATHERED , loc_name );
270        if( grOffset >= 0 ){
271            if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
272                return estrdup(loc_name);
273            } else {
274                /* Since Grandfathered , no value , do nothing , retutn NULL */
275                return NULL;
276            }
277        }
278
279    if( fromParseLocale==1 ){
280        /* Handle singletons */
281        if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
282            if( strlen(loc_name)>1 && (isIDPrefix(loc_name) == 1) ){
283                return estrdup(loc_name);
284            }
285        }
286
287        singletonPos = getSingletonPos( loc_name );
288        if( singletonPos == 0){
289            /* singleton at start of script, region , variant etc.
290             * or invalid singleton at start of language */
291            return NULL;
292        } else if( singletonPos > 0 ){
293            /* singleton at some position except at start
294             * strip off the singleton and rest of the loc_name */
295            mod_loc_name = estrndup ( loc_name , singletonPos-1);
296        }
297    } /* end of if fromParse */
298
299    } /* end of if != LOC_CANONICAL_TAG */
300
301    if( mod_loc_name == NULL){
302        mod_loc_name = estrdup(loc_name );
303    }
304
305    /* Proceed to ICU */
306    do{
307        tag_value = erealloc( tag_value , buflen  );
308        tag_value_len = buflen;
309
310        if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
311            buflen = uloc_getScript ( mod_loc_name ,tag_value , tag_value_len , &status);
312        }
313        if( strcmp(tag_name , LOC_LANG_TAG )==0 ){
314            buflen = uloc_getLanguage ( mod_loc_name ,tag_value , tag_value_len , &status);
315        }
316        if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
317            buflen = uloc_getCountry ( mod_loc_name ,tag_value , tag_value_len , &status);
318        }
319        if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
320            buflen = uloc_getVariant ( mod_loc_name ,tag_value , tag_value_len , &status);
321        }
322        if( strcmp(tag_name , LOC_CANONICALIZE_TAG)==0 ){
323            buflen = uloc_canonicalize ( mod_loc_name ,tag_value , tag_value_len , &status);
324        }
325
326        if( U_FAILURE( status ) ) {
327            if( status == U_BUFFER_OVERFLOW_ERROR ) {
328                status = U_ZERO_ERROR;
329                continue;
330            }
331
332            /* Error in retriving data */
333            *result = 0;
334            if( tag_value ){
335                efree( tag_value );
336            }
337            if( mod_loc_name ){
338                efree( mod_loc_name);
339            }
340            return NULL;
341        }
342    } while( buflen > tag_value_len );
343
344    if(  buflen ==0 ){
345        /* No value found */
346        *result = -1;
347        if( tag_value ){
348            efree( tag_value );
349        }
350        if( mod_loc_name ){
351            efree( mod_loc_name);
352        }
353        return NULL;
354    } else {
355        *result = 1;
356    }
357
358    if( mod_loc_name ){
359        efree( mod_loc_name);
360    }
361    return tag_value;
362}
363/* }}} */
364
365/* {{{
366* Gets the value from ICU , called when PHP userspace function is called
367* common code shared by get_primary_language,get_script or get_region or get_variant
368*/
369static void get_icu_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
370{
371
372    const char* loc_name            = NULL;
373    int         loc_name_len        = 0;
374
375    char*       tag_value       = NULL;
376    char*       empty_result    = "";
377
378    int         result          = 0;
379    char*       msg             = NULL;
380
381    UErrorCode  status              = U_ZERO_ERROR;
382
383    intl_error_reset( NULL TSRMLS_CC );
384
385    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
386    &loc_name ,&loc_name_len ) == FAILURE) {
387        spprintf(&msg , 0, "locale_get_%s : unable to parse input params", tag_name );
388        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,  msg , 1 TSRMLS_CC );
389        efree(msg);
390
391        RETURN_FALSE;
392    }
393
394    if(loc_name_len == 0) {
395        loc_name = intl_locale_get_default(TSRMLS_C);
396    }
397
398    /* Call ICU get */
399    tag_value = get_icu_value_internal( loc_name , tag_name , &result ,0);
400
401    /* No value found */
402    if( result == -1 ) {
403        if( tag_value){
404            efree( tag_value);
405        }
406        RETURN_STRING( empty_result , TRUE);
407    }
408
409    /* value found */
410    if( tag_value){
411        RETURN_STRING( tag_value , FALSE);
412    }
413
414    /* Error encountered while fetching the value */
415    if( result ==0) {
416        spprintf(&msg , 0, "locale_get_%s : unable to get locale %s", tag_name , tag_name );
417        intl_error_set( NULL, status, msg , 1 TSRMLS_CC );
418        efree(msg);
419        RETURN_NULL();
420    }
421
422}
423/* }}} */
424
/* {{{ proto static string Locale::getScript($locale)
 * gets the script for the $locale
 }}} */
/* {{{ proto static string locale_get_script($locale)
 * gets the script for the $locale
 * Thin wrapper: forwards the PHP call to get_icu_value_src_php() with
 * LOC_SCRIPT_TAG selecting the script getter.
 */
PHP_FUNCTION( locale_get_script )
{
    get_icu_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
436
/* {{{ proto static string Locale::getRegion($locale)
 * gets the region for the $locale
 }}} */
/* {{{ proto static string locale_get_region($locale)
 * gets the region for the $locale
 * Thin wrapper: forwards the PHP call to get_icu_value_src_php() with
 * LOC_REGION_TAG selecting the region getter.
 */
PHP_FUNCTION( locale_get_region )
{
    get_icu_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
448
/* {{{ proto static string Locale::getPrimaryLanguage($locale)
 * gets the primary language for the $locale
 }}} */
/* {{{ proto static string locale_get_primary_language($locale)
 * gets the primary language for the $locale
 * Thin wrapper: forwards the PHP call to get_icu_value_src_php() with
 * LOC_LANG_TAG selecting the language getter.
 */
PHP_FUNCTION(locale_get_primary_language )
{
    get_icu_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
460
461
/* {{{
 * common code shared by display_xyz functions to  get the value from ICU
 *
 * tag_name selects the ICU call: LOC_LANG_TAG, LOC_SCRIPT_TAG, LOC_REGION_TAG,
 * LOC_VARIANT_TAG or DISP_NAME (full display name).
 * PHP arguments ("s|s"): the locale to describe and, optionally, the locale
 * in which to render the description; the default locale is used for either
 * one when it is empty / omitted.
 * Returns to PHP the UTF-8 display string, or FALSE when the arguments cannot
 * be parsed, the locale name is longer than ULOC_FULLNAME_CAPACITY, a
 * grandfathered tag is queried for anything other than language/name, ICU
 * fails, or the UTF-16 to UTF-8 conversion fails.
 }}} */
static void get_icu_disp_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
{
    const char* loc_name            = NULL;
    int         loc_name_len        = 0;

    const char* disp_loc_name       = NULL;
    int         disp_loc_name_len   = 0;
    int         free_loc_name       = 0;    /* 1 when disp_loc_name was estrdup()ed here */

    UChar*      disp_name       = NULL;     /* UTF-16 buffer filled by ICU */
    int32_t     disp_name_len   = 0;        /* capacity (in UChars) passed to ICU */

    char*       mod_loc_name        = NULL; /* locale actually handed to ICU; always freed here */

    int32_t     buflen              = 512;
    UErrorCode  status              = U_ZERO_ERROR;

    char*       utf8value       = NULL;
    int         utf8value_len       = 0;

    char*       msg                 = NULL;
    int         grOffset        = 0;

    intl_error_reset( NULL TSRMLS_CC );

    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s|s",
        &loc_name, &loc_name_len ,
        &disp_loc_name ,&disp_loc_name_len ) == FAILURE)
    {
        spprintf(&msg , 0, "locale_get_display_%s : unable to parse input params", tag_name );
        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,  msg , 1 TSRMLS_CC );
        efree(msg);
        RETURN_FALSE;
    }

    if(loc_name_len > ULOC_FULLNAME_CAPACITY) {
        /* See bug 67397: overlong locale names cause trouble in uloc_getDisplayName */
        spprintf(&msg , 0, "locale_get_display_%s : name too long", tag_name );
        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,  msg , 1 TSRMLS_CC );
        efree(msg);
        RETURN_FALSE;
    }

    if(loc_name_len == 0) {
        loc_name = intl_locale_get_default(TSRMLS_C);
    }

    if( strcmp(tag_name, DISP_NAME) != 0 ){
        /* Handle grandfathered languages */
        grOffset = findOffset( LOC_GRANDFATHERED , loc_name );
        if( grOffset >= 0 ){
            if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
                /* describe the preferred replacement tag instead */
                mod_loc_name = getPreferredTag( loc_name );
            } else {
                /* Since grandfathered, there is no value: return FALSE.
                 * Nothing has been allocated yet, so no cleanup is needed. */
                RETURN_FALSE;
            }
        }
    } /* end of if != DISP_NAME */

    if( mod_loc_name==NULL ){
        mod_loc_name = estrdup( loc_name );
    }

    /* Check if disp_loc_name passed , if not use default locale */
    if( !disp_loc_name){
        disp_loc_name = estrdup(intl_locale_get_default(TSRMLS_C));
        free_loc_name = 1;
    }

    /* Get the disp_value for the given locale.
     * buflen receives the ICU return value; the loop repeats with a buffer of
     * that size for as long as it exceeds the capacity that was just used. */
    do{
        disp_name = erealloc( disp_name , buflen * sizeof(UChar)  );
        disp_name_len = buflen;

        if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
            buflen = uloc_getDisplayLanguage ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
        } else if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
            buflen = uloc_getDisplayScript ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
        } else if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
            buflen = uloc_getDisplayCountry ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
        } else if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
            buflen = uloc_getDisplayVariant ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
        } else if( strcmp(tag_name , DISP_NAME)==0 ){
            buflen = uloc_getDisplayName ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
        }

        /* U_STRING_NOT_TERMINATED_WARNING is admissible here; don't look for it
         * (the conversion below is given an explicit length) */
        if( U_FAILURE( status ) )
        {
            if( status == U_BUFFER_OVERFLOW_ERROR )
            {
                /* in a do/while, continue jumps to the loop condition */
                status = U_ZERO_ERROR;
                continue;
            }

            spprintf(&msg, 0, "locale_get_display_%s : unable to get locale %s", tag_name , tag_name );
            intl_error_set( NULL, status, msg , 1 TSRMLS_CC );
            efree(msg);
            if( disp_name){
                efree( disp_name );
            }
            if( mod_loc_name){
                efree( mod_loc_name );
            }
            if (free_loc_name) {
                efree((void *)disp_loc_name);
                disp_loc_name = NULL;
            }
            RETURN_FALSE;
        }
    } while( buflen > disp_name_len );

    /* The locale strings are no longer needed once ICU has answered */
    if( mod_loc_name){
        efree( mod_loc_name );
    }
    if (free_loc_name) {
        efree((void *)disp_loc_name);
        disp_loc_name = NULL;
    }
    /* Convert display locale name from UTF-16 to UTF-8. */
    intl_convert_utf16_to_utf8( &utf8value, &utf8value_len, disp_name, buflen, &status );
    efree( disp_name );
    if( U_FAILURE( status ) )
    {
        spprintf(&msg, 0, "locale_get_display_%s :error converting display name for %s to UTF-8", tag_name , tag_name );
        intl_error_set( NULL, status, msg , 1 TSRMLS_CC );
        efree(msg);
        RETURN_FALSE;
    }

    /* FALSE: the return value takes over utf8value without copying it */
    RETVAL_STRINGL( utf8value, utf8value_len , FALSE);

}
/* }}} */
600
/* {{{ proto static string Locale::getDisplayName($locale[, $in_locale = null])
* gets the name for the $locale in $in_locale or default_locale
 }}} */
/* {{{ proto static string get_display_name($locale[, $in_locale = null])
* gets the name for the $locale in $in_locale or default_locale
* Thin wrapper: forwards to get_icu_disp_value_src_php() with DISP_NAME.
*/
PHP_FUNCTION(locale_get_display_name)
{
    get_icu_disp_value_src_php( DISP_NAME , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
612
/* {{{ proto static string Locale::getDisplayLanguage($locale[, $in_locale = null])
* gets the language for the $locale in $in_locale or default_locale
 }}} */
/* {{{ proto static string get_display_language($locale[, $in_locale = null])
* gets the language for the $locale in $in_locale or default_locale
* Thin wrapper: forwards to get_icu_disp_value_src_php() with LOC_LANG_TAG.
*/
PHP_FUNCTION(locale_get_display_language)
{
    get_icu_disp_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
624
/* {{{ proto static string Locale::getDisplayScript($locale, $in_locale = null)
* gets the script for the $locale in $in_locale or default_locale
 }}} */
/* {{{ proto static string get_display_script($locale, $in_locale = null)
* gets the script for the $locale in $in_locale or default_locale
* Thin wrapper: forwards to get_icu_disp_value_src_php() with LOC_SCRIPT_TAG.
*/
PHP_FUNCTION(locale_get_display_script)
{
    get_icu_disp_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
636
/* {{{ proto static string Locale::getDisplayRegion($locale, $in_locale = null)
* gets the region for the $locale in $in_locale or default_locale
 }}} */
/* {{{ proto static string get_display_region($locale, $in_locale = null)
* gets the region for the $locale in $in_locale or default_locale
* Thin wrapper: forwards to get_icu_disp_value_src_php() with LOC_REGION_TAG.
*/
PHP_FUNCTION(locale_get_display_region)
{
    get_icu_disp_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
648
/* {{{
* proto static string Locale::getDisplayVariant($locale, $in_locale = null)
* gets the variant for the $locale in $in_locale or default_locale
 }}} */
/* {{{
* proto static string get_display_variant($locale, $in_locale = null)
* gets the variant for the $locale in $in_locale or default_locale
* Thin wrapper: forwards to get_icu_disp_value_src_php() with LOC_VARIANT_TAG.
*/
PHP_FUNCTION(locale_get_display_variant)
{
    get_icu_disp_value_src_php( LOC_VARIANT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
662
663 /* {{{ proto static array getKeywords(string $locale) {
664 * return an associative array containing keyword-value
665 * pairs for this locale. The keys are keys to the array (doh!)
666 * }}}*/
667 /* {{{ proto static array locale_get_keywords(string $locale) {
668 * return an associative array containing keyword-value
669 * pairs for this locale. The keys are keys to the array (doh!)
670 */
671PHP_FUNCTION( locale_get_keywords )
672{
673    UEnumeration*   e        = NULL;
674    UErrorCode      status   = U_ZERO_ERROR;
675
676    const char*     kw_key        = NULL;
677    int32_t         kw_key_len    = 0;
678
679    const char*         loc_name        = NULL;
680    int             loc_name_len    = 0;
681
682/*
683    ICU expects the buffer to be allocated  before calling the function
684    and so the buffer size has been explicitly specified
685    ICU uloc.h #define  ULOC_KEYWORD_AND_VALUES_CAPACITY   100
686    hence the kw_value buffer size is 100
687*/
688    char*       kw_value        = NULL;
689    int32_t     kw_value_len    = 100;
690
691    intl_error_reset( NULL TSRMLS_CC );
692
693    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
694        &loc_name, &loc_name_len ) == FAILURE)
695    {
696        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
697             "locale_get_keywords: unable to parse input params", 0 TSRMLS_CC );
698
699        RETURN_FALSE;
700    }
701
702    if(loc_name_len == 0) {
703        loc_name = intl_locale_get_default(TSRMLS_C);
704    }
705
706    /* Get the keywords */
707    e = uloc_openKeywords( loc_name, &status );
708    if( e != NULL )
709    {
710        /* Traverse it, filling the return array. */
711        array_init( return_value );
712
713        while( ( kw_key = uenum_next( e, &kw_key_len, &status ) ) != NULL ){
714            kw_value = ecalloc( 1 , kw_value_len  );
715
716            /* Get the keyword value for each keyword */
717            kw_value_len=uloc_getKeywordValue( loc_name,kw_key, kw_value, kw_value_len ,  &status );
718            if (status == U_BUFFER_OVERFLOW_ERROR) {
719                status = U_ZERO_ERROR;
720                kw_value = erealloc( kw_value , kw_value_len+1);
721                kw_value_len=uloc_getKeywordValue( loc_name,kw_key, kw_value, kw_value_len+1 ,  &status );
722            } else if(!U_FAILURE(status)) {
723                kw_value = erealloc( kw_value , kw_value_len+1);
724            }
725            if (U_FAILURE(status)) {
726                    intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "locale_get_keywords: Error encountered while getting the keyword  value for the  keyword", 0 TSRMLS_CC );
727                if( kw_value){
728                    efree( kw_value );
729                }
730                zval_dtor(return_value);
731                RETURN_FALSE;
732            }
733
734            add_assoc_stringl( return_value, (char *)kw_key, kw_value , kw_value_len, 0);
735        } /* end of while */
736
737    } /* end of if e!=NULL */
738
739    uenum_close( e );
740}
741/* }}} */
742
/* {{{ proto static string Locale::canonicalize($locale)
 * @return string the canonicalized locale
 * }}} */
/* {{{ proto static string locale_canonicalize(string $locale)
 * @param string $locale    The locale string to canonicalize
 * Thin wrapper: forwards to get_icu_value_src_php() with
 * LOC_CANONICALIZE_TAG, which selects uloc_canonicalize().
 */
PHP_FUNCTION(locale_canonicalize)
{
    get_icu_value_src_php( LOC_CANONICALIZE_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
754
755/* {{{ append_key_value
756* Internal function which is called from locale_compose
757* gets the value for the key_name and appends to the loc_name
758* returns 1 if successful , -1 if not found ,
759* 0 if array element is not a string , -2 if buffer-overflow
760*/
761static int append_key_value(smart_str* loc_name, HashTable* hash_arr, char* key_name)
762{
763    zval**  ele_value   = NULL;
764
765    if(zend_hash_find(hash_arr , key_name , strlen(key_name) + 1 ,(void **)&ele_value ) == SUCCESS ) {
766        if(Z_TYPE_PP(ele_value)!= IS_STRING ){
767            /* element value is not a string */
768            return FAILURE;
769        }
770        if(strcmp(key_name, LOC_LANG_TAG) != 0 &&
771           strcmp(key_name, LOC_GRANDFATHERED_LANG_TAG)!=0 ) {
772            /* not lang or grandfathered tag */
773            smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
774        }
775        smart_str_appendl(loc_name, Z_STRVAL_PP(ele_value) , Z_STRLEN_PP(ele_value));
776        return SUCCESS;
777    }
778
779    return LOC_NOT_FOUND;
780}
781/* }}} */
782
783/* {{{ append_prefix , appends the prefix needed
784* e.g. private adds 'x'
785*/
786static void add_prefix(smart_str* loc_name, char* key_name)
787{
788    if( strncmp(key_name , LOC_PRIVATE_TAG , 7) == 0 ){
789        smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
790        smart_str_appendl(loc_name, PRIVATE_PREFIX , sizeof(PRIVATE_PREFIX)-1);
791    }
792}
793/* }}} */
794
795/* {{{ append_multiple_key_values
796* Internal function which is called from locale_compose
797* gets the multiple values for the key_name and appends to the loc_name
798* used for 'variant','extlang','private'
799* returns 1 if successful , -1 if not found ,
800* 0 if array element is not a string , -2 if buffer-overflow
801*/
802static int append_multiple_key_values(smart_str* loc_name, HashTable* hash_arr, char* key_name TSRMLS_DC)
803{
804    zval**  ele_value       = NULL;
805    int     i       = 0;
806    int     isFirstSubtag   = 0;
807    int     max_value   = 0;
808
809    /* Variant/ Extlang/Private etc. */
810    if( zend_hash_find( hash_arr , key_name , strlen(key_name) + 1 ,(void **)&ele_value ) == SUCCESS ) {
811        if( Z_TYPE_PP(ele_value) == IS_STRING ){
812            add_prefix( loc_name , key_name);
813
814            smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
815            smart_str_appendl(loc_name, Z_STRVAL_PP(ele_value) , Z_STRLEN_PP(ele_value));
816            return SUCCESS;
817        } else if(Z_TYPE_PP(ele_value) == IS_ARRAY ) {
818            HashPosition pos;
819            HashTable *arr = HASH_OF(*ele_value);
820            zval **data = NULL;
821
822            zend_hash_internal_pointer_reset_ex(arr, &pos);
823            while(zend_hash_get_current_data_ex(arr, (void **)&data, &pos) != FAILURE) {
824                if(Z_TYPE_PP(data) != IS_STRING) {
825                    return FAILURE;
826                }
827                if (isFirstSubtag++ == 0){
828                    add_prefix(loc_name , key_name);
829                }
830                smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
831                smart_str_appendl(loc_name, Z_STRVAL_PP(data) , Z_STRLEN_PP(data));
832                zend_hash_move_forward_ex(arr, &pos);
833            }
834            return SUCCESS;
835        } else {
836            return FAILURE;
837        }
838    } else {
839        char cur_key_name[31];
840        /* Decide the max_value: the max. no. of elements allowed */
841        if( strcmp(key_name , LOC_VARIANT_TAG) ==0 ){
842            max_value  = MAX_NO_VARIANT;
843        }
844        if( strcmp(key_name , LOC_EXTLANG_TAG) ==0 ){
845            max_value  = MAX_NO_EXTLANG;
846        }
847        if( strcmp(key_name , LOC_PRIVATE_TAG) ==0 ){
848            max_value  = MAX_NO_PRIVATE;
849        }
850
851        /* Multiple variant values as variant0, variant1 ,variant2 */
852        isFirstSubtag = 0;
853        for( i=0 ; i< max_value; i++ ){
854            snprintf( cur_key_name , 30, "%s%d", key_name , i);
855            if( zend_hash_find( hash_arr , cur_key_name , strlen(cur_key_name) + 1,(void **)&ele_value ) == SUCCESS ){
856                if( Z_TYPE_PP(ele_value)!= IS_STRING ){
857                    /* variant is not a string */
858                    return FAILURE;
859                }
860                /* Add the contents */
861                if (isFirstSubtag++ == 0){
862                    add_prefix(loc_name , cur_key_name);
863                }
864                smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
865                smart_str_appendl(loc_name, Z_STRVAL_PP(ele_value) , Z_STRLEN_PP(ele_value));
866            }
867        } /* end of for */
868    } /* end of else */
869
870    return SUCCESS;
871}
872/* }}} */
873
874/*{{{
875* If applicable sets error message and aborts locale_compose gracefully
876* returns 0  if locale_compose needs to be aborted
877* otherwise returns 1
878*/
879static int handleAppendResult( int result, smart_str* loc_name TSRMLS_DC)
880{
881    intl_error_reset( NULL TSRMLS_CC );
882    if( result == FAILURE) {
883        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
884             "locale_compose: parameter array element is not a string", 0 TSRMLS_CC );
885        smart_str_free(loc_name);
886        return 0;
887    }
888    return 1;
889}
890/* }}} */
891
/* NUL-terminates the smart_str s and returns its buffer to PHP without
 * copying. Wrapped in do { } while (0) so that the two statements always act
 * as one, e.g. as the body of an unbraced if. */
#define RETURN_SMART_STR(s) do { smart_str_0((s)); RETURN_STRINGL((s)->c, (s)->len, 0); } while (0)
893/* {{{ proto static string Locale::composeLocale($array)
894* Creates a locale by combining the parts of locale-ID passed
895* }}} */
896/* {{{ proto static string compose_locale($array)
897* Creates a locale by combining the parts of locale-ID passed
898* }}} */
899PHP_FUNCTION(locale_compose)
900{
901    smart_str       loc_name_s = {0};
902    smart_str *loc_name = &loc_name_s;
903    zval*           arr = NULL;
904    HashTable*      hash_arr = NULL;
905    int             result = 0;
906
907    intl_error_reset( NULL TSRMLS_CC );
908
909    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "a",
910        &arr) == FAILURE)
911    {
912        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
913             "locale_compose: unable to parse input params", 0 TSRMLS_CC );
914        RETURN_FALSE;
915    }
916
917    hash_arr = HASH_OF( arr );
918
919    if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 )
920        RETURN_FALSE;
921
922    /* Check for grandfathered first */
923    result = append_key_value(loc_name, hash_arr,  LOC_GRANDFATHERED_LANG_TAG);
924    if( result == SUCCESS){
925        RETURN_SMART_STR(loc_name);
926    }
927    if( !handleAppendResult( result, loc_name TSRMLS_CC)){
928        RETURN_FALSE;
929    }
930
931    /* Not grandfathered */
932    result = append_key_value(loc_name, hash_arr , LOC_LANG_TAG);
933    if( result == LOC_NOT_FOUND ){
934        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
935        "locale_compose: parameter array does not contain 'language' tag.", 0 TSRMLS_CC );
936        smart_str_free(loc_name);
937        RETURN_FALSE;
938    }
939    if( !handleAppendResult( result, loc_name TSRMLS_CC)){
940        RETURN_FALSE;
941    }
942
943    /* Extlang */
944    result = append_multiple_key_values(loc_name, hash_arr , LOC_EXTLANG_TAG TSRMLS_CC);
945    if( !handleAppendResult( result, loc_name TSRMLS_CC)){
946        RETURN_FALSE;
947    }
948
949    /* Script */
950    result = append_key_value(loc_name, hash_arr , LOC_SCRIPT_TAG);
951    if( !handleAppendResult( result, loc_name TSRMLS_CC)){
952        RETURN_FALSE;
953    }
954
955    /* Region */
956    result = append_key_value( loc_name, hash_arr , LOC_REGION_TAG);
957    if( !handleAppendResult( result, loc_name TSRMLS_CC)){
958        RETURN_FALSE;
959    }
960
961    /* Variant */
962    result = append_multiple_key_values( loc_name, hash_arr , LOC_VARIANT_TAG TSRMLS_CC);
963    if( !handleAppendResult( result, loc_name TSRMLS_CC)){
964        RETURN_FALSE;
965    }
966
967    /* Private */
968    result = append_multiple_key_values( loc_name, hash_arr , LOC_PRIVATE_TAG TSRMLS_CC);
969    if( !handleAppendResult( result, loc_name TSRMLS_CC)){
970        RETURN_FALSE;
971    }
972
973    RETURN_SMART_STR(loc_name);
974}
975/* }}} */
976
977
/*{{{
* Parses the locale and extracts the private subtags, if any.
* e.g. for locale='en_US-x-prv1-prv2-prv3' the result is 'prv1-prv2-prv3'
*
* returns a newly allocated (estrndup) copy which the caller must efree(),
* or NULL when loc_name is NULL/empty, when no 'x'/'X' singleton is found,
* or when nothing follows the private-use marker.
*/
static char* get_private_subtags(const char* loc_name)
{
    const char* cursor       = NULL;
    int         singletonPos = 0;
    int         len          = 0;

    if( !loc_name || loc_name[0] == '\0' ){
        return NULL;
    }

    cursor = loc_name;
    len    = strlen(cursor);

    while( (singletonPos = getSingletonPos(cursor)) != -1 ){
        if( cursor[singletonPos] == 'x' || cursor[singletonPos] == 'X' ){
            /* private subtags begin after the singleton and its separator */
            if( singletonPos + 2 >= len ){
                /* loc_name ends with '-x-' : there is nothing to return */
                return NULL;
            }
            return estrndup( cursor + singletonPos + 2, len - (singletonPos + 2) );
        }

        if( singletonPos + 1 >= len ){
            /* String end */
            break;
        }

        /* singleton found but not a private one: keep scanning the rest of the string */
        cursor += singletonPos + 1;
        len = strlen(cursor);
    }

    return NULL;
}
1025/* }}} */
1026
1027/* {{{ code used by locale_parse
1028*/
1029static int add_array_entry(const char* loc_name, zval* hash_arr, char* key_name TSRMLS_DC)
1030{
1031    char*   key_value   = NULL;
1032    char*   cur_key_name    = NULL;
1033    char*   token           = NULL;
1034    char*   last_ptr    = NULL;
1035
1036    int result      = 0;
1037    int     cur_result      = 0;
1038    int     cnt         = 0;
1039
1040
1041    if( strcmp(key_name , LOC_PRIVATE_TAG)==0 ){
1042        key_value = get_private_subtags( loc_name );
1043        result = 1;
1044    } else {
1045        key_value = get_icu_value_internal( loc_name , key_name , &result,1 );
1046    }
1047    if( (strcmp(key_name , LOC_PRIVATE_TAG)==0) ||
1048        ( strcmp(key_name , LOC_VARIANT_TAG)==0) ){
1049        if( result > 0 && key_value){
1050            /* Tokenize on the "_" or "-"  */
1051            token = php_strtok_r( key_value , DELIMITER ,&last_ptr);
1052            if( cur_key_name ){
1053                efree( cur_key_name);
1054            }
1055            cur_key_name = (char*)ecalloc( 25,  25);
1056            sprintf( cur_key_name , "%s%d", key_name , cnt++);
1057            add_assoc_string( hash_arr, cur_key_name , token ,TRUE );
1058            /* tokenize on the "_" or "-" and stop  at singleton if any */
1059            while( (token = php_strtok_r(NULL , DELIMITER , &last_ptr)) && (strlen(token)>1) ){
1060                sprintf( cur_key_name , "%s%d", key_name , cnt++);
1061                add_assoc_string( hash_arr, cur_key_name , token , TRUE );
1062            }
1063/*
1064            if( strcmp(key_name, LOC_PRIVATE_TAG) == 0 ){
1065            }
1066*/
1067        }
1068    } else {
1069        if( result == 1 ){
1070            add_assoc_string( hash_arr, key_name , key_value , TRUE );
1071            cur_result = 1;
1072        }
1073    }
1074
1075    if( cur_key_name ){
1076        efree( cur_key_name);
1077    }
1078    /*if( key_name != LOC_PRIVATE_TAG && key_value){*/
1079    if( key_value){
1080        efree(key_value);
1081    }
1082    return cur_result;
1083}
1084/* }}} */
1085
1086/* {{{ proto static array Locale::parseLocale($locale)
1087* parses a locale-id into an array the different parts of it
1088 }}} */
1089/* {{{ proto static array parse_locale($locale)
1090* parses a locale-id into an array the different parts of it
1091*/
1092PHP_FUNCTION(locale_parse)
1093{
1094    const char* loc_name        = NULL;
1095    int         loc_name_len    = 0;
1096    int         grOffset        = 0;
1097
1098    intl_error_reset( NULL TSRMLS_CC );
1099
1100    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
1101        &loc_name, &loc_name_len ) == FAILURE)
1102    {
1103        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1104             "locale_parse: unable to parse input params", 0 TSRMLS_CC );
1105
1106        RETURN_FALSE;
1107    }
1108
1109    if(loc_name_len == 0) {
1110        loc_name = intl_locale_get_default(TSRMLS_C);
1111    }
1112
1113    array_init( return_value );
1114
1115    grOffset =  findOffset( LOC_GRANDFATHERED , loc_name );
1116    if( grOffset >= 0 ){
1117        add_assoc_string( return_value , LOC_GRANDFATHERED_LANG_TAG , estrdup(loc_name) ,FALSE );
1118    }
1119    else{
1120        /* Not grandfathered */
1121        add_array_entry( loc_name , return_value , LOC_LANG_TAG TSRMLS_CC);
1122        add_array_entry( loc_name , return_value , LOC_SCRIPT_TAG TSRMLS_CC);
1123        add_array_entry( loc_name , return_value , LOC_REGION_TAG TSRMLS_CC);
1124        add_array_entry( loc_name , return_value , LOC_VARIANT_TAG TSRMLS_CC);
1125        add_array_entry( loc_name , return_value , LOC_PRIVATE_TAG TSRMLS_CC);
1126    }
1127}
1128/* }}} */
1129
1130/* {{{ proto static array Locale::getAllVariants($locale)
1131* gets an array containing the list of variants, or null
1132 }}} */
1133/* {{{ proto static array locale_get_all_variants($locale)
1134* gets an array containing the list of variants, or null
1135*/
1136PHP_FUNCTION(locale_get_all_variants)
1137{
1138    const char*     loc_name        = NULL;
1139    int         loc_name_len    = 0;
1140
1141    int result      = 0;
1142    char*   token       = NULL;
1143    char*   variant     = NULL;
1144    char*   saved_ptr   = NULL;
1145
1146    intl_error_reset( NULL TSRMLS_CC );
1147
1148    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
1149    &loc_name, &loc_name_len ) == FAILURE)
1150    {
1151        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1152         "locale_parse: unable to parse input params", 0 TSRMLS_CC );
1153
1154        RETURN_FALSE;
1155    }
1156
1157    if(loc_name_len == 0) {
1158        loc_name = intl_locale_get_default(TSRMLS_C);
1159    }
1160
1161
1162    array_init( return_value );
1163
1164    /* If the locale is grandfathered, stop, no variants */
1165    if( findOffset( LOC_GRANDFATHERED , loc_name ) >=  0 ){
1166        /* ("Grandfathered Tag. No variants."); */
1167    }
1168    else {
1169    /* Call ICU variant */
1170        variant = get_icu_value_internal( loc_name , LOC_VARIANT_TAG , &result ,0);
1171        if( result > 0 && variant){
1172            /* Tokenize on the "_" or "-" */
1173            token = php_strtok_r( variant , DELIMITER , &saved_ptr);
1174            add_next_index_stringl( return_value, token , strlen(token) ,TRUE );
1175            /* tokenize on the "_" or "-" and stop  at singleton if any */
1176            while( (token = php_strtok_r(NULL , DELIMITER, &saved_ptr)) && (strlen(token)>1) ){
1177                add_next_index_stringl( return_value, token , strlen(token) ,TRUE );
1178            }
1179        }
1180        if( variant ){
1181            efree( variant );
1182        }
1183    }
1184
1185
1186}
1187/* }}} */
1188
/*{{{
* Writes a normalized copy of str into retstr: every character is converted
* to lower case and every hyphen is replaced with an underscore.
* retstr must provide room for strlen(str)+1 bytes.
*
* returns 1 on success
* returns 0 when str is NULL or empty; retstr is left untouched in that case
*/
static int strToMatch(const char* str ,char *retstr)
{
    if( !str || str[0] == '\0' ){
        return 0;
    }

    for( ; *str != '\0'; str++, retstr++ ){
        if( *str == '-' ){
            *retstr = '_';
        } else {
            /* tolower() is undefined for negative values other than EOF,
             * so bytes >= 0x80 must go through unsigned char first */
            *retstr = (char) tolower( (unsigned char) *str );
        }
    }
    *retstr = '\0';

    return 1;
}
1220/* }}} */
1221
1222/* {{{ proto static boolean Locale::filterMatches(string $langtag, string $locale[, bool $canonicalize])
1223* Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm
1224*/
1225/* }}} */
1226/* {{{ proto boolean locale_filter_matches(string $langtag, string $locale[, bool $canonicalize])
1227* Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm
1228*/
1229PHP_FUNCTION(locale_filter_matches)
1230{
1231    char*           lang_tag        = NULL;
1232    int             lang_tag_len    = 0;
1233    const char*     loc_range       = NULL;
1234    int             loc_range_len   = 0;
1235
1236    int     result      = 0;
1237    char*       token       = 0;
1238    char*       chrcheck    = NULL;
1239
1240    char*           can_lang_tag    = NULL;
1241    char*           can_loc_range   = NULL;
1242
1243    char*           cur_lang_tag    = NULL;
1244    char*           cur_loc_range   = NULL;
1245
1246    zend_bool   boolCanonical   = 0;
1247    UErrorCode  status      = U_ZERO_ERROR;
1248
1249    intl_error_reset( NULL TSRMLS_CC );
1250
1251    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "ss|b",
1252        &lang_tag, &lang_tag_len , &loc_range , &loc_range_len ,
1253        &boolCanonical) == FAILURE)
1254    {
1255        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1256        "locale_filter_matches: unable to parse input params", 0 TSRMLS_CC );
1257
1258        RETURN_FALSE;
1259    }
1260
1261    if(loc_range_len == 0) {
1262        loc_range = intl_locale_get_default(TSRMLS_C);
1263    }
1264
1265    if( strcmp(loc_range,"*")==0){
1266        RETURN_TRUE;
1267    }
1268
1269    if( boolCanonical ){
1270        /* canonicalize loc_range */
1271        can_loc_range=get_icu_value_internal( loc_range , LOC_CANONICALIZE_TAG , &result , 0);
1272        if( result ==0) {
1273            intl_error_set( NULL, status,
1274                "locale_filter_matches : unable to canonicalize loc_range" , 0 TSRMLS_CC );
1275            RETURN_FALSE;
1276        }
1277
1278        /* canonicalize lang_tag */
1279        can_lang_tag = get_icu_value_internal( lang_tag , LOC_CANONICALIZE_TAG , &result ,  0);
1280        if( result ==0) {
1281            intl_error_set( NULL, status,
1282                "locale_filter_matches : unable to canonicalize lang_tag" , 0 TSRMLS_CC );
1283            RETURN_FALSE;
1284        }
1285
1286        /* Convert to lower case for case-insensitive comparison */
1287        cur_lang_tag = ecalloc( 1, strlen(can_lang_tag) + 1);
1288
1289        /* Convert to lower case for case-insensitive comparison */
1290        result = strToMatch( can_lang_tag , cur_lang_tag);
1291        if( result == 0) {
1292            efree( cur_lang_tag );
1293            efree( can_lang_tag );
1294            RETURN_FALSE;
1295        }
1296
1297        cur_loc_range = ecalloc( 1, strlen(can_loc_range) + 1);
1298        result = strToMatch( can_loc_range , cur_loc_range );
1299        if( result == 0) {
1300            efree( cur_lang_tag );
1301            efree( can_lang_tag );
1302            efree( cur_loc_range );
1303            efree( can_loc_range );
1304            RETURN_FALSE;
1305        }
1306
1307        /* check if prefix */
1308        token   = strstr( cur_lang_tag , cur_loc_range );
1309
1310        if( token && (token==cur_lang_tag) ){
1311            /* check if the char. after match is SEPARATOR */
1312            chrcheck = token + (strlen(cur_loc_range));
1313            if( isIDSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
1314                if( cur_lang_tag){
1315                    efree( cur_lang_tag );
1316                }
1317                if( cur_loc_range){
1318                    efree( cur_loc_range );
1319                }
1320                if( can_lang_tag){
1321                    efree( can_lang_tag );
1322                }
1323                if( can_loc_range){
1324                    efree( can_loc_range );
1325                }
1326                RETURN_TRUE;
1327            }
1328        }
1329
1330        /* No prefix as loc_range */
1331        if( cur_lang_tag){
1332            efree( cur_lang_tag );
1333        }
1334        if( cur_loc_range){
1335            efree( cur_loc_range );
1336        }
1337        if( can_lang_tag){
1338            efree( can_lang_tag );
1339        }
1340        if( can_loc_range){
1341            efree( can_loc_range );
1342        }
1343        RETURN_FALSE;
1344
1345    } /* end of if isCanonical */
1346    else{
1347        /* Convert to lower case for case-insensitive comparison */
1348        cur_lang_tag = ecalloc( 1, strlen(lang_tag ) + 1);
1349
1350        result = strToMatch( lang_tag , cur_lang_tag);
1351        if( result == 0) {
1352            efree( cur_lang_tag );
1353            RETURN_FALSE;
1354        }
1355        cur_loc_range = ecalloc( 1, strlen(loc_range ) + 1);
1356        result = strToMatch( loc_range , cur_loc_range );
1357        if( result == 0) {
1358            efree( cur_lang_tag );
1359            efree( cur_loc_range );
1360            RETURN_FALSE;
1361        }
1362
1363        /* check if prefix */
1364        token   = strstr( cur_lang_tag , cur_loc_range );
1365
1366        if( token && (token==cur_lang_tag) ){
1367            /* check if the char. after match is SEPARATOR */
1368            chrcheck = token + (strlen(cur_loc_range));
1369            if( isIDSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
1370                if( cur_lang_tag){
1371                    efree( cur_lang_tag );
1372                }
1373                if( cur_loc_range){
1374                    efree( cur_loc_range );
1375                }
1376                RETURN_TRUE;
1377            }
1378        }
1379
1380        /* No prefix as loc_range */
1381        if( cur_lang_tag){
1382            efree( cur_lang_tag );
1383        }
1384        if( cur_loc_range){
1385            efree( cur_loc_range );
1386        }
1387        RETURN_FALSE;
1388
1389    }
1390}
1391/* }}} */
1392
1393static void array_cleanup( char* arr[] , int arr_size)
1394{
1395    int i=0;
1396    for( i=0; i< arr_size; i++ ){
1397        if( arr[i*2] ){
1398            efree( arr[i*2]);
1399        }
1400    }
1401    efree(arr);
1402}
1403
1404#define LOOKUP_CLEAN_RETURN(value)  array_cleanup(cur_arr, cur_arr_len); return (value)
1405/* {{{
1406* returns the lookup result to lookup_loc_range_src_php
1407* internal function
1408*/
1409static char* lookup_loc_range(const char* loc_range, HashTable* hash_arr, int canonicalize  TSRMLS_DC)
1410{
1411    int i = 0;
1412    int cur_arr_len = 0;
1413    int result = 0;
1414
1415    char* lang_tag = NULL;
1416    zval** ele_value = NULL;
1417    char** cur_arr = NULL;
1418
1419    char* cur_loc_range = NULL;
1420    char* can_loc_range = NULL;
1421    int saved_pos = 0;
1422
1423    char* return_value = NULL;
1424
1425    cur_arr = ecalloc(zend_hash_num_elements(hash_arr)*2, sizeof(char *));
1426    /* convert the array to lowercase , also replace hyphens with the underscore and store it in cur_arr */
1427    for(zend_hash_internal_pointer_reset(hash_arr);
1428        zend_hash_has_more_elements(hash_arr) == SUCCESS;
1429        zend_hash_move_forward(hash_arr)) {
1430
1431        if (zend_hash_get_current_data(hash_arr, (void**)&ele_value) == FAILURE) {
1432            /* Should never actually fail since the key is known to exist.*/
1433            continue;
1434        }
1435        if(Z_TYPE_PP(ele_value)!= IS_STRING) {
1436            /* element value is not a string */
1437            intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: locale array element is not a string", 0 TSRMLS_CC);
1438            LOOKUP_CLEAN_RETURN(NULL);
1439        }
1440        cur_arr[cur_arr_len*2] = estrndup(Z_STRVAL_PP(ele_value), Z_STRLEN_PP(ele_value));
1441        result = strToMatch(Z_STRVAL_PP(ele_value), cur_arr[cur_arr_len*2]);
1442        if(result == 0) {
1443            intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag", 0 TSRMLS_CC);
1444            LOOKUP_CLEAN_RETURN(NULL);
1445        }
1446        cur_arr[cur_arr_len*2+1] = Z_STRVAL_PP(ele_value);
1447        cur_arr_len++ ;
1448    } /* end of for */
1449
1450    /* Canonicalize array elements */
1451    if(canonicalize) {
1452        for(i=0; i<cur_arr_len; i++) {
1453            lang_tag = get_icu_value_internal(cur_arr[i*2], LOC_CANONICALIZE_TAG, &result, 0);
1454            if(result != 1 || lang_tag == NULL || !lang_tag[0]) {
1455                if(lang_tag) {
1456                    efree(lang_tag);
1457                }
1458                intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0 TSRMLS_CC);
1459                LOOKUP_CLEAN_RETURN(NULL);
1460            }
1461            cur_arr[i*2] = erealloc(cur_arr[i*2], strlen(lang_tag)+1);
1462            result = strToMatch(lang_tag, cur_arr[i*2]);
1463            efree(lang_tag);
1464            if(result == 0) {
1465                intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0 TSRMLS_CC);
1466                LOOKUP_CLEAN_RETURN(NULL);
1467            }
1468        }
1469
1470    }
1471
1472    if(canonicalize) {
1473        /* Canonicalize the loc_range */
1474        can_loc_range = get_icu_value_internal(loc_range, LOC_CANONICALIZE_TAG, &result , 0);
1475        if( result != 1 || can_loc_range == NULL || !can_loc_range[0]) {
1476            /* Error */
1477            intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize loc_range" , 0 TSRMLS_CC );
1478            if(can_loc_range) {
1479                efree(can_loc_range);
1480            }
1481            LOOKUP_CLEAN_RETURN(NULL);
1482        } else {
1483            loc_range = can_loc_range;
1484        }
1485    }
1486
1487    cur_loc_range = ecalloc(1, strlen(loc_range)+1);
1488    /* convert to lower and replace hyphens */
1489    result = strToMatch(loc_range, cur_loc_range);
1490    if(can_loc_range) {
1491        efree(can_loc_range);
1492    }
1493    if(result == 0) {
1494        intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0 TSRMLS_CC);
1495        LOOKUP_CLEAN_RETURN(NULL);
1496    }
1497
1498    /* Lookup for the lang_tag match */
1499    saved_pos = strlen(cur_loc_range);
1500    while(saved_pos > 0) {
1501        for(i=0; i< cur_arr_len; i++){
1502            if(cur_arr[i*2] != NULL && strlen(cur_arr[i*2]) == saved_pos && strncmp(cur_loc_range, cur_arr[i*2], saved_pos) == 0) {
1503                /* Match found */
1504                return_value = estrdup(canonicalize?cur_arr[i*2]:cur_arr[i*2+1]);
1505                efree(cur_loc_range);
1506                LOOKUP_CLEAN_RETURN(return_value);
1507            }
1508        }
1509        saved_pos = getStrrtokenPos(cur_loc_range, saved_pos);
1510    }
1511
1512    /* Match not found */
1513    efree(cur_loc_range);
1514    LOOKUP_CLEAN_RETURN(NULL);
1515}
1516/* }}} */
1517
1518/* {{{ proto string Locale::lookup(array $langtag, string $locale[, bool $canonicalize[, string $default = null]])
1519* Searchs the items in $langtag for the best match to the language
1520* range
1521*/
1522/* }}} */
1523/* {{{ proto string locale_lookup(array $langtag, string $locale[, bool $canonicalize[, string $default = null]])
1524* Searchs the items in $langtag for the best match to the language
1525* range
1526*/
1527PHP_FUNCTION(locale_lookup)
1528{
1529    char*       fallback_loc        = NULL;
1530    int         fallback_loc_len    = 0;
1531    const char*     loc_range           = NULL;
1532    int         loc_range_len       = 0;
1533
1534    zval*       arr             = NULL;
1535    HashTable*  hash_arr        = NULL;
1536    zend_bool   boolCanonical   = 0;
1537    char*       result          =NULL;
1538
1539    intl_error_reset( NULL TSRMLS_CC );
1540
1541    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "as|bs", &arr, &loc_range, &loc_range_len,
1542        &boolCanonical, &fallback_loc, &fallback_loc_len) == FAILURE) {
1543        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "locale_lookup: unable to parse input params", 0 TSRMLS_CC );
1544        RETURN_FALSE;
1545    }
1546
1547    if(loc_range_len == 0) {
1548        loc_range = intl_locale_get_default(TSRMLS_C);
1549    }
1550
1551    hash_arr = HASH_OF(arr);
1552
1553    if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 ) {
1554        RETURN_EMPTY_STRING();
1555    }
1556
1557    result = lookup_loc_range(loc_range, hash_arr, boolCanonical TSRMLS_CC);
1558    if(result == NULL || result[0] == '\0') {
1559        if( fallback_loc ) {
1560            result = estrndup(fallback_loc, fallback_loc_len);
1561        } else {
1562            RETURN_EMPTY_STRING();
1563        }
1564    }
1565
1566    RETVAL_STRINGL(result, strlen(result), 0);
1567}
1568/* }}} */
1569
1570/* {{{ proto string Locale::acceptFromHttp(string $http_accept)
1571* Tries to find out best available locale based on HTTP �Accept-Language� header
1572*/
1573/* }}} */
1574/* {{{ proto string locale_accept_from_http(string $http_accept)
1575* Tries to find out best available locale based on HTTP �Accept-Language� header
1576*/
1577PHP_FUNCTION(locale_accept_from_http)
1578{
1579    UEnumeration *available;
1580    char *http_accept = NULL;
1581    int http_accept_len;
1582    UErrorCode status = 0;
1583    int len;
1584    char resultLocale[INTL_MAX_LOCALE_LEN+1];
1585    UAcceptResult outResult;
1586
1587    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s", &http_accept, &http_accept_len) == FAILURE)
1588    {
1589        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1590        "locale_accept_from_http: unable to parse input parameters", 0 TSRMLS_CC );
1591        RETURN_FALSE;
1592    }
1593
1594    available = ures_openAvailableLocales(NULL, &status);
1595    INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to retrieve locale list");
1596    len = uloc_acceptLanguageFromHTTP(resultLocale, INTL_MAX_LOCALE_LEN,
1597                        &outResult, http_accept, available, &status);
1598    uenum_close(available);
1599    INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to find acceptable locale");
1600    if (len < 0 || outResult == ULOC_ACCEPT_FAILED) {
1601        RETURN_FALSE;
1602    }
1603    RETURN_STRINGL(resultLocale, len, 1);
1604}
1605/* }}} */
1606
1607/*
1608 * Local variables:
1609 * tab-width: 4
1610 * c-basic-offset: 4
1611 * End:
1612 * vim600: noet sw=4 ts=4 fdm=marker
1613 * vim<600: noet sw=4 ts=4
1614 *can_loc_len
1615*/
1616