1/*
2   +----------------------------------------------------------------------+
3   | PHP Version 5                                                        |
4   +----------------------------------------------------------------------+
5   | This source file is subject to version 3.01 of the PHP license,      |
6   | that is bundled with this package in the file LICENSE, and is        |
7   | available through the world-wide-web at the following url:           |
8   | http://www.php.net/license/3_01.txt                                  |
9   | If you did not receive a copy of the PHP license and are unable to   |
10   | obtain it through the world-wide-web, please send a note to          |
11   | license@php.net so we can mail you a copy immediately.               |
12   +----------------------------------------------------------------------+
13   | Authors: Kirti Velankar <kirtig@yahoo-inc.com>                       |
14   +----------------------------------------------------------------------+
15*/
16
17/* $Id$ */
18
19#ifdef HAVE_CONFIG_H
20#include "config.h"
21#endif
22
23#include <unicode/ustring.h>
24#include <unicode/udata.h>
25#include <unicode/putil.h>
26#include <unicode/ures.h>
27
28#include "php_intl.h"
29#include "locale.h"
30#include "locale_class.h"
31#include "locale_methods.h"
32#include "intl_convert.h"
33#include "intl_data.h"
34
35#include <zend_API.h>
36#include <zend.h>
37#include <php.h>
38#include "main/php_ini.h"
39#include "ext/standard/php_smart_str.h"
40
41ZEND_EXTERN_MODULE_GLOBALS( intl )
42
/* String pieces used when building locale IDs. */
/* SEPARATOR is written in front of every subtag appended by the compose helpers. */
#define SEPARATOR "_"
/* NOTE(review): SEPARATOR1, DELIMITER and EXTLANG_PREFIX are not referenced in
 * this part of the file; presumably used by the parse/lookup code - confirm. */
#define SEPARATOR1 "-"
#define DELIMITER "-_"
#define EXTLANG_PREFIX "a"
/* PRIVATE_PREFIX is emitted by add_prefix() ahead of private-use subtags. */
#define PRIVATE_PREFIX "x"
/* DISP_NAME is the tag name passed by locale_get_display_name(). */
#define DISP_NAME "name"

/* Upper bounds on the numbered keys ("variant0", "variant1", ...) that
 * append_multiple_key_values() probes for in the input array.
 * NOTE(review): MAX_NO_LOOKUP_LANG_TAG is not used in this part of the file. */
#define MAX_NO_VARIANT  15
#define MAX_NO_EXTLANG  3
#define MAX_NO_PRIVATE  15
#define MAX_NO_LOOKUP_LANG_TAG  100

/* Returned by append_key_value() when the requested key is absent. */
#define LOC_NOT_FOUND 1

/* Sizes required for the strings "variant15" , "extlang3", "private12" etc. */
#define VARIANT_KEYNAME_LEN  11
#define EXTLANG_KEYNAME_LEN  10
#define PRIVATE_KEYNAME_LEN  11
62
/* Grandfathered language tags, based on the IANA registry at the time of
 * writing this code.
 *
 * The list is NULL-terminated so that findOffset() can scan it.
 * Ordering matters: the first LOC_PREFERRED_GRANDFATHERED_LEN entries are the
 * ones that have a preferred replacement, stored at the same offset in
 * LOC_PREFERRED_GRANDFATHERED. New tags without a replacement must be added
 * after those leading entries.
 */
static const char * const LOC_GRANDFATHERED[] = {
    "art-lojban",       "i-klingon",        "i-lux",            "i-navajo",     "no-bok",       "no-nyn",
    "cel-gaulish",      "en-GB-oed",        "i-ami",
    "i-bnn",        "i-default",        "i-enochian",
    "i-mingo",      "i-pwn",        "i-tao",
    "i-tay",        "i-tsu",        "sgn-BE-fr",
    "sgn-BE-nl",        "sgn-CH-de",        "zh-cmn",
    "zh-cmn-Hans",      "zh-cmn-Hant",      "zh-gan" ,
    "zh-guoyu",         "zh-hakka",         "zh-min",
    "zh-min-nan",       "zh-wuu",       "zh-xiang",
    "zh-yue",       NULL
};
78
/* Preferred values for the grandfathered tags, based on the IANA registry at
 * the time of writing this code.
 *
 * This array is kept in sync with LOC_GRANDFATHERED: entry N here is the
 * preferred replacement for entry N there (e.g. "art-lojban" -> "jbo").
 * LOC_PREFERRED_GRANDFATHERED_LEN is the number of non-NULL entries and must
 * be updated by hand whenever a pair is added; getPreferredTag() uses it to
 * decide whether a replacement exists for a given offset.
 */
static const int        LOC_PREFERRED_GRANDFATHERED_LEN = 6;
static const char * const   LOC_PREFERRED_GRANDFATHERED[]  = {
    "jbo",          "tlh",          "lb",
    "nv",           "nb",           "nn",
    NULL
};
90
/* Character classification helpers for locale IDs.
 * Every macro argument is parenthesized so that arbitrary expressions
 * (e.g. `p + 2`, `c ? a : b`) expand correctly. The argument is still
 * evaluated more than once, so do not pass expressions with side effects. */

/* returns TRUE if a is an ID separator, FALSE otherwise */
#define isIDSeparator(a) ((a) == '_' || (a) == '-')
#define isKeywordSeparator(a) ((a) == '@')
#define isEndOfTag(a) ((a) == '\0')

#define isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/* returns TRUE if one of the special prefixes is here (s=string)
   'x-' or 'i-' */
#define isIDPrefix(s) (isPrefixLetter((s)[0])&&isIDSeparator((s)[1]))
#define isKeywordPrefix(s) ( isKeywordSeparator((s)[0]) )

/* Dot terminates it because of POSIX form where dot precedes the codepage
 * except for variant */
#define isTerminator(a)  (((a)==0)||((a)=='.')||((a)=='@'))
106
/* {{{ Look up 'key' in the NULL-terminated string array 'list'.
 * The comparison is an exact, case-sensitive strcmp().
 * Returns the zero-based index of the first match, or -1 when 'key'
 * is not in the list. */
static int16_t findOffset(const char* const* list, const char* key)
{
    size_t idx;

    for (idx = 0; list[idx] != NULL; idx++) {
        if (!strcmp(list[idx], key)) {
            return (int16_t)idx;
        }
    }

    /* ran off the end without a match */
    return -1;
}
122/*}}}*/
123
124static char* getPreferredTag(const char* gf_tag)
125{
126    char* result = NULL;
127    int grOffset = 0;
128
129    grOffset = findOffset( LOC_GRANDFATHERED ,gf_tag);
130    if(grOffset < 0) {
131        return NULL;
132    }
133    if( grOffset < LOC_PREFERRED_GRANDFATHERED_LEN ){
134        /* return preferred tag */
135        result = estrdup( LOC_PREFERRED_GRANDFATHERED[grOffset] );
136    } else {
137        /* Return correct grandfathered language tag */
138        result = estrdup( LOC_GRANDFATHERED[grOffset] );
139    }
140    return result;
141}
142
/* {{{
* Reverse-direction counterpart of strtok_r, used by the lookup code.
* Searches str backwards, starting just before savedPos, for the nearest
* ID separator ('-' or '_').
* Returns the position at which the string should be cut for the next
* lookup candidate, or -1 if there is no further token.
*/
static int getStrrtokenPos(char* str, int savedPos)
{
    int pos = savedPos - 1;

    /* walk left until we sit on a separator (or fall off the front) */
    while( pos >= 0 && !isIDSeparator(str[pos]) ){
        pos--;
    }
    if( pos < 0 ){
        return -1;
    }

    /* a separator two characters earlier means a one-letter singleton sits
     * in between; cut in front of the singleton instead */
    if( pos >= 2 && isIDSeparator(str[pos-2]) ){
        pos -= 2;
    }

    /* a cut at position 0 would leave nothing, e.g. for the invalid
     * locales '-x-xyz' or '-sl_Latn' */
    return (pos < 1) ? -1 : pos;
}
171/* }}} */
172
/* {{{
* Forward search for a singleton (a one-character subtag such as the 'x'
* in "en-US-x-private").
*
* Returns 0 when the string itself starts with a singleton (its second
* character is a separator, e.g. "x-avy" or "a-prv1").
* Otherwise returns the index of the first singleton character that sits
* between two separators.
* Returns -1 if str is NULL, empty, or contains no singleton.
*/
static int getSingletonPos(const char* str)
{
    int result =-1;
    int i=0;
    int len = 0;

    if( str && ((len=strlen(str))>0) ){
        for( i=0; i<len ; i++){
            if( !isIDSeparator(*(str+i)) ){
                continue;
            }
            if( i==1){
                /* string is of the form x-avy or a-prv1 */
                result =0;
                break;
            }
            /* A singleton needs a closing separator at i+2. Only look there
             * when that index is inside the string: for a trailing separator
             * (i == len-1) str[i+2] lies past the terminating NUL. */
            if( i+2 < len && isIDSeparator(*(str+i+2)) ){
                /* position of the singleton character itself */
                result = i+1;
                break;
            }
        }/* end of for */

    }
    return result;
}
205/* }}} */
206
207/* {{{ proto static string Locale::getDefault(  )
208   Get default locale */
209/* }}} */
/* {{{ proto static string locale_get_default( )
   Get default locale.
   Returns the string produced by intl_locale_get_default(); TRUE is passed
   to RETURN_STRING as its duplicate flag. */
PHP_NAMED_FUNCTION(zif_locale_get_default)
{
    RETURN_STRING( intl_locale_get_default( TSRMLS_C ), TRUE );
}
216
217/* }}} */
218
219/* {{{ proto static string Locale::setDefault( string $locale )
220   Set default locale */
221/* }}} */
222/* {{{ proto static string locale_set_default( string $locale )
223   Set default locale */
224PHP_NAMED_FUNCTION(zif_locale_set_default)
225{
226    char* locale_name = NULL;
227    int   len=0;
228
229    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC,  "s",
230        &locale_name ,&len ) == FAILURE)
231    {
232        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
233                "locale_set_default: unable to parse input params", 0 TSRMLS_CC );
234
235        RETURN_FALSE;
236    }
237
238    if(len == 0) {
239        locale_name =  (char *)uloc_getDefault() ;
240        len = strlen(locale_name);
241    }
242
243    zend_alter_ini_entry(LOCALE_INI_NAME, sizeof(LOCALE_INI_NAME), locale_name, len, PHP_INI_USER, PHP_INI_STAGE_RUNTIME);
244
245    RETURN_TRUE;
246}
247/* }}} */
248
249/* {{{
250* Gets the value from ICU
251* common code shared by get_primary_language,get_script or get_region or get_variant
252* result = 0 if error, 1 if successful , -1 if no value
253*/
254static char* get_icu_value_internal( const char* loc_name , char* tag_name, int* result , int fromParseLocale)
255{
256    char*       tag_value   = NULL;
257    int32_t         tag_value_len   = 512;
258
259    int     singletonPos    = 0;
260    char*           mod_loc_name    = NULL;
261    int         grOffset    = 0;
262
263    int32_t         buflen          = 512;
264    UErrorCode      status          = U_ZERO_ERROR;
265
266
267    if( strcmp(tag_name, LOC_CANONICALIZE_TAG) != 0 ){
268        /* Handle  grandfathered languages */
269        grOffset =  findOffset( LOC_GRANDFATHERED , loc_name );
270        if( grOffset >= 0 ){
271            if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
272                return estrdup(loc_name);
273            } else {
274                /* Since Grandfathered , no value , do nothing , retutn NULL */
275                return NULL;
276            }
277        }
278
279    if( fromParseLocale==1 ){
280        /* Handle singletons */
281        if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
282            if( strlen(loc_name)>1 && (isIDPrefix(loc_name) == 1) ){
283                return estrdup(loc_name);
284            }
285        }
286
287        singletonPos = getSingletonPos( loc_name );
288        if( singletonPos == 0){
289            /* singleton at start of script, region , variant etc.
290             * or invalid singleton at start of language */
291            return NULL;
292        } else if( singletonPos > 0 ){
293            /* singleton at some position except at start
294             * strip off the singleton and rest of the loc_name */
295            mod_loc_name = estrndup ( loc_name , singletonPos-1);
296        }
297    } /* end of if fromParse */
298
299    } /* end of if != LOC_CANONICAL_TAG */
300
301    if( mod_loc_name == NULL){
302        mod_loc_name = estrdup(loc_name );
303    }
304
305    /* Proceed to ICU */
306    do{
307        tag_value = erealloc( tag_value , buflen  );
308        tag_value_len = buflen;
309
310        if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
311            buflen = uloc_getScript ( mod_loc_name ,tag_value , tag_value_len , &status);
312        }
313        if( strcmp(tag_name , LOC_LANG_TAG )==0 ){
314            buflen = uloc_getLanguage ( mod_loc_name ,tag_value , tag_value_len , &status);
315        }
316        if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
317            buflen = uloc_getCountry ( mod_loc_name ,tag_value , tag_value_len , &status);
318        }
319        if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
320            buflen = uloc_getVariant ( mod_loc_name ,tag_value , tag_value_len , &status);
321        }
322        if( strcmp(tag_name , LOC_CANONICALIZE_TAG)==0 ){
323            buflen = uloc_canonicalize ( mod_loc_name ,tag_value , tag_value_len , &status);
324        }
325
326        if( U_FAILURE( status ) ) {
327            if( status == U_BUFFER_OVERFLOW_ERROR ) {
328                status = U_ZERO_ERROR;
329                continue;
330            }
331
332            /* Error in retriving data */
333            *result = 0;
334            if( tag_value ){
335                efree( tag_value );
336            }
337            if( mod_loc_name ){
338                efree( mod_loc_name);
339            }
340            return NULL;
341        }
342    } while( buflen > tag_value_len );
343
344    if(  buflen ==0 ){
345        /* No value found */
346        *result = -1;
347        if( tag_value ){
348            efree( tag_value );
349        }
350        if( mod_loc_name ){
351            efree( mod_loc_name);
352        }
353        return NULL;
354    } else {
355        *result = 1;
356    }
357
358    if( mod_loc_name ){
359        efree( mod_loc_name);
360    }
361    return tag_value;
362}
363/* }}} */
364
365/* {{{
366* Gets the value from ICU , called when PHP userspace function is called
367* common code shared by get_primary_language,get_script or get_region or get_variant
368*/
369static void get_icu_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
370{
371
372    const char* loc_name            = NULL;
373    int         loc_name_len        = 0;
374
375    char*       tag_value       = NULL;
376    char*       empty_result    = "";
377
378    int         result          = 0;
379    char*       msg             = NULL;
380
381    UErrorCode  status              = U_ZERO_ERROR;
382
383    intl_error_reset( NULL TSRMLS_CC );
384
385    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
386    &loc_name ,&loc_name_len ) == FAILURE) {
387        spprintf(&msg , 0, "locale_get_%s : unable to parse input params", tag_name );
388        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,  msg , 1 TSRMLS_CC );
389        efree(msg);
390
391        RETURN_FALSE;
392    }
393
394    if(loc_name_len == 0) {
395        loc_name = intl_locale_get_default(TSRMLS_C);
396    }
397
398    /* Call ICU get */
399    tag_value = get_icu_value_internal( loc_name , tag_name , &result ,0);
400
401    /* No value found */
402    if( result == -1 ) {
403        if( tag_value){
404            efree( tag_value);
405        }
406        RETURN_STRING( empty_result , TRUE);
407    }
408
409    /* value found */
410    if( tag_value){
411        RETURN_STRING( tag_value , FALSE);
412    }
413
414    /* Error encountered while fetching the value */
415    if( result ==0) {
416        spprintf(&msg , 0, "locale_get_%s : unable to get locale %s", tag_name , tag_name );
417        intl_error_set( NULL, status, msg , 1 TSRMLS_CC );
418        efree(msg);
419        RETURN_NULL();
420    }
421
422}
423/* }}} */
424
425/* {{{ proto static string Locale::getScript($locale)
426 * gets the script for the $locale
427 }}} */
/* {{{ proto static string locale_get_script($locale)
 * Gets the script for the $locale.
 * Thin wrapper: forwards the call to get_icu_value_src_php() with
 * LOC_SCRIPT_TAG, which parses $locale and sets the return value.
 */
PHP_FUNCTION( locale_get_script )
{
    get_icu_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
435/* }}} */
436
437/* {{{ proto static string Locale::getRegion($locale)
438 * gets the region for the $locale
439 }}} */
/* {{{ proto static string locale_get_region($locale)
 * Gets the region for the $locale.
 * Thin wrapper: forwards the call to get_icu_value_src_php() with
 * LOC_REGION_TAG, which parses $locale and sets the return value.
 */
PHP_FUNCTION( locale_get_region )
{
    get_icu_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
447/* }}} */
448
449/* {{{ proto static string Locale::getPrimaryLanguage($locale)
450 * gets the primary language for the $locale
451 }}} */
/* {{{ proto static string locale_get_primary_language($locale)
 * Gets the primary language for the $locale.
 * Thin wrapper: forwards the call to get_icu_value_src_php() with
 * LOC_LANG_TAG, which parses $locale and sets the return value.
 */
PHP_FUNCTION(locale_get_primary_language )
{
    get_icu_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
459/* }}} */
460
461
462/* {{{
463 * common code shared by display_xyz functions to  get the value from ICU
464 }}} */
465static void get_icu_disp_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
466{
467    const char* loc_name            = NULL;
468    int         loc_name_len        = 0;
469
470    const char* disp_loc_name       = NULL;
471    int         disp_loc_name_len   = 0;
472    int         free_loc_name       = 0;
473
474    UChar*      disp_name       = NULL;
475    int32_t     disp_name_len   = 0;
476
477    char*       mod_loc_name        = NULL;
478
479    int32_t     buflen              = 512;
480    UErrorCode  status              = U_ZERO_ERROR;
481
482    char*       utf8value       = NULL;
483    int         utf8value_len       = 0;
484
485    char*       msg                 = NULL;
486    int         grOffset        = 0;
487
488    intl_error_reset( NULL TSRMLS_CC );
489
490    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s|s",
491        &loc_name, &loc_name_len ,
492        &disp_loc_name ,&disp_loc_name_len ) == FAILURE)
493    {
494        spprintf(&msg , 0, "locale_get_display_%s : unable to parse input params", tag_name );
495        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,  msg , 1 TSRMLS_CC );
496        efree(msg);
497        RETURN_FALSE;
498    }
499
500    if(loc_name_len > ULOC_FULLNAME_CAPACITY) {
501        /* See bug 67397: overlong locale names cause trouble in uloc_getDisplayName */
502        spprintf(&msg , 0, "locale_get_display_%s : name too long", tag_name );
503        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,  msg , 1 TSRMLS_CC );
504        efree(msg);
505        RETURN_FALSE;
506    }
507
508    if(loc_name_len == 0) {
509        loc_name = intl_locale_get_default(TSRMLS_C);
510    }
511
512    if( strcmp(tag_name, DISP_NAME) != 0 ){
513        /* Handle grandfathered languages */
514        grOffset = findOffset( LOC_GRANDFATHERED , loc_name );
515        if( grOffset >= 0 ){
516            if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
517                mod_loc_name = getPreferredTag( loc_name );
518            } else {
519                /* Since Grandfathered, no value, do nothing, retutn NULL */
520                RETURN_FALSE;
521            }
522        }
523    } /* end of if != LOC_CANONICAL_TAG */
524
525    if( mod_loc_name==NULL ){
526        mod_loc_name = estrdup( loc_name );
527    }
528
529    /* Check if disp_loc_name passed , if not use default locale */
530    if( !disp_loc_name){
531        disp_loc_name = estrdup(intl_locale_get_default(TSRMLS_C));
532        free_loc_name = 1;
533    }
534
535    /* Get the disp_value for the given locale */
536    do{
537        disp_name = erealloc( disp_name , buflen * sizeof(UChar)  );
538        disp_name_len = buflen;
539
540        if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
541            buflen = uloc_getDisplayLanguage ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
542        } else if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
543            buflen = uloc_getDisplayScript ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
544        } else if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
545            buflen = uloc_getDisplayCountry ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
546        } else if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
547            buflen = uloc_getDisplayVariant ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
548        } else if( strcmp(tag_name , DISP_NAME)==0 ){
549            buflen = uloc_getDisplayName ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
550        }
551
552        /* U_STRING_NOT_TERMINATED_WARNING is admissible here; don't look for it */
553        if( U_FAILURE( status ) )
554        {
555            if( status == U_BUFFER_OVERFLOW_ERROR )
556            {
557                status = U_ZERO_ERROR;
558                continue;
559            }
560
561            spprintf(&msg, 0, "locale_get_display_%s : unable to get locale %s", tag_name , tag_name );
562            intl_error_set( NULL, status, msg , 1 TSRMLS_CC );
563            efree(msg);
564            if( disp_name){
565                efree( disp_name );
566            }
567            if( mod_loc_name){
568                efree( mod_loc_name );
569            }
570            if (free_loc_name) {
571                efree((void *)disp_loc_name);
572                disp_loc_name = NULL;
573            }
574            RETURN_FALSE;
575        }
576    } while( buflen > disp_name_len );
577
578    if( mod_loc_name){
579        efree( mod_loc_name );
580    }
581    if (free_loc_name) {
582        efree((void *)disp_loc_name);
583        disp_loc_name = NULL;
584    }
585    /* Convert display locale name from UTF-16 to UTF-8. */
586    intl_convert_utf16_to_utf8( &utf8value, &utf8value_len, disp_name, buflen, &status );
587    efree( disp_name );
588    if( U_FAILURE( status ) )
589    {
590        spprintf(&msg, 0, "locale_get_display_%s :error converting display name for %s to UTF-8", tag_name , tag_name );
591        intl_error_set( NULL, status, msg , 1 TSRMLS_CC );
592        efree(msg);
593        RETURN_FALSE;
594    }
595
596    RETVAL_STRINGL( utf8value, utf8value_len , FALSE);
597
598}
599/* }}} */
600
601/* {{{ proto static string Locale::getDisplayName($locale[, $in_locale = null])
602* gets the name for the $locale in $in_locale or default_locale
603 }}} */
/* {{{ proto static string locale_get_display_name($locale[, $in_locale = null])
* Gets the display name for the $locale in $in_locale or default_locale.
* Thin wrapper: forwards the call to get_icu_disp_value_src_php() with
* DISP_NAME.
*/
PHP_FUNCTION(locale_get_display_name)
{
    get_icu_disp_value_src_php( DISP_NAME , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
611/* }}} */
612
613/* {{{ proto static string Locale::getDisplayLanguage($locale[, $in_locale = null])
614* gets the language for the $locale in $in_locale or default_locale
615 }}} */
/* {{{ proto static string locale_get_display_language($locale[, $in_locale = null])
* Gets the display language for the $locale in $in_locale or default_locale.
* Thin wrapper: forwards the call to get_icu_disp_value_src_php() with
* LOC_LANG_TAG.
*/
PHP_FUNCTION(locale_get_display_language)
{
    get_icu_disp_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
623/* }}} */
624
625/* {{{ proto static string Locale::getDisplayScript($locale, $in_locale = null)
626* gets the script for the $locale in $in_locale or default_locale
627 }}} */
/* {{{ proto static string locale_get_display_script($locale[, $in_locale = null])
* Gets the display script for the $locale in $in_locale or default_locale.
* Thin wrapper: forwards the call to get_icu_disp_value_src_php() with
* LOC_SCRIPT_TAG.
*/
PHP_FUNCTION(locale_get_display_script)
{
    get_icu_disp_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
635/* }}} */
636
637/* {{{ proto static string Locale::getDisplayRegion($locale, $in_locale = null)
638* gets the region for the $locale in $in_locale or default_locale
639 }}} */
/* {{{ proto static string locale_get_display_region($locale[, $in_locale = null])
* Gets the display region for the $locale in $in_locale or default_locale.
* Thin wrapper: forwards the call to get_icu_disp_value_src_php() with
* LOC_REGION_TAG.
*/
PHP_FUNCTION(locale_get_display_region)
{
    get_icu_disp_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
647/* }}} */
648
649/* {{{
650* proto static string Locale::getDisplayVariant($locale, $in_locale = null)
651* gets the variant for the $locale in $in_locale or default_locale
652 }}} */
/* {{{
* proto static string locale_get_display_variant($locale[, $in_locale = null])
* Gets the display variant for the $locale in $in_locale or default_locale.
* Thin wrapper: forwards the call to get_icu_disp_value_src_php() with
* LOC_VARIANT_TAG.
*/
PHP_FUNCTION(locale_get_display_variant)
{
    get_icu_disp_value_src_php( LOC_VARIANT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
661/* }}} */
662
663 /* {{{ proto static array getKeywords(string $locale) {
664 * return an associative array containing keyword-value
665 * pairs for this locale. The keys are keys to the array (doh!)
666 * }}}*/
667 /* {{{ proto static array locale_get_keywords(string $locale) {
668 * return an associative array containing keyword-value
669 * pairs for this locale. The keys are keys to the array (doh!)
670 */
671PHP_FUNCTION( locale_get_keywords )
672{
673    UEnumeration*   e        = NULL;
674    UErrorCode      status   = U_ZERO_ERROR;
675
676    const char*     kw_key        = NULL;
677    int32_t         kw_key_len    = 0;
678
679    const char*         loc_name        = NULL;
680    int             loc_name_len    = 0;
681
682/*
683    ICU expects the buffer to be allocated  before calling the function
684    and so the buffer size has been explicitly specified
685    ICU uloc.h #define  ULOC_KEYWORD_AND_VALUES_CAPACITY   100
686    hence the kw_value buffer size is 100
687*/
688    char*       kw_value        = NULL;
689    int32_t     kw_value_len    = 100;
690
691    intl_error_reset( NULL TSRMLS_CC );
692
693    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
694        &loc_name, &loc_name_len ) == FAILURE)
695    {
696        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
697             "locale_get_keywords: unable to parse input params", 0 TSRMLS_CC );
698
699        RETURN_FALSE;
700    }
701
702    if(loc_name_len == 0) {
703        loc_name = intl_locale_get_default(TSRMLS_C);
704    }
705
706    /* Get the keywords */
707    e = uloc_openKeywords( loc_name, &status );
708    if( e != NULL )
709    {
710        /* Traverse it, filling the return array. */
711        array_init( return_value );
712
713        while( ( kw_key = uenum_next( e, &kw_key_len, &status ) ) != NULL ){
714            kw_value = ecalloc( 1 , kw_value_len  );
715
716            /* Get the keyword value for each keyword */
717            kw_value_len=uloc_getKeywordValue( loc_name,kw_key, kw_value, kw_value_len ,  &status );
718            if (status == U_BUFFER_OVERFLOW_ERROR) {
719                status = U_ZERO_ERROR;
720                kw_value = erealloc( kw_value , kw_value_len+1);
721                kw_value_len=uloc_getKeywordValue( loc_name,kw_key, kw_value, kw_value_len+1 ,  &status );
722            } else if(!U_FAILURE(status)) {
723                kw_value = erealloc( kw_value , kw_value_len+1);
724            }
725            if (U_FAILURE(status)) {
726                    intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "locale_get_keywords: Error encountered while getting the keyword  value for the  keyword", 0 TSRMLS_CC );
727                if( kw_value){
728                    efree( kw_value );
729                }
730                zval_dtor(return_value);
731                RETURN_FALSE;
732            }
733
734            add_assoc_stringl( return_value, (char *)kw_key, kw_value , kw_value_len, 0);
735        } /* end of while */
736
737    } /* end of if e!=NULL */
738
739    uenum_close( e );
740}
741/* }}} */
742
743 /* {{{ proto static string Locale::canonicalize($locale)
744 * @return string the canonicalized locale
745 * }}} */
 /* {{{ proto static string locale_canonicalize(string $locale)
 * @param string $locale    The locale string to canonicalize
 * Thin wrapper: forwards the call to get_icu_value_src_php() with
 * LOC_CANONICALIZE_TAG; only a single string argument is parsed there.
 */
PHP_FUNCTION(locale_canonicalize)
{
    get_icu_value_src_php( LOC_CANONICALIZE_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
753/* }}} */
754
755/* {{{ append_key_value
756* Internal function which is called from locale_compose
757* gets the value for the key_name and appends to the loc_name
758* returns 1 if successful , -1 if not found ,
759* 0 if array element is not a string , -2 if buffer-overflow
760*/
761static int append_key_value(smart_str* loc_name, HashTable* hash_arr, char* key_name)
762{
763    zval**  ele_value   = NULL;
764
765    if(zend_hash_find(hash_arr , key_name , strlen(key_name) + 1 ,(void **)&ele_value ) == SUCCESS ) {
766        if(Z_TYPE_PP(ele_value)!= IS_STRING ){
767            /* element value is not a string */
768            return FAILURE;
769        }
770        if(strcmp(key_name, LOC_LANG_TAG) != 0 &&
771           strcmp(key_name, LOC_GRANDFATHERED_LANG_TAG)!=0 ) {
772            /* not lang or grandfathered tag */
773            smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
774        }
775        smart_str_appendl(loc_name, Z_STRVAL_PP(ele_value) , Z_STRLEN_PP(ele_value));
776        return SUCCESS;
777    }
778
779    return LOC_NOT_FOUND;
780}
781/* }}} */
782
783/* {{{ append_prefix , appends the prefix needed
784* e.g. private adds 'x'
785*/
786static void add_prefix(smart_str* loc_name, char* key_name)
787{
788    if( strncmp(key_name , LOC_PRIVATE_TAG , 7) == 0 ){
789        smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
790        smart_str_appendl(loc_name, PRIVATE_PREFIX , sizeof(PRIVATE_PREFIX)-1);
791    }
792}
793/* }}} */
794
795/* {{{ append_multiple_key_values
796* Internal function which is called from locale_compose
797* gets the multiple values for the key_name and appends to the loc_name
798* used for 'variant','extlang','private'
799* returns 1 if successful , -1 if not found ,
800* 0 if array element is not a string , -2 if buffer-overflow
801*/
802static int append_multiple_key_values(smart_str* loc_name, HashTable* hash_arr, char* key_name TSRMLS_DC)
803{
804    zval**  ele_value       = NULL;
805    int     i       = 0;
806    int     isFirstSubtag   = 0;
807    int     max_value   = 0;
808
809    /* Variant/ Extlang/Private etc. */
810    if( zend_hash_find( hash_arr , key_name , strlen(key_name) + 1 ,(void **)&ele_value ) == SUCCESS ) {
811        if( Z_TYPE_PP(ele_value) == IS_STRING ){
812            add_prefix( loc_name , key_name);
813
814            smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
815            smart_str_appendl(loc_name, Z_STRVAL_PP(ele_value) , Z_STRLEN_PP(ele_value));
816            return SUCCESS;
817        } else if(Z_TYPE_PP(ele_value) == IS_ARRAY ) {
818            HashPosition pos;
819            HashTable *arr = HASH_OF(*ele_value);
820            zval **data = NULL;
821
822            zend_hash_internal_pointer_reset_ex(arr, &pos);
823            while(zend_hash_get_current_data_ex(arr, (void **)&data, &pos) != FAILURE) {
824                if(Z_TYPE_PP(data) != IS_STRING) {
825                    return FAILURE;
826                }
827                if (isFirstSubtag++ == 0){
828                    add_prefix(loc_name , key_name);
829                }
830                smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
831                smart_str_appendl(loc_name, Z_STRVAL_PP(data) , Z_STRLEN_PP(data));
832                zend_hash_move_forward_ex(arr, &pos);
833            }
834            return SUCCESS;
835        } else {
836            return FAILURE;
837        }
838    } else {
839        char cur_key_name[31];
840        /* Decide the max_value: the max. no. of elements allowed */
841        if( strcmp(key_name , LOC_VARIANT_TAG) ==0 ){
842            max_value  = MAX_NO_VARIANT;
843        }
844        if( strcmp(key_name , LOC_EXTLANG_TAG) ==0 ){
845            max_value  = MAX_NO_EXTLANG;
846        }
847        if( strcmp(key_name , LOC_PRIVATE_TAG) ==0 ){
848            max_value  = MAX_NO_PRIVATE;
849        }
850
851        /* Multiple variant values as variant0, variant1 ,variant2 */
852        isFirstSubtag = 0;
853        for( i=0 ; i< max_value; i++ ){
854            snprintf( cur_key_name , 30, "%s%d", key_name , i);
855            if( zend_hash_find( hash_arr , cur_key_name , strlen(cur_key_name) + 1,(void **)&ele_value ) == SUCCESS ){
856                if( Z_TYPE_PP(ele_value)!= IS_STRING ){
857                    /* variant is not a string */
858                    return FAILURE;
859                }
860                /* Add the contents */
861                if (isFirstSubtag++ == 0){
862                    add_prefix(loc_name , cur_key_name);
863                }
864                smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
865                smart_str_appendl(loc_name, Z_STRVAL_PP(ele_value) , Z_STRLEN_PP(ele_value));
866            }
867        } /* end of for */
868    } /* end of else */
869
870    return SUCCESS;
871}
872/* }}} */
873
874/*{{{
875* If applicable sets error message and aborts locale_compose gracefully
876* returns 0  if locale_compose needs to be aborted
877* otherwise returns 1
878*/
879static int handleAppendResult( int result, smart_str* loc_name TSRMLS_DC)
880{
881    intl_error_reset( NULL TSRMLS_CC );
882    if( result == FAILURE) {
883        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
884             "locale_compose: parameter array element is not a string", 0 TSRMLS_CC );
885        smart_str_free(loc_name);
886        return 0;
887    }
888    return 1;
889}
890/* }}} */
891
/* NUL-terminates the smart_str and returns its buffer as the function's
 * string result without copying (duplicate = 0).
 * Wrapped in do { } while (0) so the two statements behave as one, e.g.
 * when used as the body of an unbraced if. Invoke with a trailing ';'. */
#define RETURN_SMART_STR(s) do { smart_str_0((s)); RETURN_STRINGL((s)->c, (s)->len, 0); } while (0)
893/* {{{ proto static string Locale::composeLocale($array)
894* Creates a locale by combining the parts of locale-ID passed
895* }}} */
896/* {{{ proto static string compose_locale($array)
897* Creates a locale by combining the parts of locale-ID passed
898* }}} */
899PHP_FUNCTION(locale_compose)
900{
901    smart_str       loc_name_s = {0};
902    smart_str *loc_name = &loc_name_s;
903    zval*           arr = NULL;
904    HashTable*      hash_arr = NULL;
905    int             result = 0;
906
907    intl_error_reset( NULL TSRMLS_CC );
908
909    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "a",
910        &arr) == FAILURE)
911    {
912        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
913             "locale_compose: unable to parse input params", 0 TSRMLS_CC );
914        RETURN_FALSE;
915    }
916
917    hash_arr = HASH_OF( arr );
918
919    if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 )
920        RETURN_FALSE;
921
922    /* Check for grandfathered first */
923    result = append_key_value(loc_name, hash_arr,  LOC_GRANDFATHERED_LANG_TAG);
924    if( result == SUCCESS){
925        RETURN_SMART_STR(loc_name);
926    }
927    if( !handleAppendResult( result, loc_name TSRMLS_CC)){
928        RETURN_FALSE;
929    }
930
931    /* Not grandfathered */
932    result = append_key_value(loc_name, hash_arr , LOC_LANG_TAG);
933    if( result == LOC_NOT_FOUND ){
934        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
935        "locale_compose: parameter array does not contain 'language' tag.", 0 TSRMLS_CC );
936        smart_str_free(loc_name);
937        RETURN_FALSE;
938    }
939    if( !handleAppendResult( result, loc_name TSRMLS_CC)){
940        RETURN_FALSE;
941    }
942
943    /* Extlang */
944    result = append_multiple_key_values(loc_name, hash_arr , LOC_EXTLANG_TAG TSRMLS_CC);
945    if( !handleAppendResult( result, loc_name TSRMLS_CC)){
946        RETURN_FALSE;
947    }
948
949    /* Script */
950    result = append_key_value(loc_name, hash_arr , LOC_SCRIPT_TAG);
951    if( !handleAppendResult( result, loc_name TSRMLS_CC)){
952        RETURN_FALSE;
953    }
954
955    /* Region */
956    result = append_key_value( loc_name, hash_arr , LOC_REGION_TAG);
957    if( !handleAppendResult( result, loc_name TSRMLS_CC)){
958        RETURN_FALSE;
959    }
960
961    /* Variant */
962    result = append_multiple_key_values( loc_name, hash_arr , LOC_VARIANT_TAG TSRMLS_CC);
963    if( !handleAppendResult( result, loc_name TSRMLS_CC)){
964        RETURN_FALSE;
965    }
966
967    /* Private */
968    result = append_multiple_key_values( loc_name, hash_arr , LOC_PRIVATE_TAG TSRMLS_CC);
969    if( !handleAppendResult( result, loc_name TSRMLS_CC)){
970        RETURN_FALSE;
971    }
972
973    RETURN_SMART_STR(loc_name);
974}
975/* }}} */
976
977
/*{{{
* Extracts the private-use subtags of a locale, if it has any.
* e.g. for loc_name = 'en_US-x-prv1-prv2-prv3' the result is a copy of
* 'prv1-prv2-prv3' made with estrndup.
* Returns NULL when loc_name is NULL or empty, when no 'x'/'X' singleton is
* found, or when nothing follows the singleton and its separator.
*/
static char* get_private_subtags(const char* loc_name)
{
    const char* cursor    = loc_name;
    int         pos       = 0;
    int         remaining = 0;

    if( loc_name == NULL || loc_name[0] == '\0' ){
        return NULL;
    }

    remaining = strlen(cursor);
    for(;;){
        pos = getSingletonPos(cursor);
        if( pos == -1 ){
            /* no further singleton in the rest of the string */
            return NULL;
        }

        if( cursor[pos] == 'x' || cursor[pos] == 'X' ){
            /* private subtag start found */
            if( pos + 2 == remaining ){
                /* the string ends right after the singleton and its separator */
                return NULL;
            }
            return estrndup( cursor + pos + 2 , remaining - (pos + 2) );
        }

        if( pos + 1 >= remaining ){
            /* String end */
            return NULL;
        }

        /* some other singleton: keep searching behind it */
        cursor    = cursor + pos + 1;
        remaining = strlen(cursor);
    }
}
/* }}} */
1026
1027/* {{{ code used by locale_parse
1028*/
1029static int add_array_entry(const char* loc_name, zval* hash_arr, char* key_name TSRMLS_DC)
1030{
1031    char*   key_value   = NULL;
1032    char*   cur_key_name    = NULL;
1033    char*   token           = NULL;
1034    char*   last_ptr    = NULL;
1035
1036    int result      = 0;
1037    int     cur_result      = 0;
1038    int     cnt         = 0;
1039
1040
1041    if( strcmp(key_name , LOC_PRIVATE_TAG)==0 ){
1042        key_value = get_private_subtags( loc_name );
1043        result = 1;
1044    } else {
1045        key_value = get_icu_value_internal( loc_name , key_name , &result,1 );
1046    }
1047    if( (strcmp(key_name , LOC_PRIVATE_TAG)==0) ||
1048        ( strcmp(key_name , LOC_VARIANT_TAG)==0) ){
1049        if( result > 0 && key_value){
1050            /* Tokenize on the "_" or "-"  */
1051            token = php_strtok_r( key_value , DELIMITER ,&last_ptr);
1052            if( cur_key_name ){
1053                efree( cur_key_name);
1054            }
1055            cur_key_name = (char*)ecalloc( 25,  25);
1056            sprintf( cur_key_name , "%s%d", key_name , cnt++);
1057            add_assoc_string( hash_arr, cur_key_name , token ,TRUE );
1058            /* tokenize on the "_" or "-" and stop  at singleton if any */
1059            while( (token = php_strtok_r(NULL , DELIMITER , &last_ptr)) && (strlen(token)>1) ){
1060                sprintf( cur_key_name , "%s%d", key_name , cnt++);
1061                add_assoc_string( hash_arr, cur_key_name , token , TRUE );
1062            }
1063/*
1064            if( strcmp(key_name, LOC_PRIVATE_TAG) == 0 ){
1065            }
1066*/
1067        }
1068    } else {
1069        if( result == 1 ){
1070            add_assoc_string( hash_arr, key_name , key_value , TRUE );
1071            cur_result = 1;
1072        }
1073    }
1074
1075    if( cur_key_name ){
1076        efree( cur_key_name);
1077    }
1078    /*if( key_name != LOC_PRIVATE_TAG && key_value){*/
1079    if( key_value){
1080        efree(key_value);
1081    }
1082    return cur_result;
1083}
1084/* }}} */
1085
1086/* {{{ proto static array Locale::parseLocale($locale)
1087* parses a locale-id into an array the different parts of it
1088 }}} */
1089/* {{{ proto static array parse_locale($locale)
1090* parses a locale-id into an array the different parts of it
1091*/
1092PHP_FUNCTION(locale_parse)
1093{
1094    const char* loc_name        = NULL;
1095    int         loc_name_len    = 0;
1096    int         grOffset        = 0;
1097
1098    intl_error_reset( NULL TSRMLS_CC );
1099
1100    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
1101        &loc_name, &loc_name_len ) == FAILURE)
1102    {
1103        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1104             "locale_parse: unable to parse input params", 0 TSRMLS_CC );
1105
1106        RETURN_FALSE;
1107    }
1108
1109    if(loc_name_len == 0) {
1110        loc_name = intl_locale_get_default(TSRMLS_C);
1111    }
1112
1113    array_init( return_value );
1114
1115    grOffset =  findOffset( LOC_GRANDFATHERED , loc_name );
1116    if( grOffset >= 0 ){
1117        add_assoc_string( return_value , LOC_GRANDFATHERED_LANG_TAG , estrdup(loc_name) ,FALSE );
1118    }
1119    else{
1120        /* Not grandfathered */
1121        add_array_entry( loc_name , return_value , LOC_LANG_TAG TSRMLS_CC);
1122        add_array_entry( loc_name , return_value , LOC_SCRIPT_TAG TSRMLS_CC);
1123        add_array_entry( loc_name , return_value , LOC_REGION_TAG TSRMLS_CC);
1124        add_array_entry( loc_name , return_value , LOC_VARIANT_TAG TSRMLS_CC);
1125        add_array_entry( loc_name , return_value , LOC_PRIVATE_TAG TSRMLS_CC);
1126    }
1127}
1128/* }}} */
1129
1130/* {{{ proto static array Locale::getAllVariants($locale)
1131* gets an array containing the list of variants, or null
1132 }}} */
1133/* {{{ proto static array locale_get_all_variants($locale)
1134* gets an array containing the list of variants, or null
1135*/
1136PHP_FUNCTION(locale_get_all_variants)
1137{
1138    const char*     loc_name        = NULL;
1139    int         loc_name_len    = 0;
1140
1141    int result      = 0;
1142    char*   token       = NULL;
1143    char*   variant     = NULL;
1144    char*   saved_ptr   = NULL;
1145
1146    intl_error_reset( NULL TSRMLS_CC );
1147
1148    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
1149    &loc_name, &loc_name_len ) == FAILURE)
1150    {
1151        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1152         "locale_parse: unable to parse input params", 0 TSRMLS_CC );
1153
1154        RETURN_FALSE;
1155    }
1156
1157    if(loc_name_len == 0) {
1158        loc_name = intl_locale_get_default(TSRMLS_C);
1159    }
1160
1161
1162    array_init( return_value );
1163
1164    /* If the locale is grandfathered, stop, no variants */
1165    if( findOffset( LOC_GRANDFATHERED , loc_name ) >=  0 ){
1166        /* ("Grandfathered Tag. No variants."); */
1167    }
1168    else {
1169    /* Call ICU variant */
1170        variant = get_icu_value_internal( loc_name , LOC_VARIANT_TAG , &result ,0);
1171        if( result > 0 && variant){
1172            /* Tokenize on the "_" or "-" */
1173            token = php_strtok_r( variant , DELIMITER , &saved_ptr);
1174            add_next_index_stringl( return_value, token , strlen(token) ,TRUE );
1175            /* tokenize on the "_" or "-" and stop  at singleton if any */
1176            while( (token = php_strtok_r(NULL , DELIMITER, &saved_ptr)) && (strlen(token)>1) ){
1177                add_next_index_stringl( return_value, token , strlen(token) ,TRUE );
1178            }
1179        }
1180        if( variant ){
1181            efree( variant );
1182        }
1183    }
1184
1185
1186}
1187/* }}} */
1188
/*{{{
* Writes a normalized copy of str into retstr: every character is converted
* to lower case and every hyphen is replaced with an underscore.
* retstr must provide room for strlen(str)+1 bytes.
* Returns 1 on success. Returns 0, leaving retstr untouched, when str is
* NULL or empty.
*/
static int strToMatch(const char* str ,char *retstr)
{
    if( str == NULL || str[0] == '\0' ){
        return 0;
    }

    for( ; *str != '\0'; str++, retstr++ ){
        if( *str == '-' ){
            *retstr = '_';
        } else {
            /* tolower() is only defined for unsigned char values (and EOF),
             * so bytes >= 0x80 must not be passed as a negative char */
            *retstr = (char)tolower( (unsigned char)*str );
        }
    }
    *retstr = '\0';

    return 1;
}
/* }}} */
1223
1224/* {{{ proto static boolean Locale::filterMatches(string $langtag, string $locale[, bool $canonicalize])
1225* Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm
1226*/
1227/* }}} */
1228/* {{{ proto boolean locale_filter_matches(string $langtag, string $locale[, bool $canonicalize])
1229* Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm
1230*/
1231PHP_FUNCTION(locale_filter_matches)
1232{
1233    char*           lang_tag        = NULL;
1234    int             lang_tag_len    = 0;
1235    const char*     loc_range       = NULL;
1236    int             loc_range_len   = 0;
1237
1238    int     result      = 0;
1239    char*       token       = 0;
1240    char*       chrcheck    = NULL;
1241
1242    char*           can_lang_tag    = NULL;
1243    char*           can_loc_range   = NULL;
1244
1245    char*           cur_lang_tag    = NULL;
1246    char*           cur_loc_range   = NULL;
1247
1248    zend_bool   boolCanonical   = 0;
1249    UErrorCode  status      = U_ZERO_ERROR;
1250
1251    intl_error_reset( NULL TSRMLS_CC );
1252
1253    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "ss|b",
1254        &lang_tag, &lang_tag_len , &loc_range , &loc_range_len ,
1255        &boolCanonical) == FAILURE)
1256    {
1257        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1258        "locale_filter_matches: unable to parse input params", 0 TSRMLS_CC );
1259
1260        RETURN_FALSE;
1261    }
1262
1263    if(loc_range_len == 0) {
1264        loc_range = intl_locale_get_default(TSRMLS_C);
1265    }
1266
1267    if( strcmp(loc_range,"*")==0){
1268        RETURN_TRUE;
1269    }
1270
1271    if( boolCanonical ){
1272        /* canonicalize loc_range */
1273        can_loc_range=get_icu_value_internal( loc_range , LOC_CANONICALIZE_TAG , &result , 0);
1274        if( result ==0) {
1275            intl_error_set( NULL, status,
1276                "locale_filter_matches : unable to canonicalize loc_range" , 0 TSRMLS_CC );
1277            RETURN_FALSE;
1278        }
1279
1280        /* canonicalize lang_tag */
1281        can_lang_tag = get_icu_value_internal( lang_tag , LOC_CANONICALIZE_TAG , &result ,  0);
1282        if( result ==0) {
1283            intl_error_set( NULL, status,
1284                "locale_filter_matches : unable to canonicalize lang_tag" , 0 TSRMLS_CC );
1285            RETURN_FALSE;
1286        }
1287
1288        /* Convert to lower case for case-insensitive comparison */
1289        cur_lang_tag = ecalloc( 1, strlen(can_lang_tag) + 1);
1290
1291        /* Convert to lower case for case-insensitive comparison */
1292        result = strToMatch( can_lang_tag , cur_lang_tag);
1293        if( result == 0) {
1294            efree( cur_lang_tag );
1295            efree( can_lang_tag );
1296            RETURN_FALSE;
1297        }
1298
1299        cur_loc_range = ecalloc( 1, strlen(can_loc_range) + 1);
1300        result = strToMatch( can_loc_range , cur_loc_range );
1301        if( result == 0) {
1302            efree( cur_lang_tag );
1303            efree( can_lang_tag );
1304            efree( cur_loc_range );
1305            efree( can_loc_range );
1306            RETURN_FALSE;
1307        }
1308
1309        /* check if prefix */
1310        token   = strstr( cur_lang_tag , cur_loc_range );
1311
1312        if( token && (token==cur_lang_tag) ){
1313            /* check if the char. after match is SEPARATOR */
1314            chrcheck = token + (strlen(cur_loc_range));
1315            if( isIDSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
1316                if( cur_lang_tag){
1317                    efree( cur_lang_tag );
1318                }
1319                if( cur_loc_range){
1320                    efree( cur_loc_range );
1321                }
1322                if( can_lang_tag){
1323                    efree( can_lang_tag );
1324                }
1325                if( can_loc_range){
1326                    efree( can_loc_range );
1327                }
1328                RETURN_TRUE;
1329            }
1330        }
1331
1332        /* No prefix as loc_range */
1333        if( cur_lang_tag){
1334            efree( cur_lang_tag );
1335        }
1336        if( cur_loc_range){
1337            efree( cur_loc_range );
1338        }
1339        if( can_lang_tag){
1340            efree( can_lang_tag );
1341        }
1342        if( can_loc_range){
1343            efree( can_loc_range );
1344        }
1345        RETURN_FALSE;
1346
1347    } /* end of if isCanonical */
1348    else{
1349        /* Convert to lower case for case-insensitive comparison */
1350        cur_lang_tag = ecalloc( 1, strlen(lang_tag ) + 1);
1351
1352        result = strToMatch( lang_tag , cur_lang_tag);
1353        if( result == 0) {
1354            efree( cur_lang_tag );
1355            RETURN_FALSE;
1356        }
1357        cur_loc_range = ecalloc( 1, strlen(loc_range ) + 1);
1358        result = strToMatch( loc_range , cur_loc_range );
1359        if( result == 0) {
1360            efree( cur_lang_tag );
1361            efree( cur_loc_range );
1362            RETURN_FALSE;
1363        }
1364
1365        /* check if prefix */
1366        token   = strstr( cur_lang_tag , cur_loc_range );
1367
1368        if( token && (token==cur_lang_tag) ){
1369            /* check if the char. after match is SEPARATOR */
1370            chrcheck = token + (strlen(cur_loc_range));
1371            if( isIDSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
1372                if( cur_lang_tag){
1373                    efree( cur_lang_tag );
1374                }
1375                if( cur_loc_range){
1376                    efree( cur_loc_range );
1377                }
1378                RETURN_TRUE;
1379            }
1380        }
1381
1382        /* No prefix as loc_range */
1383        if( cur_lang_tag){
1384            efree( cur_lang_tag );
1385        }
1386        if( cur_loc_range){
1387            efree( cur_loc_range );
1388        }
1389        RETURN_FALSE;
1390
1391    }
1392}
1393/* }}} */
1394
1395static void array_cleanup( char* arr[] , int arr_size)
1396{
1397    int i=0;
1398    for( i=0; i< arr_size; i++ ){
1399        if( arr[i*2] ){
1400            efree( arr[i*2]);
1401        }
1402    }
1403    efree(arr);
1404}
1405
1406#define LOOKUP_CLEAN_RETURN(value)  array_cleanup(cur_arr, cur_arr_len); return (value)
1407/* {{{
1408* returns the lookup result to lookup_loc_range_src_php
1409* internal function
1410*/
1411static char* lookup_loc_range(const char* loc_range, HashTable* hash_arr, int canonicalize  TSRMLS_DC)
1412{
1413    int i = 0;
1414    int cur_arr_len = 0;
1415    int result = 0;
1416
1417    char* lang_tag = NULL;
1418    zval** ele_value = NULL;
1419    char** cur_arr = NULL;
1420
1421    char* cur_loc_range = NULL;
1422    char* can_loc_range = NULL;
1423    int saved_pos = 0;
1424
1425    char* return_value = NULL;
1426
1427    cur_arr = ecalloc(zend_hash_num_elements(hash_arr)*2, sizeof(char *));
1428    /* convert the array to lowercase , also replace hyphens with the underscore and store it in cur_arr */
1429    for(zend_hash_internal_pointer_reset(hash_arr);
1430        zend_hash_has_more_elements(hash_arr) == SUCCESS;
1431        zend_hash_move_forward(hash_arr)) {
1432
1433        if (zend_hash_get_current_data(hash_arr, (void**)&ele_value) == FAILURE) {
1434            /* Should never actually fail since the key is known to exist.*/
1435            continue;
1436        }
1437        if(Z_TYPE_PP(ele_value)!= IS_STRING) {
1438            /* element value is not a string */
1439            intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: locale array element is not a string", 0 TSRMLS_CC);
1440            LOOKUP_CLEAN_RETURN(NULL);
1441        }
1442        cur_arr[cur_arr_len*2] = estrndup(Z_STRVAL_PP(ele_value), Z_STRLEN_PP(ele_value));
1443        result = strToMatch(Z_STRVAL_PP(ele_value), cur_arr[cur_arr_len*2]);
1444        if(result == 0) {
1445            intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag", 0 TSRMLS_CC);
1446            LOOKUP_CLEAN_RETURN(NULL);
1447        }
1448        cur_arr[cur_arr_len*2+1] = Z_STRVAL_PP(ele_value);
1449        cur_arr_len++ ;
1450    } /* end of for */
1451
1452    /* Canonicalize array elements */
1453    if(canonicalize) {
1454        for(i=0; i<cur_arr_len; i++) {
1455            lang_tag = get_icu_value_internal(cur_arr[i*2], LOC_CANONICALIZE_TAG, &result, 0);
1456            if(result != 1 || lang_tag == NULL || !lang_tag[0]) {
1457                if(lang_tag) {
1458                    efree(lang_tag);
1459                }
1460                intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0 TSRMLS_CC);
1461                LOOKUP_CLEAN_RETURN(NULL);
1462            }
1463            cur_arr[i*2] = erealloc(cur_arr[i*2], strlen(lang_tag)+1);
1464            result = strToMatch(lang_tag, cur_arr[i*2]);
1465            efree(lang_tag);
1466            if(result == 0) {
1467                intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0 TSRMLS_CC);
1468                LOOKUP_CLEAN_RETURN(NULL);
1469            }
1470        }
1471
1472    }
1473
1474    if(canonicalize) {
1475        /* Canonicalize the loc_range */
1476        can_loc_range = get_icu_value_internal(loc_range, LOC_CANONICALIZE_TAG, &result , 0);
1477        if( result != 1 || can_loc_range == NULL || !can_loc_range[0]) {
1478            /* Error */
1479            intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize loc_range" , 0 TSRMLS_CC );
1480            if(can_loc_range) {
1481                efree(can_loc_range);
1482            }
1483            LOOKUP_CLEAN_RETURN(NULL);
1484        } else {
1485            loc_range = can_loc_range;
1486        }
1487    }
1488
1489    cur_loc_range = ecalloc(1, strlen(loc_range)+1);
1490    /* convert to lower and replace hyphens */
1491    result = strToMatch(loc_range, cur_loc_range);
1492    if(can_loc_range) {
1493        efree(can_loc_range);
1494    }
1495    if(result == 0) {
1496        intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0 TSRMLS_CC);
1497        LOOKUP_CLEAN_RETURN(NULL);
1498    }
1499
1500    /* Lookup for the lang_tag match */
1501    saved_pos = strlen(cur_loc_range);
1502    while(saved_pos > 0) {
1503        for(i=0; i< cur_arr_len; i++){
1504            if(cur_arr[i*2] != NULL && strlen(cur_arr[i*2]) == saved_pos && strncmp(cur_loc_range, cur_arr[i*2], saved_pos) == 0) {
1505                /* Match found */
1506                return_value = estrdup(canonicalize?cur_arr[i*2]:cur_arr[i*2+1]);
1507                efree(cur_loc_range);
1508                LOOKUP_CLEAN_RETURN(return_value);
1509            }
1510        }
1511        saved_pos = getStrrtokenPos(cur_loc_range, saved_pos);
1512    }
1513
1514    /* Match not found */
1515    efree(cur_loc_range);
1516    LOOKUP_CLEAN_RETURN(NULL);
1517}
1518/* }}} */
1519
1520/* {{{ proto string Locale::lookup(array $langtag, string $locale[, bool $canonicalize[, string $default = null]])
1521* Searchs the items in $langtag for the best match to the language
1522* range
1523*/
1524/* }}} */
1525/* {{{ proto string locale_lookup(array $langtag, string $locale[, bool $canonicalize[, string $default = null]])
1526* Searchs the items in $langtag for the best match to the language
1527* range
1528*/
1529PHP_FUNCTION(locale_lookup)
1530{
1531    char*       fallback_loc        = NULL;
1532    int         fallback_loc_len    = 0;
1533    const char*     loc_range           = NULL;
1534    int         loc_range_len       = 0;
1535
1536    zval*       arr             = NULL;
1537    HashTable*  hash_arr        = NULL;
1538    zend_bool   boolCanonical   = 0;
1539    char*       result          =NULL;
1540
1541    intl_error_reset( NULL TSRMLS_CC );
1542
1543    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "as|bs", &arr, &loc_range, &loc_range_len,
1544        &boolCanonical, &fallback_loc, &fallback_loc_len) == FAILURE) {
1545        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "locale_lookup: unable to parse input params", 0 TSRMLS_CC );
1546        RETURN_FALSE;
1547    }
1548
1549    if(loc_range_len == 0) {
1550        loc_range = intl_locale_get_default(TSRMLS_C);
1551    }
1552
1553    hash_arr = HASH_OF(arr);
1554
1555    if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 ) {
1556        RETURN_EMPTY_STRING();
1557    }
1558
1559    result = lookup_loc_range(loc_range, hash_arr, boolCanonical TSRMLS_CC);
1560    if(result == NULL || result[0] == '\0') {
1561        if( fallback_loc ) {
1562            result = estrndup(fallback_loc, fallback_loc_len);
1563        } else {
1564            RETURN_EMPTY_STRING();
1565        }
1566    }
1567
1568    RETVAL_STRINGL(result, strlen(result), 0);
1569}
1570/* }}} */
1571
1572/* {{{ proto string Locale::acceptFromHttp(string $http_accept)
1573* Tries to find out best available locale based on HTTP �Accept-Language� header
1574*/
1575/* }}} */
1576/* {{{ proto string locale_accept_from_http(string $http_accept)
1577* Tries to find out best available locale based on HTTP �Accept-Language� header
1578*/
1579PHP_FUNCTION(locale_accept_from_http)
1580{
1581    UEnumeration *available;
1582    char *http_accept = NULL;
1583    int http_accept_len;
1584    UErrorCode status = 0;
1585    int len;
1586    char resultLocale[INTL_MAX_LOCALE_LEN+1];
1587    UAcceptResult outResult;
1588
1589    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s", &http_accept, &http_accept_len) == FAILURE)
1590    {
1591        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1592        "locale_accept_from_http: unable to parse input parameters", 0 TSRMLS_CC );
1593        RETURN_FALSE;
1594    }
1595
1596    available = ures_openAvailableLocales(NULL, &status);
1597    INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to retrieve locale list");
1598    len = uloc_acceptLanguageFromHTTP(resultLocale, INTL_MAX_LOCALE_LEN,
1599                        &outResult, http_accept, available, &status);
1600    uenum_close(available);
1601    INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to find acceptable locale");
1602    if (len < 0 || outResult == ULOC_ACCEPT_FAILED) {
1603        RETURN_FALSE;
1604    }
1605    RETURN_STRINGL(resultLocale, len, 1);
1606}
1607/* }}} */
1608
1609/*
1610 * Local variables:
1611 * tab-width: 4
1612 * c-basic-offset: 4
1613 * End:
1614 * vim600: noet sw=4 ts=4 fdm=marker
1615 * vim<600: noet sw=4 ts=4
1616 *can_loc_len
1617*/
1618