1/*
2   +----------------------------------------------------------------------+
3   | PHP Version 5                                                        |
4   +----------------------------------------------------------------------+
5   | This source file is subject to version 3.01 of the PHP license,      |
6   | that is bundled with this package in the file LICENSE, and is        |
7   | available through the world-wide-web at the following url:           |
8   | http://www.php.net/license/3_01.txt                                  |
9   | If you did not receive a copy of the PHP license and are unable to   |
10   | obtain it through the world-wide-web, please send a note to          |
11   | license@php.net so we can mail you a copy immediately.               |
12   +----------------------------------------------------------------------+
13   | Authors: Kirti Velankar <kirtig@yahoo-inc.com>                       |
14   +----------------------------------------------------------------------+
15*/
16
17/* $Id$ */
18
19#ifdef HAVE_CONFIG_H
20#include "config.h"
21#endif
22
23#include <unicode/ustring.h>
24#include <unicode/udata.h>
25#include <unicode/putil.h>
26#include <unicode/ures.h>
27
28#include "php_intl.h"
29#include "locale.h"
30#include "locale_class.h"
31#include "locale_methods.h"
32#include "intl_convert.h"
33#include "intl_data.h"
34
35#include <zend_API.h>
36#include <zend.h>
37#include <php.h>
38#include "main/php_ini.h"
39#include "ext/standard/php_smart_str.h"
40
41ZEND_EXTERN_MODULE_GLOBALS( intl )
42
/* String constants used when composing and inspecting locale identifiers. */
/* SEPARATOR is written between subtags by append_key_value(),
 * append_multiple_key_values() and add_prefix(). */
#define SEPARATOR "_"
/* SEPARATOR1, DELIMITER and EXTLANG_PREFIX are not referenced in the part of
 * the file reviewed here. */
#define SEPARATOR1 "-"
#define DELIMITER "-_"
#define EXTLANG_PREFIX "a"
/* Written by add_prefix() in front of private-use subtags. */
#define PRIVATE_PREFIX "x"
/* Tag name passed by locale_get_display_name to select the full display name. */
#define DISP_NAME "name"

/* Upper bounds on the numbered array keys (<tag>0 .. <tag>N-1) probed by
 * append_multiple_key_values(). */
#define MAX_NO_VARIANT  15
#define MAX_NO_EXTLANG  3
#define MAX_NO_PRIVATE  15
/* Not referenced in the part of the file reviewed here. */
#define MAX_NO_LOOKUP_LANG_TAG  100

/* Returned by append_key_value() when the requested key is not in the array. */
#define LOC_NOT_FOUND 1

/* Sizes required for the strings "variant15" , "extlang3", "private12" etc. */
#define VARIANT_KEYNAME_LEN  11
#define EXTLANG_KEYNAME_LEN  10
#define PRIVATE_KEYNAME_LEN  11
62
/* Grandfathered language tags, based on the IANA registry at the time of
* writing this code.
* The list is NULL-terminated and is searched with findOffset(), i.e. by exact,
* case-sensitive comparison.
* Order matters: the first LOC_PREFERRED_GRANDFATHERED_LEN entries are the tags
* that have a replacement stored at the same index in
* LOC_PREFERRED_GRANDFATHERED (see getPreferredTag()).
*/
static const char * const LOC_GRANDFATHERED[] = {
    "art-lojban",       "i-klingon",        "i-lux",            "i-navajo",     "no-bok",       "no-nyn",
    "cel-gaulish",      "en-GB-oed",        "i-ami",
    "i-bnn",        "i-default",        "i-enochian",
    "i-mingo",      "i-pwn",        "i-tao",
    "i-tay",        "i-tsu",        "sgn-BE-fr",
    "sgn-BE-nl",        "sgn-CH-de",        "zh-cmn",
    "zh-cmn-Hans",      "zh-cmn-Hant",      "zh-gan" ,
    "zh-guoyu",         "zh-hakka",         "zh-min",
    "zh-min-nan",       "zh-wuu",       "zh-xiang",
    "zh-yue",       NULL
};
78
/* Based on IANA registry at the time of writing this code
*  This array lists the preferred values for the grandfathered tags if applicable.
*  It is in sync with the leading entries of LOC_GRANDFATHERED only: the tag at
*  offset i (i < LOC_PREFERRED_GRANDFATHERED_LEN) in LOC_GRANDFATHERED has its
*  preferred value at offset i here. Later entries of LOC_GRANDFATHERED have no
*  counterpart in this array.
*  LOC_PREFERRED_GRANDFATHERED_LEN must equal the number of non-NULL entries
*  below; getPreferredTag() relies on it to stay inside the array.
*/
static const int        LOC_PREFERRED_GRANDFATHERED_LEN = 6;
static const char * const   LOC_PREFERRED_GRANDFATHERED[]  = {
    "jbo",          "tlh",          "lb",
    "nv",           "nb",           "nn",
    NULL
};
90
/* Character-class helpers for locale identifiers.
 * Every macro argument is parenthesized so that an operator expression can be
 * passed safely. Arguments are still evaluated more than once, so do not pass
 * expressions with side effects. Each macro evaluates to 1 or 0. */

/* returns TRUE if a is an ID separator ('_' or '-'), FALSE otherwise */
#define isIDSeparator(a) ((a) == '_' || (a) == '-')
#define isKeywordSeparator(a) ((a) == '@')
#define isEndOfTag(a) ((a) == '\0')

#define isPrefixLetter(a) (((a) == 'x') || ((a) == 'X') || ((a) == 'i') || ((a) == 'I'))

/* returns TRUE if one of the special prefixes is here (s=string)
  'x-' or 'i-' (either separator, either letter case) */
#define isIDPrefix(s) (isPrefixLetter((s)[0]) && isIDSeparator((s)[1]))
#define isKeywordPrefix(s) (isKeywordSeparator((s)[0]))

/* Dot terminates it because of POSIX form  where dot precedes the codepage
 * except for variant */
#define isTerminator(a) (((a) == 0) || ((a) == '.') || ((a) == '@'))
106
/* {{{ findOffset
 * Linear search of the NULL-terminated string array 'list' for an entry that
 * compares equal to 'key' (exact, case-sensitive strcmp match).
 * Returns the zero-based offset of the first match, or -1 if not present. */
static int16_t findOffset(const char* const* list, const char* key)
{
    const char* const* cursor;

    for (cursor = list; *cursor != NULL; cursor++) {
        if (!strcmp(key, *cursor)) {
            /* distance from the start of the array is the offset */
            return (int16_t)(cursor - list);
        }
    }

    return -1;
}
122/*}}}*/
123
124static char* getPreferredTag(const char* gf_tag)
125{
126    char* result = NULL;
127    int grOffset = 0;
128
129    grOffset = findOffset( LOC_GRANDFATHERED ,gf_tag);
130    if(grOffset < 0) {
131        return NULL;
132    }
133    if( grOffset < LOC_PREFERRED_GRANDFATHERED_LEN ){
134        /* return preferred tag */
135        result = estrdup( LOC_PREFERRED_GRANDFATHERED[grOffset] );
136    } else {
137        /* Return correct grandfathered language tag */
138        result = estrdup( LOC_GRANDFATHERED[grOffset] );
139    }
140    return result;
141}
142
/* {{{
* Reverse-direction counterpart of strtok for lookup: scans 'str' backwards,
* starting just before 'savedPos', for the nearest ID separator.
* If the subtag in front of that separator is a single character (a
* singleton), the position of the separator preceding the singleton is used
* instead.
* Returns that position, or -1 if there is no separator or the position would
* be 0 (invalid locale such as '-x-xyz' or '-sl_Latn').
*/
static int getStrrtokenPos(char* str, int savedPos)
{
    int pos = savedPos - 1;

    /* walk back to the closest separator */
    while( pos >= 0 && !isIDSeparator(str[pos]) ){
        pos--;
    }

    /* a separator two characters earlier means a singleton sits in between;
     * skip over it */
    if( pos >= 2 && isIDSeparator(str[pos-2]) ){
        pos -= 2;
    }

    /* nothing found, or the token before the separator would be empty */
    return ( pos < 1 ) ? -1 : pos;
}
171/* }}} */
172
/* {{{
* Forward search (strtok-like) for a singleton, i.e. a one-character subtag.
* Returns 0 when the string itself starts with a singleton ("x-avy", "a-prv1"),
* the index of the singleton character when it appears later in the string,
* or -1 when str is NULL, empty or contains no singleton.
*/
static int getSingletonPos(const char* str)
{
    int result =-1;
    int i=0;
    int len = 0;

    if( str && ((len=strlen(str))>0) ){
        for( i=0; i<len ; i++){
            if( !isIDSeparator(*(str+i)) ){
                continue;
            }
            if( i==1){
                /* string is of the form x-avy or a-prv1 */
                result =0;
                break;
            }
            /* delimiter found; check for singleton.
             * Only look two characters ahead while that position is still
             * inside the string: for a trailing separator (i == len-1)
             * str[i+2] would lie beyond the terminating NUL. */
            if( i+2 < len && isIDSeparator(*(str+i+2)) ){
                /* a singleton; report the position right after this separator */
                result = i+1;
                break;
            }
        }/* end of for */

    }
    return result;
}
205/* }}} */
206
207/* {{{ proto static string Locale::getDefault(  )
208   Get default locale */
209/* }}} */
/* {{{ proto static string locale_get_default( )
   Get default locale.
   Returns the value reported by intl_locale_get_default() as a PHP string. */
PHP_NAMED_FUNCTION(zif_locale_get_default)
{
    RETURN_STRING( intl_locale_get_default( TSRMLS_C ), TRUE );
}
216
217/* }}} */
218
219/* {{{ proto static string Locale::setDefault( string $locale )
220   Set default locale */
221/* }}} */
222/* {{{ proto static string locale_set_default( string $locale )
223   Set default locale */
224PHP_NAMED_FUNCTION(zif_locale_set_default)
225{
226    char* locale_name = NULL;
227    int   len=0;
228
229    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC,  "s",
230        &locale_name ,&len ) == FAILURE)
231    {
232        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
233                "locale_set_default: unable to parse input params", 0 TSRMLS_CC );
234
235        RETURN_FALSE;
236    }
237
238    if(len == 0) {
239        locale_name =  (char *)uloc_getDefault() ;
240        len = strlen(locale_name);
241    }
242
243    zend_alter_ini_entry(LOCALE_INI_NAME, sizeof(LOCALE_INI_NAME), locale_name, len, PHP_INI_USER, PHP_INI_STAGE_RUNTIME);
244
245    RETURN_TRUE;
246}
247/* }}} */
248
249/* {{{
250* Gets the value from ICU
251* common code shared by get_primary_language,get_script or get_region or get_variant
252* result = 0 if error, 1 if successful , -1 if no value
253*/
254static char* get_icu_value_internal( const char* loc_name , char* tag_name, int* result , int fromParseLocale)
255{
256    char*       tag_value   = NULL;
257    int32_t         tag_value_len   = 512;
258
259    int     singletonPos    = 0;
260    char*           mod_loc_name    = NULL;
261    int         grOffset    = 0;
262
263    int32_t         buflen          = 512;
264    UErrorCode      status          = U_ZERO_ERROR;
265
266
267    if( strcmp(tag_name, LOC_CANONICALIZE_TAG) != 0 ){
268        /* Handle  grandfathered languages */
269        grOffset =  findOffset( LOC_GRANDFATHERED , loc_name );
270        if( grOffset >= 0 ){
271            if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
272                return estrdup(loc_name);
273            } else {
274                /* Since Grandfathered , no value , do nothing , retutn NULL */
275                return NULL;
276            }
277        }
278
279    if( fromParseLocale==1 ){
280        /* Handle singletons */
281        if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
282            if( strlen(loc_name)>1 && (isIDPrefix(loc_name) == 1) ){
283                return estrdup(loc_name);
284            }
285        }
286
287        singletonPos = getSingletonPos( loc_name );
288        if( singletonPos == 0){
289            /* singleton at start of script, region , variant etc.
290             * or invalid singleton at start of language */
291            return NULL;
292        } else if( singletonPos > 0 ){
293            /* singleton at some position except at start
294             * strip off the singleton and rest of the loc_name */
295            mod_loc_name = estrndup ( loc_name , singletonPos-1);
296        }
297    } /* end of if fromParse */
298
299    } /* end of if != LOC_CANONICAL_TAG */
300
301    if( mod_loc_name == NULL){
302        mod_loc_name = estrdup(loc_name );
303    }
304
305    /* Proceed to ICU */
306    do{
307        tag_value = erealloc( tag_value , buflen  );
308        tag_value_len = buflen;
309
310        if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
311            buflen = uloc_getScript ( mod_loc_name ,tag_value , tag_value_len , &status);
312        }
313        if( strcmp(tag_name , LOC_LANG_TAG )==0 ){
314            buflen = uloc_getLanguage ( mod_loc_name ,tag_value , tag_value_len , &status);
315        }
316        if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
317            buflen = uloc_getCountry ( mod_loc_name ,tag_value , tag_value_len , &status);
318        }
319        if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
320            buflen = uloc_getVariant ( mod_loc_name ,tag_value , tag_value_len , &status);
321        }
322        if( strcmp(tag_name , LOC_CANONICALIZE_TAG)==0 ){
323            buflen = uloc_canonicalize ( mod_loc_name ,tag_value , tag_value_len , &status);
324        }
325
326        if( U_FAILURE( status ) ) {
327            if( status == U_BUFFER_OVERFLOW_ERROR ) {
328                status = U_ZERO_ERROR;
329                buflen++; /* add space for \0 */
330                continue;
331            }
332
333            /* Error in retriving data */
334            *result = 0;
335            if( tag_value ){
336                efree( tag_value );
337            }
338            if( mod_loc_name ){
339                efree( mod_loc_name);
340            }
341            return NULL;
342        }
343    } while( buflen > tag_value_len );
344
345    if(  buflen ==0 ){
346        /* No value found */
347        *result = -1;
348        if( tag_value ){
349            efree( tag_value );
350        }
351        if( mod_loc_name ){
352            efree( mod_loc_name);
353        }
354        return NULL;
355    } else {
356        *result = 1;
357    }
358
359    if( mod_loc_name ){
360        efree( mod_loc_name);
361    }
362    return tag_value;
363}
364/* }}} */
365
366/* {{{
367* Gets the value from ICU , called when PHP userspace function is called
368* common code shared by get_primary_language,get_script or get_region or get_variant
369*/
370static void get_icu_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
371{
372
373    const char* loc_name            = NULL;
374    int         loc_name_len        = 0;
375
376    char*       tag_value       = NULL;
377    char*       empty_result    = "";
378
379    int         result          = 0;
380    char*       msg             = NULL;
381
382    UErrorCode  status              = U_ZERO_ERROR;
383
384    intl_error_reset( NULL TSRMLS_CC );
385
386    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
387    &loc_name ,&loc_name_len ) == FAILURE) {
388        spprintf(&msg , 0, "locale_get_%s : unable to parse input params", tag_name );
389        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,  msg , 1 TSRMLS_CC );
390        efree(msg);
391
392        RETURN_FALSE;
393    }
394
395    if(loc_name_len == 0) {
396        loc_name = intl_locale_get_default(TSRMLS_C);
397    }
398
399    /* Call ICU get */
400    tag_value = get_icu_value_internal( loc_name , tag_name , &result ,0);
401
402    /* No value found */
403    if( result == -1 ) {
404        if( tag_value){
405            efree( tag_value);
406        }
407        RETURN_STRING( empty_result , TRUE);
408    }
409
410    /* value found */
411    if( tag_value){
412        RETURN_STRING( tag_value , FALSE);
413    }
414
415    /* Error encountered while fetching the value */
416    if( result ==0) {
417        spprintf(&msg , 0, "locale_get_%s : unable to get locale %s", tag_name , tag_name );
418        intl_error_set( NULL, status, msg , 1 TSRMLS_CC );
419        efree(msg);
420        RETURN_NULL();
421    }
422
423}
424/* }}} */
425
426/* {{{ proto static string Locale::getScript($locale)
427 * gets the script for the $locale
428 }}} */
/* {{{ proto static string locale_get_script($locale)
 * gets the script for the $locale
 * Thin wrapper: forwards to get_icu_value_src_php() with LOC_SCRIPT_TAG, which
 * parses $locale and sets the return value (the script, "" if there is none,
 * FALSE if the argument cannot be parsed).
 */
PHP_FUNCTION( locale_get_script )
{
    get_icu_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
436/* }}} */
437
438/* {{{ proto static string Locale::getRegion($locale)
439 * gets the region for the $locale
440 }}} */
/* {{{ proto static string locale_get_region($locale)
 * gets the region for the $locale
 * Thin wrapper: forwards to get_icu_value_src_php() with LOC_REGION_TAG, which
 * parses $locale and sets the return value (the region, "" if there is none,
 * FALSE if the argument cannot be parsed).
 */
PHP_FUNCTION( locale_get_region )
{
    get_icu_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
448/* }}} */
449
450/* {{{ proto static string Locale::getPrimaryLanguage($locale)
451 * gets the primary language for the $locale
452 }}} */
/* {{{ proto static string locale_get_primary_language($locale)
 * gets the primary language for the $locale
 * Thin wrapper: forwards to get_icu_value_src_php() with LOC_LANG_TAG, which
 * parses $locale and sets the return value (the language, "" if there is none,
 * FALSE if the argument cannot be parsed). A grandfathered tag is returned
 * unchanged as the language.
 */
PHP_FUNCTION(locale_get_primary_language )
{
    get_icu_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
460/* }}} */
461
462
/* {{{
 * Common code shared by the locale_get_display_xyz functions to get a display
 * value from ICU.
 *
 * tag_name selects what is fetched: LOC_LANG_TAG, LOC_SCRIPT_TAG,
 * LOC_REGION_TAG, LOC_VARIANT_TAG or DISP_NAME (the full display name).
 * PHP arguments: the locale to describe and, optionally, the locale in which
 * to describe it. An empty first argument or a missing second argument falls
 * back to intl_locale_get_default().
 *
 * Sets return_value to the display string converted to UTF-8, or to FALSE when
 * the parameters cannot be parsed, the locale name is longer than
 * ULOC_FULLNAME_CAPACITY, a grandfathered tag is queried for anything other
 * than its language, ICU reports an error, or the UTF-8 conversion fails.
 }}} */
static void get_icu_disp_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
{
    const char* loc_name            = NULL;
    int         loc_name_len        = 0;

    const char* disp_loc_name       = NULL;
    int         disp_loc_name_len   = 0;
    /* set when disp_loc_name is our own estrdup'ed copy and must be freed */
    int         free_loc_name       = 0;

    UChar*      disp_name       = NULL;
    int32_t     disp_name_len   = 0;

    char*       mod_loc_name        = NULL;

    /* initial capacity of disp_name, in UChars */
    int32_t     buflen              = 512;
    UErrorCode  status              = U_ZERO_ERROR;

    char*       utf8value       = NULL;
    int         utf8value_len       = 0;

    char*       msg                 = NULL;
    int         grOffset        = 0;

    intl_error_reset( NULL TSRMLS_CC );

    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s|s",
        &loc_name, &loc_name_len ,
        &disp_loc_name ,&disp_loc_name_len ) == FAILURE)
    {
        spprintf(&msg , 0, "locale_get_display_%s : unable to parse input params", tag_name );
        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,  msg , 1 TSRMLS_CC );
        efree(msg);
        RETURN_FALSE;
    }

    if(loc_name_len > ULOC_FULLNAME_CAPACITY) {
        /* See bug 67397: overlong locale names cause trouble in uloc_getDisplayName */
        spprintf(&msg , 0, "locale_get_display_%s : name too long", tag_name );
        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,  msg , 1 TSRMLS_CC );
        efree(msg);
        RETURN_FALSE;
    }

    if(loc_name_len == 0) {
        loc_name = intl_locale_get_default(TSRMLS_C);
    }

    if( strcmp(tag_name, DISP_NAME) != 0 ){
        /* Handle grandfathered languages */
        grOffset = findOffset( LOC_GRANDFATHERED , loc_name );
        if( grOffset >= 0 ){
            if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
                /* look the language up under its preferred tag */
                mod_loc_name = getPreferredTag( loc_name );
            } else {
                /* Since Grandfathered, no value, do nothing, return FALSE */
                RETURN_FALSE;
            }
        }
    } /* end of if != DISP_NAME */

    if( mod_loc_name==NULL ){
        mod_loc_name = estrdup( loc_name );
    }

    /* Check if disp_loc_name passed , if not use default locale */
    if( !disp_loc_name){
        disp_loc_name = estrdup(intl_locale_get_default(TSRMLS_C));
        free_loc_name = 1;
    }

    /* Get the disp_value for the given locale.
     * On U_BUFFER_OVERFLOW_ERROR buflen holds the length ICU returned, so the
     * loop condition is true and the next pass reallocates to that size. */
    do{
        disp_name = erealloc( disp_name , buflen * sizeof(UChar)  );
        disp_name_len = buflen;

        if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
            buflen = uloc_getDisplayLanguage ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
        } else if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
            buflen = uloc_getDisplayScript ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
        } else if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
            buflen = uloc_getDisplayCountry ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
        } else if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
            buflen = uloc_getDisplayVariant ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
        } else if( strcmp(tag_name , DISP_NAME)==0 ){
            buflen = uloc_getDisplayName ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
        }

        /* U_STRING_NOT_TERMINATED_WARNING is admissible here; don't look for it
         * (the conversion below is given the explicit length buflen) */
        if( U_FAILURE( status ) )
        {
            if( status == U_BUFFER_OVERFLOW_ERROR )
            {
                status = U_ZERO_ERROR;
                continue;
            }

            /* any other ICU failure: report it and release everything we own */
            spprintf(&msg, 0, "locale_get_display_%s : unable to get locale %s", tag_name , tag_name );
            intl_error_set( NULL, status, msg , 1 TSRMLS_CC );
            efree(msg);
            if( disp_name){
                efree( disp_name );
            }
            if( mod_loc_name){
                efree( mod_loc_name );
            }
            if (free_loc_name) {
                efree((void *)disp_loc_name);
                disp_loc_name = NULL;
            }
            RETURN_FALSE;
        }
    } while( buflen > disp_name_len );

    if( mod_loc_name){
        efree( mod_loc_name );
    }
    if (free_loc_name) {
        efree((void *)disp_loc_name);
        disp_loc_name = NULL;
    }
    /* Convert display locale name from UTF-16 to UTF-8. */
    intl_convert_utf16_to_utf8( &utf8value, &utf8value_len, disp_name, buflen, &status );
    efree( disp_name );
    if( U_FAILURE( status ) )
    {
        spprintf(&msg, 0, "locale_get_display_%s :error converting display name for %s to UTF-8", tag_name , tag_name );
        intl_error_set( NULL, status, msg , 1 TSRMLS_CC );
        efree(msg);
        RETURN_FALSE;
    }

    /* utf8value is passed with the duplicate flag FALSE */
    RETVAL_STRINGL( utf8value, utf8value_len , FALSE);

}
600/* }}} */
601
602/* {{{ proto static string Locale::getDisplayName($locale[, $in_locale = null])
603* gets the name for the $locale in $in_locale or default_locale
604 }}} */
/* {{{ proto static string locale_get_display_name($locale[, $in_locale = null])
* gets the name for the $locale in $in_locale or default_locale
* Thin wrapper: forwards to get_icu_disp_value_src_php() with DISP_NAME, which
* parses the arguments and sets the return value (FALSE on failure).
*/
PHP_FUNCTION(locale_get_display_name)
{
    get_icu_disp_value_src_php( DISP_NAME , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
612/* }}} */
613
614/* {{{ proto static string Locale::getDisplayLanguage($locale[, $in_locale = null])
615* gets the language for the $locale in $in_locale or default_locale
616 }}} */
/* {{{ proto static string locale_get_display_language($locale[, $in_locale = null])
* gets the language for the $locale in $in_locale or default_locale
* Thin wrapper: forwards to get_icu_disp_value_src_php() with LOC_LANG_TAG,
* which parses the arguments and sets the return value (FALSE on failure).
*/
PHP_FUNCTION(locale_get_display_language)
{
    get_icu_disp_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
624/* }}} */
625
626/* {{{ proto static string Locale::getDisplayScript($locale, $in_locale = null)
627* gets the script for the $locale in $in_locale or default_locale
628 }}} */
/* {{{ proto static string locale_get_display_script($locale[, $in_locale = null])
* gets the script for the $locale in $in_locale or default_locale
* Thin wrapper: forwards to get_icu_disp_value_src_php() with LOC_SCRIPT_TAG,
* which parses the arguments and sets the return value (FALSE on failure,
* including for grandfathered tags).
*/
PHP_FUNCTION(locale_get_display_script)
{
    get_icu_disp_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
636/* }}} */
637
638/* {{{ proto static string Locale::getDisplayRegion($locale, $in_locale = null)
639* gets the region for the $locale in $in_locale or default_locale
640 }}} */
/* {{{ proto static string locale_get_display_region($locale[, $in_locale = null])
* gets the region for the $locale in $in_locale or default_locale
* Thin wrapper: forwards to get_icu_disp_value_src_php() with LOC_REGION_TAG,
* which parses the arguments and sets the return value (FALSE on failure,
* including for grandfathered tags).
*/
PHP_FUNCTION(locale_get_display_region)
{
    get_icu_disp_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
648/* }}} */
649
650/* {{{
651* proto static string Locale::getDisplayVariant($locale, $in_locale = null)
652* gets the variant for the $locale in $in_locale or default_locale
653 }}} */
/* {{{
* proto static string locale_get_display_variant($locale[, $in_locale = null])
* gets the variant for the $locale in $in_locale or default_locale
* Thin wrapper: forwards to get_icu_disp_value_src_php() with LOC_VARIANT_TAG,
* which parses the arguments and sets the return value (FALSE on failure,
* including for grandfathered tags).
*/
PHP_FUNCTION(locale_get_display_variant)
{
    get_icu_disp_value_src_php( LOC_VARIANT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
662/* }}} */
663
664 /* {{{ proto static array getKeywords(string $locale) {
665 * return an associative array containing keyword-value
666 * pairs for this locale. The keys are keys to the array (doh!)
667 * }}}*/
668 /* {{{ proto static array locale_get_keywords(string $locale) {
669 * return an associative array containing keyword-value
670 * pairs for this locale. The keys are keys to the array (doh!)
671 */
672PHP_FUNCTION( locale_get_keywords )
673{
674    UEnumeration*   e        = NULL;
675    UErrorCode      status   = U_ZERO_ERROR;
676
677    const char*     kw_key        = NULL;
678    int32_t         kw_key_len    = 0;
679
680    const char*         loc_name        = NULL;
681    int             loc_name_len    = 0;
682
683/*
684    ICU expects the buffer to be allocated  before calling the function
685    and so the buffer size has been explicitly specified
686    ICU uloc.h #define  ULOC_KEYWORD_AND_VALUES_CAPACITY   100
687    hence the kw_value buffer size is 100
688*/
689    char*       kw_value        = NULL;
690    int32_t     kw_value_len    = 100;
691
692    intl_error_reset( NULL TSRMLS_CC );
693
694    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
695        &loc_name, &loc_name_len ) == FAILURE)
696    {
697        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
698             "locale_get_keywords: unable to parse input params", 0 TSRMLS_CC );
699
700        RETURN_FALSE;
701    }
702
703    if(loc_name_len == 0) {
704        loc_name = intl_locale_get_default(TSRMLS_C);
705    }
706
707    /* Get the keywords */
708    e = uloc_openKeywords( loc_name, &status );
709    if( e != NULL )
710    {
711        /* Traverse it, filling the return array. */
712        array_init( return_value );
713
714        while( ( kw_key = uenum_next( e, &kw_key_len, &status ) ) != NULL ){
715            kw_value = ecalloc( 1 , kw_value_len  );
716
717            /* Get the keyword value for each keyword */
718            kw_value_len=uloc_getKeywordValue( loc_name,kw_key, kw_value, kw_value_len ,  &status );
719            if (status == U_BUFFER_OVERFLOW_ERROR) {
720                status = U_ZERO_ERROR;
721                kw_value = erealloc( kw_value , kw_value_len+1);
722                kw_value_len=uloc_getKeywordValue( loc_name,kw_key, kw_value, kw_value_len+1 ,  &status );
723            } else if(!U_FAILURE(status)) {
724                kw_value = erealloc( kw_value , kw_value_len+1);
725            }
726            if (U_FAILURE(status)) {
727                    intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "locale_get_keywords: Error encountered while getting the keyword  value for the  keyword", 0 TSRMLS_CC );
728                if( kw_value){
729                    efree( kw_value );
730                }
731                zval_dtor(return_value);
732                RETURN_FALSE;
733            }
734
735            add_assoc_stringl( return_value, (char *)kw_key, kw_value , kw_value_len, 0);
736        } /* end of while */
737
738    } /* end of if e!=NULL */
739
740    uenum_close( e );
741}
742/* }}} */
743
744 /* {{{ proto static string Locale::canonicalize($locale)
745 * @return string the canonicalized locale
746 * }}} */
 /* {{{ proto static string locale_canonicalize(string $locale)
 * @param string $locale    The locale string to canonicalize
 * Thin wrapper: forwards to get_icu_value_src_php() with LOC_CANONICALIZE_TAG,
 * which parses $locale as its only argument and sets the return value
 * (FALSE if the argument cannot be parsed).
 */
PHP_FUNCTION(locale_canonicalize)
{
    get_icu_value_src_php( LOC_CANONICALIZE_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
754/* }}} */
755
756/* {{{ append_key_value
757* Internal function which is called from locale_compose
758* gets the value for the key_name and appends to the loc_name
759* returns 1 if successful , -1 if not found ,
760* 0 if array element is not a string , -2 if buffer-overflow
761*/
762static int append_key_value(smart_str* loc_name, HashTable* hash_arr, char* key_name)
763{
764    zval**  ele_value   = NULL;
765
766    if(zend_hash_find(hash_arr , key_name , strlen(key_name) + 1 ,(void **)&ele_value ) == SUCCESS ) {
767        if(Z_TYPE_PP(ele_value)!= IS_STRING ){
768            /* element value is not a string */
769            return FAILURE;
770        }
771        if(strcmp(key_name, LOC_LANG_TAG) != 0 &&
772           strcmp(key_name, LOC_GRANDFATHERED_LANG_TAG)!=0 ) {
773            /* not lang or grandfathered tag */
774            smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
775        }
776        smart_str_appendl(loc_name, Z_STRVAL_PP(ele_value) , Z_STRLEN_PP(ele_value));
777        return SUCCESS;
778    }
779
780    return LOC_NOT_FOUND;
781}
782/* }}} */
783
784/* {{{ append_prefix , appends the prefix needed
785* e.g. private adds 'x'
786*/
787static void add_prefix(smart_str* loc_name, char* key_name)
788{
789    if( strncmp(key_name , LOC_PRIVATE_TAG , 7) == 0 ){
790        smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
791        smart_str_appendl(loc_name, PRIVATE_PREFIX , sizeof(PRIVATE_PREFIX)-1);
792    }
793}
794/* }}} */
795
796/* {{{ append_multiple_key_values
797* Internal function which is called from locale_compose
798* gets the multiple values for the key_name and appends to the loc_name
799* used for 'variant','extlang','private'
800* returns 1 if successful , -1 if not found ,
801* 0 if array element is not a string , -2 if buffer-overflow
802*/
803static int append_multiple_key_values(smart_str* loc_name, HashTable* hash_arr, char* key_name TSRMLS_DC)
804{
805    zval**  ele_value       = NULL;
806    int     i       = 0;
807    int     isFirstSubtag   = 0;
808    int     max_value   = 0;
809
810    /* Variant/ Extlang/Private etc. */
811    if( zend_hash_find( hash_arr , key_name , strlen(key_name) + 1 ,(void **)&ele_value ) == SUCCESS ) {
812        if( Z_TYPE_PP(ele_value) == IS_STRING ){
813            add_prefix( loc_name , key_name);
814
815            smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
816            smart_str_appendl(loc_name, Z_STRVAL_PP(ele_value) , Z_STRLEN_PP(ele_value));
817            return SUCCESS;
818        } else if(Z_TYPE_PP(ele_value) == IS_ARRAY ) {
819            HashPosition pos;
820            HashTable *arr = HASH_OF(*ele_value);
821            zval **data = NULL;
822
823            zend_hash_internal_pointer_reset_ex(arr, &pos);
824            while(zend_hash_get_current_data_ex(arr, (void **)&data, &pos) != FAILURE) {
825                if(Z_TYPE_PP(data) != IS_STRING) {
826                    return FAILURE;
827                }
828                if (isFirstSubtag++ == 0){
829                    add_prefix(loc_name , key_name);
830                }
831                smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
832                smart_str_appendl(loc_name, Z_STRVAL_PP(data) , Z_STRLEN_PP(data));
833                zend_hash_move_forward_ex(arr, &pos);
834            }
835            return SUCCESS;
836        } else {
837            return FAILURE;
838        }
839    } else {
840        char cur_key_name[31];
841        /* Decide the max_value: the max. no. of elements allowed */
842        if( strcmp(key_name , LOC_VARIANT_TAG) ==0 ){
843            max_value  = MAX_NO_VARIANT;
844        }
845        if( strcmp(key_name , LOC_EXTLANG_TAG) ==0 ){
846            max_value  = MAX_NO_EXTLANG;
847        }
848        if( strcmp(key_name , LOC_PRIVATE_TAG) ==0 ){
849            max_value  = MAX_NO_PRIVATE;
850        }
851
852        /* Multiple variant values as variant0, variant1 ,variant2 */
853        isFirstSubtag = 0;
854        for( i=0 ; i< max_value; i++ ){
855            snprintf( cur_key_name , 30, "%s%d", key_name , i);
856            if( zend_hash_find( hash_arr , cur_key_name , strlen(cur_key_name) + 1,(void **)&ele_value ) == SUCCESS ){
857                if( Z_TYPE_PP(ele_value)!= IS_STRING ){
858                    /* variant is not a string */
859                    return FAILURE;
860                }
861                /* Add the contents */
862                if (isFirstSubtag++ == 0){
863                    add_prefix(loc_name , cur_key_name);
864                }
865                smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
866                smart_str_appendl(loc_name, Z_STRVAL_PP(ele_value) , Z_STRLEN_PP(ele_value));
867            }
868        } /* end of for */
869    } /* end of else */
870
871    return SUCCESS;
872}
873/* }}} */
874
875/*{{{
876* If applicable sets error message and aborts locale_compose gracefully
877* returns 0  if locale_compose needs to be aborted
878* otherwise returns 1
879*/
880static int handleAppendResult( int result, smart_str* loc_name TSRMLS_DC)
881{
882    intl_error_reset( NULL TSRMLS_CC );
883    if( result == FAILURE) {
884        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
885             "locale_compose: parameter array element is not a string", 0 TSRMLS_CC );
886        smart_str_free(loc_name);
887        return 0;
888    }
889    return 1;
890}
891/* }}} */
892
/* NUL-terminates the smart_str 's' and returns its buffer (with the duplicate
 * flag 0) from the enclosing PHP function. Wrapped in do { } while (0) so the
 * two statements always act as a single statement, e.g. in an unbraced if. */
#define RETURN_SMART_STR(s) do { smart_str_0((s)); RETURN_STRINGL((s)->c, (s)->len, 0); } while (0)
894/* {{{ proto static string Locale::composeLocale($array)
895* Creates a locale by combining the parts of locale-ID passed
896* }}} */
897/* {{{ proto static string compose_locale($array)
898* Creates a locale by combining the parts of locale-ID passed
899* }}} */
900PHP_FUNCTION(locale_compose)
901{
902    smart_str       loc_name_s = {0};
903    smart_str *loc_name = &loc_name_s;
904    zval*           arr = NULL;
905    HashTable*      hash_arr = NULL;
906    int             result = 0;
907
908    intl_error_reset( NULL TSRMLS_CC );
909
910    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "a",
911        &arr) == FAILURE)
912    {
913        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
914             "locale_compose: unable to parse input params", 0 TSRMLS_CC );
915        RETURN_FALSE;
916    }
917
918    hash_arr = HASH_OF( arr );
919
920    if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 )
921        RETURN_FALSE;
922
923    /* Check for grandfathered first */
924    result = append_key_value(loc_name, hash_arr,  LOC_GRANDFATHERED_LANG_TAG);
925    if( result == SUCCESS){
926        RETURN_SMART_STR(loc_name);
927    }
928    if( !handleAppendResult( result, loc_name TSRMLS_CC)){
929        RETURN_FALSE;
930    }
931
932    /* Not grandfathered */
933    result = append_key_value(loc_name, hash_arr , LOC_LANG_TAG);
934    if( result == LOC_NOT_FOUND ){
935        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
936        "locale_compose: parameter array does not contain 'language' tag.", 0 TSRMLS_CC );
937        smart_str_free(loc_name);
938        RETURN_FALSE;
939    }
940    if( !handleAppendResult( result, loc_name TSRMLS_CC)){
941        RETURN_FALSE;
942    }
943
944    /* Extlang */
945    result = append_multiple_key_values(loc_name, hash_arr , LOC_EXTLANG_TAG TSRMLS_CC);
946    if( !handleAppendResult( result, loc_name TSRMLS_CC)){
947        RETURN_FALSE;
948    }
949
950    /* Script */
951    result = append_key_value(loc_name, hash_arr , LOC_SCRIPT_TAG);
952    if( !handleAppendResult( result, loc_name TSRMLS_CC)){
953        RETURN_FALSE;
954    }
955
956    /* Region */
957    result = append_key_value( loc_name, hash_arr , LOC_REGION_TAG);
958    if( !handleAppendResult( result, loc_name TSRMLS_CC)){
959        RETURN_FALSE;
960    }
961
962    /* Variant */
963    result = append_multiple_key_values( loc_name, hash_arr , LOC_VARIANT_TAG TSRMLS_CC);
964    if( !handleAppendResult( result, loc_name TSRMLS_CC)){
965        RETURN_FALSE;
966    }
967
968    /* Private */
969    result = append_multiple_key_values( loc_name, hash_arr , LOC_PRIVATE_TAG TSRMLS_CC);
970    if( !handleAppendResult( result, loc_name TSRMLS_CC)){
971        RETURN_FALSE;
972    }
973
974    RETURN_SMART_STR(loc_name);
975}
976/* }}} */
977
978
/* {{{ get_private_subtags
 * Scans the locale for the private-use singleton ('x' or 'X') and returns a
 * newly allocated copy of everything that follows its separator, or NULL if
 * there is no private part.
 * e.g. for locale='en_US-x-prv1-prv2-prv3'
 * returns a pointer to the string 'prv1-prv2-prv3'
 * The caller owns the returned string and must efree() it.
 */
static char* get_private_subtags(const char* loc_name)
{
    const char* cursor      = loc_name;
    int         remaining   = 0;
    int         pos         = 0;

    if( cursor == NULL || *cursor == '\0' ){
        return NULL;
    }

    remaining = strlen(cursor);
    for(;;){
        pos = getSingletonPos(cursor);
        if( pos == -1 ){
            /* no (further) singleton in the rest of the string */
            return NULL;
        }

        if( cursor[pos] == 'x' || cursor[pos] == 'X' ){
            /* private subtag start found */
            if( pos + 2 == remaining ){
                /* nothing follows the singleton and its separator */
                return NULL;
            }
            return estrndup( cursor + pos + 2, remaining - (pos + 2) );
        }

        if( pos + 1 >= remaining ){
            /* String end */
            return NULL;
        }

        /* a singleton other than the private one: keep looking after it */
        cursor += pos + 1;
        remaining = strlen(cursor);
    }
}
/* }}} */
1027
1028/* {{{ code used by locale_parse
1029*/
1030static int add_array_entry(const char* loc_name, zval* hash_arr, char* key_name TSRMLS_DC)
1031{
1032    char*   key_value   = NULL;
1033    char*   cur_key_name    = NULL;
1034    char*   token           = NULL;
1035    char*   last_ptr    = NULL;
1036
1037    int result      = 0;
1038    int     cur_result      = 0;
1039    int     cnt         = 0;
1040
1041
1042    if( strcmp(key_name , LOC_PRIVATE_TAG)==0 ){
1043        key_value = get_private_subtags( loc_name );
1044        result = 1;
1045    } else {
1046        key_value = get_icu_value_internal( loc_name , key_name , &result,1 );
1047    }
1048    if( (strcmp(key_name , LOC_PRIVATE_TAG)==0) ||
1049        ( strcmp(key_name , LOC_VARIANT_TAG)==0) ){
1050        if( result > 0 && key_value){
1051            /* Tokenize on the "_" or "-"  */
1052            token = php_strtok_r( key_value , DELIMITER ,&last_ptr);
1053            if( cur_key_name ){
1054                efree( cur_key_name);
1055            }
1056            cur_key_name = (char*)ecalloc( 25,  25);
1057            sprintf( cur_key_name , "%s%d", key_name , cnt++);
1058            add_assoc_string( hash_arr, cur_key_name , token ,TRUE );
1059            /* tokenize on the "_" or "-" and stop  at singleton if any */
1060            while( (token = php_strtok_r(NULL , DELIMITER , &last_ptr)) && (strlen(token)>1) ){
1061                sprintf( cur_key_name , "%s%d", key_name , cnt++);
1062                add_assoc_string( hash_arr, cur_key_name , token , TRUE );
1063            }
1064/*
1065            if( strcmp(key_name, LOC_PRIVATE_TAG) == 0 ){
1066            }
1067*/
1068        }
1069    } else {
1070        if( result == 1 ){
1071            add_assoc_string( hash_arr, key_name , key_value , TRUE );
1072            cur_result = 1;
1073        }
1074    }
1075
1076    if( cur_key_name ){
1077        efree( cur_key_name);
1078    }
1079    /*if( key_name != LOC_PRIVATE_TAG && key_value){*/
1080    if( key_value){
1081        efree(key_value);
1082    }
1083    return cur_result;
1084}
1085/* }}} */
1086
1087/* {{{ proto static array Locale::parseLocale($locale)
1088* parses a locale-id into an array the different parts of it
1089 }}} */
1090/* {{{ proto static array parse_locale($locale)
1091* parses a locale-id into an array the different parts of it
1092*/
1093PHP_FUNCTION(locale_parse)
1094{
1095    const char* loc_name        = NULL;
1096    int         loc_name_len    = 0;
1097    int         grOffset        = 0;
1098
1099    intl_error_reset( NULL TSRMLS_CC );
1100
1101    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
1102        &loc_name, &loc_name_len ) == FAILURE)
1103    {
1104        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1105             "locale_parse: unable to parse input params", 0 TSRMLS_CC );
1106
1107        RETURN_FALSE;
1108    }
1109
1110    if(loc_name_len == 0) {
1111        loc_name = intl_locale_get_default(TSRMLS_C);
1112    }
1113
1114    array_init( return_value );
1115
1116    grOffset =  findOffset( LOC_GRANDFATHERED , loc_name );
1117    if( grOffset >= 0 ){
1118        add_assoc_string( return_value , LOC_GRANDFATHERED_LANG_TAG , estrdup(loc_name) ,FALSE );
1119    }
1120    else{
1121        /* Not grandfathered */
1122        add_array_entry( loc_name , return_value , LOC_LANG_TAG TSRMLS_CC);
1123        add_array_entry( loc_name , return_value , LOC_SCRIPT_TAG TSRMLS_CC);
1124        add_array_entry( loc_name , return_value , LOC_REGION_TAG TSRMLS_CC);
1125        add_array_entry( loc_name , return_value , LOC_VARIANT_TAG TSRMLS_CC);
1126        add_array_entry( loc_name , return_value , LOC_PRIVATE_TAG TSRMLS_CC);
1127    }
1128}
1129/* }}} */
1130
1131/* {{{ proto static array Locale::getAllVariants($locale)
1132* gets an array containing the list of variants, or null
1133 }}} */
1134/* {{{ proto static array locale_get_all_variants($locale)
1135* gets an array containing the list of variants, or null
1136*/
1137PHP_FUNCTION(locale_get_all_variants)
1138{
1139    const char*     loc_name        = NULL;
1140    int         loc_name_len    = 0;
1141
1142    int result      = 0;
1143    char*   token       = NULL;
1144    char*   variant     = NULL;
1145    char*   saved_ptr   = NULL;
1146
1147    intl_error_reset( NULL TSRMLS_CC );
1148
1149    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
1150    &loc_name, &loc_name_len ) == FAILURE)
1151    {
1152        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1153         "locale_parse: unable to parse input params", 0 TSRMLS_CC );
1154
1155        RETURN_FALSE;
1156    }
1157
1158    if(loc_name_len == 0) {
1159        loc_name = intl_locale_get_default(TSRMLS_C);
1160    }
1161
1162
1163    array_init( return_value );
1164
1165    /* If the locale is grandfathered, stop, no variants */
1166    if( findOffset( LOC_GRANDFATHERED , loc_name ) >=  0 ){
1167        /* ("Grandfathered Tag. No variants."); */
1168    }
1169    else {
1170    /* Call ICU variant */
1171        variant = get_icu_value_internal( loc_name , LOC_VARIANT_TAG , &result ,0);
1172        if( result > 0 && variant){
1173            /* Tokenize on the "_" or "-" */
1174            token = php_strtok_r( variant , DELIMITER , &saved_ptr);
1175            add_next_index_stringl( return_value, token , strlen(token) ,TRUE );
1176            /* tokenize on the "_" or "-" and stop  at singleton if any */
1177            while( (token = php_strtok_r(NULL , DELIMITER, &saved_ptr)) && (strlen(token)>1) ){
1178                add_next_index_stringl( return_value, token , strlen(token) ,TRUE );
1179            }
1180        }
1181        if( variant ){
1182            efree( variant );
1183        }
1184    }
1185
1186
1187}
1188/* }}} */
1189
/*{{{
* Converts to lower case and also replaces all hyphens with the underscore
*
* str    - NUL-terminated input string
* retstr - output buffer; must have room for strlen(str) + 1 bytes
*
* Returns 1 after writing the converted, NUL-terminated string to retstr.
* Returns 0 for a NULL or empty str, in which case retstr is not touched.
*/
static int strToMatch(const char* str ,char *retstr)
{
    if( (!str) || str[0] == '\0' ){
        return 0;
    }

    for( ; *str != '\0'; str++, retstr++ ){
        if( *str == '-' ){
            *retstr = '_';
        } else {
            /* tolower() is only defined for unsigned char values and EOF */
            *retstr = (char) tolower( (unsigned char) *str );
        }
    }
    *retstr = '\0';

    return 1;
}
/* }}} */
1222
1223/* {{{ proto static boolean Locale::filterMatches(string $langtag, string $locale[, bool $canonicalize])
1224* Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm
1225*/
1226/* }}} */
1227/* {{{ proto boolean locale_filter_matches(string $langtag, string $locale[, bool $canonicalize])
1228* Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm
1229*/
1230PHP_FUNCTION(locale_filter_matches)
1231{
1232    char*           lang_tag        = NULL;
1233    int             lang_tag_len    = 0;
1234    const char*     loc_range       = NULL;
1235    int             loc_range_len   = 0;
1236
1237    int     result      = 0;
1238    char*       token       = 0;
1239    char*       chrcheck    = NULL;
1240
1241    char*           can_lang_tag    = NULL;
1242    char*           can_loc_range   = NULL;
1243
1244    char*           cur_lang_tag    = NULL;
1245    char*           cur_loc_range   = NULL;
1246
1247    zend_bool   boolCanonical   = 0;
1248    UErrorCode  status      = U_ZERO_ERROR;
1249
1250    intl_error_reset( NULL TSRMLS_CC );
1251
1252    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "ss|b",
1253        &lang_tag, &lang_tag_len , &loc_range , &loc_range_len ,
1254        &boolCanonical) == FAILURE)
1255    {
1256        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1257        "locale_filter_matches: unable to parse input params", 0 TSRMLS_CC );
1258
1259        RETURN_FALSE;
1260    }
1261
1262    if(loc_range_len == 0) {
1263        loc_range = intl_locale_get_default(TSRMLS_C);
1264    }
1265
1266    if( strcmp(loc_range,"*")==0){
1267        RETURN_TRUE;
1268    }
1269
1270    if( boolCanonical ){
1271        /* canonicalize loc_range */
1272        can_loc_range=get_icu_value_internal( loc_range , LOC_CANONICALIZE_TAG , &result , 0);
1273        if( result ==0) {
1274            intl_error_set( NULL, status,
1275                "locale_filter_matches : unable to canonicalize loc_range" , 0 TSRMLS_CC );
1276            RETURN_FALSE;
1277        }
1278
1279        /* canonicalize lang_tag */
1280        can_lang_tag = get_icu_value_internal( lang_tag , LOC_CANONICALIZE_TAG , &result ,  0);
1281        if( result ==0) {
1282            intl_error_set( NULL, status,
1283                "locale_filter_matches : unable to canonicalize lang_tag" , 0 TSRMLS_CC );
1284            RETURN_FALSE;
1285        }
1286
1287        /* Convert to lower case for case-insensitive comparison */
1288        cur_lang_tag = ecalloc( 1, strlen(can_lang_tag) + 1);
1289
1290        /* Convert to lower case for case-insensitive comparison */
1291        result = strToMatch( can_lang_tag , cur_lang_tag);
1292        if( result == 0) {
1293            efree( cur_lang_tag );
1294            efree( can_lang_tag );
1295            RETURN_FALSE;
1296        }
1297
1298        cur_loc_range = ecalloc( 1, strlen(can_loc_range) + 1);
1299        result = strToMatch( can_loc_range , cur_loc_range );
1300        if( result == 0) {
1301            efree( cur_lang_tag );
1302            efree( can_lang_tag );
1303            efree( cur_loc_range );
1304            efree( can_loc_range );
1305            RETURN_FALSE;
1306        }
1307
1308        /* check if prefix */
1309        token   = strstr( cur_lang_tag , cur_loc_range );
1310
1311        if( token && (token==cur_lang_tag) ){
1312            /* check if the char. after match is SEPARATOR */
1313            chrcheck = token + (strlen(cur_loc_range));
1314            if( isIDSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
1315                if( cur_lang_tag){
1316                    efree( cur_lang_tag );
1317                }
1318                if( cur_loc_range){
1319                    efree( cur_loc_range );
1320                }
1321                if( can_lang_tag){
1322                    efree( can_lang_tag );
1323                }
1324                if( can_loc_range){
1325                    efree( can_loc_range );
1326                }
1327                RETURN_TRUE;
1328            }
1329        }
1330
1331        /* No prefix as loc_range */
1332        if( cur_lang_tag){
1333            efree( cur_lang_tag );
1334        }
1335        if( cur_loc_range){
1336            efree( cur_loc_range );
1337        }
1338        if( can_lang_tag){
1339            efree( can_lang_tag );
1340        }
1341        if( can_loc_range){
1342            efree( can_loc_range );
1343        }
1344        RETURN_FALSE;
1345
1346    } /* end of if isCanonical */
1347    else{
1348        /* Convert to lower case for case-insensitive comparison */
1349        cur_lang_tag = ecalloc( 1, strlen(lang_tag ) + 1);
1350
1351        result = strToMatch( lang_tag , cur_lang_tag);
1352        if( result == 0) {
1353            efree( cur_lang_tag );
1354            RETURN_FALSE;
1355        }
1356        cur_loc_range = ecalloc( 1, strlen(loc_range ) + 1);
1357        result = strToMatch( loc_range , cur_loc_range );
1358        if( result == 0) {
1359            efree( cur_lang_tag );
1360            efree( cur_loc_range );
1361            RETURN_FALSE;
1362        }
1363
1364        /* check if prefix */
1365        token   = strstr( cur_lang_tag , cur_loc_range );
1366
1367        if( token && (token==cur_lang_tag) ){
1368            /* check if the char. after match is SEPARATOR */
1369            chrcheck = token + (strlen(cur_loc_range));
1370            if( isIDSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
1371                if( cur_lang_tag){
1372                    efree( cur_lang_tag );
1373                }
1374                if( cur_loc_range){
1375                    efree( cur_loc_range );
1376                }
1377                RETURN_TRUE;
1378            }
1379        }
1380
1381        /* No prefix as loc_range */
1382        if( cur_lang_tag){
1383            efree( cur_lang_tag );
1384        }
1385        if( cur_loc_range){
1386            efree( cur_loc_range );
1387        }
1388        RETURN_FALSE;
1389
1390    }
1391}
1392/* }}} */
1393
1394static void array_cleanup( char* arr[] , int arr_size)
1395{
1396    int i=0;
1397    for( i=0; i< arr_size; i++ ){
1398        if( arr[i*2] ){
1399            efree( arr[i*2]);
1400        }
1401    }
1402    efree(arr);
1403}
1404
1405#define LOOKUP_CLEAN_RETURN(value)  array_cleanup(cur_arr, cur_arr_len); return (value)
1406/* {{{
1407* returns the lookup result to lookup_loc_range_src_php
1408* internal function
1409*/
1410static char* lookup_loc_range(const char* loc_range, HashTable* hash_arr, int canonicalize  TSRMLS_DC)
1411{
1412    int i = 0;
1413    int cur_arr_len = 0;
1414    int result = 0;
1415
1416    char* lang_tag = NULL;
1417    zval** ele_value = NULL;
1418    char** cur_arr = NULL;
1419
1420    char* cur_loc_range = NULL;
1421    char* can_loc_range = NULL;
1422    int saved_pos = 0;
1423
1424    char* return_value = NULL;
1425
1426    cur_arr = ecalloc(zend_hash_num_elements(hash_arr)*2, sizeof(char *));
1427    /* convert the array to lowercase , also replace hyphens with the underscore and store it in cur_arr */
1428    for(zend_hash_internal_pointer_reset(hash_arr);
1429        zend_hash_has_more_elements(hash_arr) == SUCCESS;
1430        zend_hash_move_forward(hash_arr)) {
1431
1432        if (zend_hash_get_current_data(hash_arr, (void**)&ele_value) == FAILURE) {
1433            /* Should never actually fail since the key is known to exist.*/
1434            continue;
1435        }
1436        if(Z_TYPE_PP(ele_value)!= IS_STRING) {
1437            /* element value is not a string */
1438            intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: locale array element is not a string", 0 TSRMLS_CC);
1439            LOOKUP_CLEAN_RETURN(NULL);
1440        }
1441        cur_arr[cur_arr_len*2] = estrndup(Z_STRVAL_PP(ele_value), Z_STRLEN_PP(ele_value));
1442        result = strToMatch(Z_STRVAL_PP(ele_value), cur_arr[cur_arr_len*2]);
1443        if(result == 0) {
1444            intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag", 0 TSRMLS_CC);
1445            LOOKUP_CLEAN_RETURN(NULL);
1446        }
1447        cur_arr[cur_arr_len*2+1] = Z_STRVAL_PP(ele_value);
1448        cur_arr_len++ ;
1449    } /* end of for */
1450
1451    /* Canonicalize array elements */
1452    if(canonicalize) {
1453        for(i=0; i<cur_arr_len; i++) {
1454            lang_tag = get_icu_value_internal(cur_arr[i*2], LOC_CANONICALIZE_TAG, &result, 0);
1455            if(result != 1 || lang_tag == NULL || !lang_tag[0]) {
1456                if(lang_tag) {
1457                    efree(lang_tag);
1458                }
1459                intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0 TSRMLS_CC);
1460                LOOKUP_CLEAN_RETURN(NULL);
1461            }
1462            cur_arr[i*2] = erealloc(cur_arr[i*2], strlen(lang_tag)+1);
1463            result = strToMatch(lang_tag, cur_arr[i*2]);
1464            efree(lang_tag);
1465            if(result == 0) {
1466                intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0 TSRMLS_CC);
1467                LOOKUP_CLEAN_RETURN(NULL);
1468            }
1469        }
1470
1471    }
1472
1473    if(canonicalize) {
1474        /* Canonicalize the loc_range */
1475        can_loc_range = get_icu_value_internal(loc_range, LOC_CANONICALIZE_TAG, &result , 0);
1476        if( result != 1 || can_loc_range == NULL || !can_loc_range[0]) {
1477            /* Error */
1478            intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize loc_range" , 0 TSRMLS_CC );
1479            if(can_loc_range) {
1480                efree(can_loc_range);
1481            }
1482            LOOKUP_CLEAN_RETURN(NULL);
1483        } else {
1484            loc_range = can_loc_range;
1485        }
1486    }
1487
1488    cur_loc_range = ecalloc(1, strlen(loc_range)+1);
1489    /* convert to lower and replace hyphens */
1490    result = strToMatch(loc_range, cur_loc_range);
1491    if(can_loc_range) {
1492        efree(can_loc_range);
1493    }
1494    if(result == 0) {
1495        intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0 TSRMLS_CC);
1496        LOOKUP_CLEAN_RETURN(NULL);
1497    }
1498
1499    /* Lookup for the lang_tag match */
1500    saved_pos = strlen(cur_loc_range);
1501    while(saved_pos > 0) {
1502        for(i=0; i< cur_arr_len; i++){
1503            if(cur_arr[i*2] != NULL && strlen(cur_arr[i*2]) == saved_pos && strncmp(cur_loc_range, cur_arr[i*2], saved_pos) == 0) {
1504                /* Match found */
1505                return_value = estrdup(canonicalize?cur_arr[i*2]:cur_arr[i*2+1]);
1506                efree(cur_loc_range);
1507                LOOKUP_CLEAN_RETURN(return_value);
1508            }
1509        }
1510        saved_pos = getStrrtokenPos(cur_loc_range, saved_pos);
1511    }
1512
1513    /* Match not found */
1514    efree(cur_loc_range);
1515    LOOKUP_CLEAN_RETURN(NULL);
1516}
1517/* }}} */
1518
1519/* {{{ proto string Locale::lookup(array $langtag, string $locale[, bool $canonicalize[, string $default = null]])
1520* Searchs the items in $langtag for the best match to the language
1521* range
1522*/
1523/* }}} */
1524/* {{{ proto string locale_lookup(array $langtag, string $locale[, bool $canonicalize[, string $default = null]])
1525* Searchs the items in $langtag for the best match to the language
1526* range
1527*/
1528PHP_FUNCTION(locale_lookup)
1529{
1530    char*       fallback_loc        = NULL;
1531    int         fallback_loc_len    = 0;
1532    const char*     loc_range           = NULL;
1533    int         loc_range_len       = 0;
1534
1535    zval*       arr             = NULL;
1536    HashTable*  hash_arr        = NULL;
1537    zend_bool   boolCanonical   = 0;
1538    char*       result          =NULL;
1539
1540    intl_error_reset( NULL TSRMLS_CC );
1541
1542    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "as|bs", &arr, &loc_range, &loc_range_len,
1543        &boolCanonical, &fallback_loc, &fallback_loc_len) == FAILURE) {
1544        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "locale_lookup: unable to parse input params", 0 TSRMLS_CC );
1545        RETURN_FALSE;
1546    }
1547
1548    if(loc_range_len == 0) {
1549        loc_range = intl_locale_get_default(TSRMLS_C);
1550    }
1551
1552    hash_arr = HASH_OF(arr);
1553
1554    if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 ) {
1555        RETURN_EMPTY_STRING();
1556    }
1557
1558    result = lookup_loc_range(loc_range, hash_arr, boolCanonical TSRMLS_CC);
1559    if(result == NULL || result[0] == '\0') {
1560        if( fallback_loc ) {
1561            result = estrndup(fallback_loc, fallback_loc_len);
1562        } else {
1563            RETURN_EMPTY_STRING();
1564        }
1565    }
1566
1567    RETVAL_STRINGL(result, strlen(result), 0);
1568}
1569/* }}} */
1570
1571/* {{{ proto string Locale::acceptFromHttp(string $http_accept)
1572* Tries to find out best available locale based on HTTP �Accept-Language� header
1573*/
1574/* }}} */
1575/* {{{ proto string locale_accept_from_http(string $http_accept)
1576* Tries to find out best available locale based on HTTP �Accept-Language� header
1577*/
1578PHP_FUNCTION(locale_accept_from_http)
1579{
1580    UEnumeration *available;
1581    char *http_accept = NULL;
1582    int http_accept_len;
1583    UErrorCode status = 0;
1584    int len;
1585    char resultLocale[INTL_MAX_LOCALE_LEN+1];
1586    UAcceptResult outResult;
1587
1588    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s", &http_accept, &http_accept_len) == FAILURE)
1589    {
1590        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1591        "locale_accept_from_http: unable to parse input parameters", 0 TSRMLS_CC );
1592        RETURN_FALSE;
1593    }
1594
1595    available = ures_openAvailableLocales(NULL, &status);
1596    INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to retrieve locale list");
1597    len = uloc_acceptLanguageFromHTTP(resultLocale, INTL_MAX_LOCALE_LEN,
1598                        &outResult, http_accept, available, &status);
1599    uenum_close(available);
1600    INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to find acceptable locale");
1601    if (len < 0 || outResult == ULOC_ACCEPT_FAILED) {
1602        RETURN_FALSE;
1603    }
1604    RETURN_STRINGL(resultLocale, len, 1);
1605}
1606/* }}} */
1607
1608/*
1609 * Local variables:
1610 * tab-width: 4
1611 * c-basic-offset: 4
1612 * End:
1613 * vim600: noet sw=4 ts=4 fdm=marker
 * vim<600: noet sw=4 ts=4
1616*/
1617