1/*
2   +----------------------------------------------------------------------+
3   | PHP Version 5                                                        |
4   +----------------------------------------------------------------------+
5   | This source file is subject to version 3.01 of the PHP license,      |
6   | that is bundled with this package in the file LICENSE, and is        |
7   | available through the world-wide-web at the following url:           |
8   | http://www.php.net/license/3_01.txt                                  |
9   | If you did not receive a copy of the PHP license and are unable to   |
10   | obtain it through the world-wide-web, please send a note to          |
11   | license@php.net so we can mail you a copy immediately.               |
12   +----------------------------------------------------------------------+
13   | Authors: Kirti Velankar <kirtig@yahoo-inc.com>                       |
14   +----------------------------------------------------------------------+
15*/
16
17/* $Id$ */
18
19#ifdef HAVE_CONFIG_H
20#include "config.h"
21#endif
22
23#include <unicode/ustring.h>
24#include <unicode/udata.h>
25#include <unicode/putil.h>
26#include <unicode/ures.h>
27
28#include "php_intl.h"
29#include "locale.h"
30#include "locale_class.h"
31#include "locale_methods.h"
32#include "intl_convert.h"
33#include "intl_data.h"
34
35#include <zend_API.h>
36#include <zend.h>
37#include <php.h>
38#include "main/php_ini.h"
39#include "ext/standard/php_smart_str.h"
40
41ZEND_EXTERN_MODULE_GLOBALS( intl )
42
/* Separator strings placed between subtags when a locale ID is assembled. */
#define SEPARATOR   "_"
#define SEPARATOR1  "-"
#define DELIMITER   "-_"

/* Single-letter prefixes; PRIVATE_PREFIX is emitted by add_prefix() ahead of
 * private-use subtags. DISP_NAME is the pseudo tag name that selects the full
 * display name in get_icu_disp_value_src_php(). */
#define EXTLANG_PREFIX  "a"
#define PRIVATE_PREFIX  "x"
#define DISP_NAME       "name"

/* Upper bounds on how many numbered keys ("variant0", "variant1", ...) are
 * probed per subtag kind when composing a locale. */
#define MAX_NO_VARIANT          15
#define MAX_NO_EXTLANG          3
#define MAX_NO_PRIVATE          15
#define MAX_NO_LOOKUP_LANG_TAG  100

/* Returned by append_key_value() when the requested key is not in the array. */
#define LOC_NOT_FOUND 1

/* Sizes required for the strings "variant15" , "extlang3", "private12" etc. */
#define VARIANT_KEYNAME_LEN  11
#define EXTLANG_KEYNAME_LEN  10
#define PRIVATE_KEYNAME_LEN  11
62
/* Grandfathered language tags, based on the IANA registry at the time of
 * writing this code. The list is NULL-terminated.
 *
 * Order matters: the leading entries share their index with the matching
 * replacement in LOC_PREFERRED_GRANDFATHERED, so new tags must not be
 * inserted in front of them.
 */
static const char * const LOC_GRANDFATHERED[] = {
    /* tags that have a preferred replacement (same index in the other table) */
    "art-lojban",
    "i-klingon",
    "i-lux",
    "i-navajo",
    "no-bok",
    "no-nyn",
    /* tags without a preferred replacement */
    "cel-gaulish",
    "en-GB-oed",
    "i-ami",
    "i-bnn",
    "i-default",
    "i-enochian",
    "i-mingo",
    "i-pwn",
    "i-tao",
    "i-tay",
    "i-tsu",
    "sgn-BE-fr",
    "sgn-BE-nl",
    "sgn-CH-de",
    "zh-cmn",
    "zh-cmn-Hans",
    "zh-cmn-Hant",
    "zh-gan",
    "zh-guoyu",
    "zh-hakka",
    "zh-min",
    "zh-min-nan",
    "zh-wuu",
    "zh-xiang",
    "zh-yue",
    NULL
};
78
/* Preferred replacement values for grandfathered tags, based on the IANA
 * registry at the time of writing this code.
 *
 * Entry N here is the preferred value for entry N of LOC_GRANDFATHERED; the
 * two arrays must be kept in sync. The list is NULL-terminated.
 */
static const char * const   LOC_PREFERRED_GRANDFATHERED[]  = {
    "jbo",      /* art-lojban */
    "tlh",      /* i-klingon  */
    "lb",       /* i-lux      */
    "nv",       /* i-navajo   */
    "nb",       /* no-bok     */
    "nn",       /* no-nyn     */
    NULL
};

/* Number of real entries above (the NULL terminator is not counted). */
static const int        LOC_PREFERRED_GRANDFATHERED_LEN =
    (int)( sizeof(LOC_PREFERRED_GRANDFATHERED) / sizeof(LOC_PREFERRED_GRANDFATHERED[0]) ) - 1;
90
/* Character / prefix classification helpers.
 *
 * Every macro parameter is parenthesized so that compound arguments such as
 * `cond ? a : b` or `ptr + 1` expand correctly. The argument may still be
 * evaluated more than once, so do not pass expressions with side effects.
 */

/*returns TRUE if a is an ID separator FALSE otherwise*/
#define isIDSeparator(a) ((a) == '_' || (a) == '-')
#define isKeywordSeparator(a) ((a) == '@')
#define isEndOfTag(a) ((a) == '\0')

#define isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/*returns TRUE if one of the special prefixes is here (s=string)
  'x-' or 'i-' */
#define isIDPrefix(s) (isPrefixLetter((s)[0])&&isIDSeparator((s)[1]))
#define isKeywordPrefix(s) ( isKeywordSeparator((s)[0]) )

/* Dot terminates it because of POSIX form  where dot precedes the codepage
 * except for variant */
#define isTerminator(a)  (((a)==0)||((a)=='.')||((a)=='@'))
106
/* {{{ findOffset
 * Searches the NULL-terminated string array 'list' for an entry that is
 * exactly equal (strcmp) to 'key'.
 * Returns the zero-based index of the first match, or -1 if not present. */
static int16_t findOffset(const char* const* list, const char* key)
{
    int idx;

    for (idx = 0; list[idx] != NULL; idx++) {
        if (strcmp(key, list[idx]) == 0) {
            return (int16_t)idx;
        }
    }

    return -1;
}
/*}}}*/
123
124static char* getPreferredTag(const char* gf_tag)
125{
126    char* result = NULL;
127    int grOffset = 0;
128
129    grOffset = findOffset( LOC_GRANDFATHERED ,gf_tag);
130    if(grOffset < 0) {
131        return NULL;
132    }
133    if( grOffset < LOC_PREFERRED_GRANDFATHERED_LEN ){
134        /* return preferred tag */
135        result = estrdup( LOC_PREFERRED_GRANDFATHERED[grOffset] );
136    } else {
137        /* Return correct grandfathered language tag */
138        result = estrdup( LOC_GRANDFATHERED[grOffset] );
139    }
140    return result;
141}
142
/* {{{
* Reverse-direction token search used for lookup (strtokr equivalent).
* Scans 'str' leftwards starting just before 'savedPos' and returns the
* index of the nearest separator. When that separator directly follows a
* one-letter singleton ("-x-"), the index of the separator in front of the
* singleton is returned instead.
* Returns -1 if no token is left, including when the resulting position
* would be 0 (invalid input such as '-x-xyz' or '-sl_Latn').
*/
static int getStrrtokenPos(char* str, int savedPos)
{
    int pos = savedPos - 1;

    /* walk left until a separator is hit or the string is exhausted */
    while( pos >= 0 && !isIDSeparator(str[pos]) ){
        pos--;
    }
    if( pos < 0 ){
        return -1;
    }

    /* separator found; skip over a preceding singleton if there is one */
    if( pos >= 2 && isIDSeparator(str[pos-2]) ){
        pos -= 2;
    }

    /* a separator at index 0 means the locale is malformed */
    return (pos < 1) ? -1 : pos;
}
/* }}} */
172
/* {{{
* Forward search for a singleton subtag (strtok equivalent).
* Returns 0 when the very first subtag is one character long (e.g. "x-avy"
* or "a-prv1"), the index of the singleton character when a one-letter
* subtag is found further in, and -1 if there is no singleton or if 'str'
* is NULL or empty.
*/
static int getSingletonPos(const char* str)
{
    int result =-1;
    int i=0;
    int len = 0;

    if( str && ((len=(int)strlen(str))>0) ){
        for( i=0; i<len ; i++){
            if( !isIDSeparator(*(str+i)) ){
                continue;
            }
            if( i==1){
                /* string is of the form x-avy or a-prv1 */
                result =0;
                break;
            }
            /* delimiter found; check for singleton.
             * Only peek two characters ahead when that index is still inside
             * the string: for a separator at the very end, str[i+2] lies
             * beyond the terminating NUL. */
            if( i+2 < len && isIDSeparator(*(str+i+2)) ){
                /* a singleton; so send the position of the singleton itself */
                result = i+1;
                break;
            }
        }/* end of for */

    }
    return result;
}
/* }}} */
206
207/* {{{ proto static string Locale::getDefault(  )
208   Get default locale */
209/* }}} */
210/* {{{ proto static string locale_get_default( )
211   Get default locale */
212PHP_NAMED_FUNCTION(zif_locale_get_default)
213{
214    RETURN_STRING( intl_locale_get_default( TSRMLS_C ), TRUE );
215}
216
217/* }}} */
218
219/* {{{ proto static string Locale::setDefault( string $locale )
220   Set default locale */
221/* }}} */
222/* {{{ proto static string locale_set_default( string $locale )
223   Set default locale */
224PHP_NAMED_FUNCTION(zif_locale_set_default)
225{
226    char* locale_name = NULL;
227    int   len=0;
228
229    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC,  "s",
230        &locale_name ,&len ) == FAILURE)
231    {
232        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
233                "locale_set_default: unable to parse input params", 0 TSRMLS_CC );
234
235        RETURN_FALSE;
236    }
237
238    if(len == 0) {
239        locale_name =  (char *)uloc_getDefault() ;
240        len = strlen(locale_name);
241    }
242
243    zend_alter_ini_entry(LOCALE_INI_NAME, sizeof(LOCALE_INI_NAME), locale_name, len, PHP_INI_USER, PHP_INI_STAGE_RUNTIME);
244
245    RETURN_TRUE;
246}
247/* }}} */
248
249/* {{{
250* Gets the value from ICU
251* common code shared by get_primary_language,get_script or get_region or get_variant
252* result = 0 if error, 1 if successful , -1 if no value
253*/
254static char* get_icu_value_internal( const char* loc_name , char* tag_name, int* result , int fromParseLocale)
255{
256    char*       tag_value   = NULL;
257    int32_t         tag_value_len   = 512;
258
259    int     singletonPos    = 0;
260    char*           mod_loc_name    = NULL;
261    int         grOffset    = 0;
262
263    int32_t         buflen          = 512;
264    UErrorCode      status          = U_ZERO_ERROR;
265
266
267    if( strcmp(tag_name, LOC_CANONICALIZE_TAG) != 0 ){
268        /* Handle  grandfathered languages */
269        grOffset =  findOffset( LOC_GRANDFATHERED , loc_name );
270        if( grOffset >= 0 ){
271            if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
272                tag_value = estrdup(loc_name);
273                return tag_value;
274            } else {
275                /* Since Grandfathered , no value , do nothing , retutn NULL */
276                return NULL;
277            }
278        }
279
280    if( fromParseLocale==1 ){
281        /* Handle singletons */
282        if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
283            if( strlen(loc_name)>1 && (isIDPrefix(loc_name) ==1 ) ){
284                return (char *)loc_name;
285            }
286        }
287
288        singletonPos = getSingletonPos( loc_name );
289        if( singletonPos == 0){
290            /* singleton at start of script, region , variant etc.
291             * or invalid singleton at start of language */
292            return NULL;
293        } else if( singletonPos > 0 ){
294            /* singleton at some position except at start
295             * strip off the singleton and rest of the loc_name */
296            mod_loc_name = estrndup ( loc_name , singletonPos-1);
297        }
298    } /* end of if fromParse */
299
300    } /* end of if != LOC_CANONICAL_TAG */
301
302    if( mod_loc_name == NULL){
303        mod_loc_name = estrdup(loc_name );
304    }
305
306    /* Proceed to ICU */
307    do{
308        tag_value = erealloc( tag_value , buflen  );
309        tag_value_len = buflen;
310
311        if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
312            buflen = uloc_getScript ( mod_loc_name ,tag_value , tag_value_len , &status);
313        }
314        if( strcmp(tag_name , LOC_LANG_TAG )==0 ){
315            buflen = uloc_getLanguage ( mod_loc_name ,tag_value , tag_value_len , &status);
316        }
317        if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
318            buflen = uloc_getCountry ( mod_loc_name ,tag_value , tag_value_len , &status);
319        }
320        if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
321            buflen = uloc_getVariant ( mod_loc_name ,tag_value , tag_value_len , &status);
322        }
323        if( strcmp(tag_name , LOC_CANONICALIZE_TAG)==0 ){
324            buflen = uloc_canonicalize ( mod_loc_name ,tag_value , tag_value_len , &status);
325        }
326
327        if( U_FAILURE( status ) ) {
328            if( status == U_BUFFER_OVERFLOW_ERROR ) {
329                status = U_ZERO_ERROR;
330                continue;
331            }
332
333            /* Error in retriving data */
334            *result = 0;
335            if( tag_value ){
336                efree( tag_value );
337            }
338            if( mod_loc_name ){
339                efree( mod_loc_name);
340            }
341            return NULL;
342        }
343    } while( buflen > tag_value_len );
344
345    if(  buflen ==0 ){
346        /* No value found */
347        *result = -1;
348        if( tag_value ){
349            efree( tag_value );
350        }
351        if( mod_loc_name ){
352            efree( mod_loc_name);
353        }
354        return NULL;
355    } else {
356        *result = 1;
357    }
358
359    if( mod_loc_name ){
360        efree( mod_loc_name);
361    }
362    return tag_value;
363}
364/* }}} */
365
366/* {{{
367* Gets the value from ICU , called when PHP userspace function is called
368* common code shared by get_primary_language,get_script or get_region or get_variant
369*/
370static void get_icu_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
371{
372
373    const char* loc_name            = NULL;
374    int         loc_name_len        = 0;
375
376    char*       tag_value       = NULL;
377    char*       empty_result    = "";
378
379    int         result          = 0;
380    char*       msg             = NULL;
381
382    UErrorCode  status              = U_ZERO_ERROR;
383
384    intl_error_reset( NULL TSRMLS_CC );
385
386    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
387    &loc_name ,&loc_name_len ) == FAILURE) {
388        spprintf(&msg , 0, "locale_get_%s : unable to parse input params", tag_name );
389        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,  msg , 1 TSRMLS_CC );
390        efree(msg);
391
392        RETURN_FALSE;
393    }
394
395    if(loc_name_len == 0) {
396        loc_name = intl_locale_get_default(TSRMLS_C);
397    }
398
399    /* Call ICU get */
400    tag_value = get_icu_value_internal( loc_name , tag_name , &result ,0);
401
402    /* No value found */
403    if( result == -1 ) {
404        if( tag_value){
405            efree( tag_value);
406        }
407        RETURN_STRING( empty_result , TRUE);
408    }
409
410    /* value found */
411    if( tag_value){
412        RETURN_STRING( tag_value , FALSE);
413    }
414
415    /* Error encountered while fetching the value */
416    if( result ==0) {
417        spprintf(&msg , 0, "locale_get_%s : unable to get locale %s", tag_name , tag_name );
418        intl_error_set( NULL, status, msg , 1 TSRMLS_CC );
419        efree(msg);
420        RETURN_NULL();
421    }
422
423}
424/* }}} */
425
426/* {{{ proto static string Locale::getScript($locale)
427 * gets the script for the $locale
428 }}} */
429/* {{{ proto static string locale_get_script($locale)
430 * gets the script for the $locale
431 */
432PHP_FUNCTION( locale_get_script )
433{
434    get_icu_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
435}
436/* }}} */
437
438/* {{{ proto static string Locale::getRegion($locale)
439 * gets the region for the $locale
440 }}} */
441/* {{{ proto static string locale_get_region($locale)
442 * gets the region for the $locale
443 */
444PHP_FUNCTION( locale_get_region )
445{
446    get_icu_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
447}
448/* }}} */
449
450/* {{{ proto static string Locale::getPrimaryLanguage($locale)
451 * gets the primary language for the $locale
452 }}} */
453/* {{{ proto static string locale_get_primary_language($locale)
454 * gets the primary language for the $locale
455 */
456PHP_FUNCTION(locale_get_primary_language )
457{
458    get_icu_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
459}
460/* }}} */
461
462
463/* {{{
464 * common code shared by display_xyz functions to  get the value from ICU
465 }}} */
466static void get_icu_disp_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
467{
468    const char* loc_name            = NULL;
469    int         loc_name_len        = 0;
470
471    const char* disp_loc_name       = NULL;
472    int         disp_loc_name_len   = 0;
473    int         free_loc_name       = 0;
474
475    UChar*      disp_name       = NULL;
476    int32_t     disp_name_len   = 0;
477
478    char*       mod_loc_name        = NULL;
479
480    int32_t     buflen              = 512;
481    UErrorCode  status              = U_ZERO_ERROR;
482
483    char*       utf8value       = NULL;
484    int         utf8value_len       = 0;
485
486    char*       msg                 = NULL;
487    int         grOffset        = 0;
488
489    intl_error_reset( NULL TSRMLS_CC );
490
491    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s|s",
492        &loc_name, &loc_name_len ,
493        &disp_loc_name ,&disp_loc_name_len ) == FAILURE)
494    {
495        spprintf(&msg , 0, "locale_get_display_%s : unable to parse input params", tag_name );
496        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,  msg , 1 TSRMLS_CC );
497        efree(msg);
498        RETURN_FALSE;
499    }
500
501    if(loc_name_len == 0) {
502        loc_name = intl_locale_get_default(TSRMLS_C);
503    }
504
505    if( strcmp(tag_name, DISP_NAME) != 0 ){
506        /* Handle grandfathered languages */
507        grOffset = findOffset( LOC_GRANDFATHERED , loc_name );
508        if( grOffset >= 0 ){
509            if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
510                mod_loc_name = getPreferredTag( loc_name );
511            } else {
512                /* Since Grandfathered, no value, do nothing, retutn NULL */
513                RETURN_FALSE;
514            }
515        }
516    } /* end of if != LOC_CANONICAL_TAG */
517
518    if( mod_loc_name==NULL ){
519        mod_loc_name = estrdup( loc_name );
520    }
521
522    /* Check if disp_loc_name passed , if not use default locale */
523    if( !disp_loc_name){
524        disp_loc_name = estrdup(intl_locale_get_default(TSRMLS_C));
525        free_loc_name = 1;
526    }
527
528    /* Get the disp_value for the given locale */
529    do{
530        disp_name = erealloc( disp_name , buflen * sizeof(UChar)  );
531        disp_name_len = buflen;
532
533        if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
534            buflen = uloc_getDisplayLanguage ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
535        } else if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
536            buflen = uloc_getDisplayScript ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
537        } else if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
538            buflen = uloc_getDisplayCountry ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
539        } else if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
540            buflen = uloc_getDisplayVariant ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
541        } else if( strcmp(tag_name , DISP_NAME)==0 ){
542            buflen = uloc_getDisplayName ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
543        }
544
545        /* U_STRING_NOT_TERMINATED_WARNING is admissible here; don't look for it */
546        if( U_FAILURE( status ) )
547        {
548            if( status == U_BUFFER_OVERFLOW_ERROR )
549            {
550                status = U_ZERO_ERROR;
551                continue;
552            }
553
554            spprintf(&msg, 0, "locale_get_display_%s : unable to get locale %s", tag_name , tag_name );
555            intl_error_set( NULL, status, msg , 1 TSRMLS_CC );
556            efree(msg);
557            if( disp_name){
558                efree( disp_name );
559            }
560            if( mod_loc_name){
561                efree( mod_loc_name );
562            }
563            if (free_loc_name) {
564                efree((void *)disp_loc_name);
565                disp_loc_name = NULL;
566            }
567            RETURN_FALSE;
568        }
569    } while( buflen > disp_name_len );
570
571    if( mod_loc_name){
572        efree( mod_loc_name );
573    }
574    if (free_loc_name) {
575        efree((void *)disp_loc_name);
576        disp_loc_name = NULL;
577    }
578    /* Convert display locale name from UTF-16 to UTF-8. */
579    intl_convert_utf16_to_utf8( &utf8value, &utf8value_len, disp_name, buflen, &status );
580    efree( disp_name );
581    if( U_FAILURE( status ) )
582    {
583        spprintf(&msg, 0, "locale_get_display_%s :error converting display name for %s to UTF-8", tag_name , tag_name );
584        intl_error_set( NULL, status, msg , 1 TSRMLS_CC );
585        efree(msg);
586        RETURN_FALSE;
587    }
588
589    RETVAL_STRINGL( utf8value, utf8value_len , FALSE);
590
591}
592/* }}} */
593
594/* {{{ proto static string Locale::getDisplayName($locale[, $in_locale = null])
595* gets the name for the $locale in $in_locale or default_locale
596 }}} */
597/* {{{ proto static string get_display_name($locale[, $in_locale = null])
598* gets the name for the $locale in $in_locale or default_locale
599*/
600PHP_FUNCTION(locale_get_display_name)
601{
602    get_icu_disp_value_src_php( DISP_NAME , INTERNAL_FUNCTION_PARAM_PASSTHRU );
603}
604/* }}} */
605
606/* {{{ proto static string Locale::getDisplayLanguage($locale[, $in_locale = null])
607* gets the language for the $locale in $in_locale or default_locale
608 }}} */
609/* {{{ proto static string get_display_language($locale[, $in_locale = null])
610* gets the language for the $locale in $in_locale or default_locale
611*/
612PHP_FUNCTION(locale_get_display_language)
613{
614    get_icu_disp_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
615}
616/* }}} */
617
618/* {{{ proto static string Locale::getDisplayScript($locale, $in_locale = null)
619* gets the script for the $locale in $in_locale or default_locale
620 }}} */
621/* {{{ proto static string get_display_script($locale, $in_locale = null)
622* gets the script for the $locale in $in_locale or default_locale
623*/
624PHP_FUNCTION(locale_get_display_script)
625{
626    get_icu_disp_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
627}
628/* }}} */
629
630/* {{{ proto static string Locale::getDisplayRegion($locale, $in_locale = null)
631* gets the region for the $locale in $in_locale or default_locale
632 }}} */
633/* {{{ proto static string get_display_region($locale, $in_locale = null)
634* gets the region for the $locale in $in_locale or default_locale
635*/
636PHP_FUNCTION(locale_get_display_region)
637{
638    get_icu_disp_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
639}
640/* }}} */
641
642/* {{{
643* proto static string Locale::getDisplayVariant($locale, $in_locale = null)
644* gets the variant for the $locale in $in_locale or default_locale
645 }}} */
646/* {{{
647* proto static string get_display_variant($locale, $in_locale = null)
648* gets the variant for the $locale in $in_locale or default_locale
649*/
650PHP_FUNCTION(locale_get_display_variant)
651{
652    get_icu_disp_value_src_php( LOC_VARIANT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
653}
654/* }}} */
655
656 /* {{{ proto static array getKeywords(string $locale) {
657 * return an associative array containing keyword-value
658 * pairs for this locale. The keys are keys to the array (doh!)
659 * }}}*/
660 /* {{{ proto static array locale_get_keywords(string $locale) {
661 * return an associative array containing keyword-value
662 * pairs for this locale. The keys are keys to the array (doh!)
663 */
664PHP_FUNCTION( locale_get_keywords )
665{
666    UEnumeration*   e        = NULL;
667    UErrorCode      status   = U_ZERO_ERROR;
668
669    const char*     kw_key        = NULL;
670    int32_t         kw_key_len    = 0;
671
672    const char*         loc_name        = NULL;
673    int             loc_name_len    = 0;
674
675/*
676    ICU expects the buffer to be allocated  before calling the function
677    and so the buffer size has been explicitly specified
678    ICU uloc.h #define  ULOC_KEYWORD_AND_VALUES_CAPACITY   100
679    hence the kw_value buffer size is 100
680*/
681    char*       kw_value        = NULL;
682    int32_t     kw_value_len    = 100;
683
684    intl_error_reset( NULL TSRMLS_CC );
685
686    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
687        &loc_name, &loc_name_len ) == FAILURE)
688    {
689        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
690             "locale_get_keywords: unable to parse input params", 0 TSRMLS_CC );
691
692        RETURN_FALSE;
693    }
694
695    if(loc_name_len == 0) {
696        loc_name = intl_locale_get_default(TSRMLS_C);
697    }
698
699    /* Get the keywords */
700    e = uloc_openKeywords( loc_name, &status );
701    if( e != NULL )
702    {
703        /* Traverse it, filling the return array. */
704        array_init( return_value );
705
706        while( ( kw_key = uenum_next( e, &kw_key_len, &status ) ) != NULL ){
707            kw_value = ecalloc( 1 , kw_value_len  );
708
709            /* Get the keyword value for each keyword */
710            kw_value_len=uloc_getKeywordValue( loc_name,kw_key, kw_value, kw_value_len ,  &status );
711            if (status == U_BUFFER_OVERFLOW_ERROR) {
712                status = U_ZERO_ERROR;
713                kw_value = erealloc( kw_value , kw_value_len+1);
714                kw_value_len=uloc_getKeywordValue( loc_name,kw_key, kw_value, kw_value_len+1 ,  &status );
715            } else if(!U_FAILURE(status)) {
716                kw_value = erealloc( kw_value , kw_value_len+1);
717            }
718            if (U_FAILURE(status)) {
719                    intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "locale_get_keywords: Error encountered while getting the keyword  value for the  keyword", 0 TSRMLS_CC );
720                if( kw_value){
721                    efree( kw_value );
722                }
723                zval_dtor(return_value);
724                RETURN_FALSE;
725            }
726
727            add_assoc_stringl( return_value, (char *)kw_key, kw_value , kw_value_len, 0);
728        } /* end of while */
729
730    } /* end of if e!=NULL */
731
732    uenum_close( e );
733}
734/* }}} */
735
736 /* {{{ proto static string Locale::canonicalize($locale)
737 * @return string the canonicalized locale
738 * }}} */
739 /* {{{ proto static string locale_canonicalize(Locale $loc, string $locale)
740 * @param string $locale    The locale string to canonicalize
741 */
742PHP_FUNCTION(locale_canonicalize)
743{
744    get_icu_value_src_php( LOC_CANONICALIZE_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
745}
746/* }}} */
747
748/* {{{ append_key_value
749* Internal function which is called from locale_compose
750* gets the value for the key_name and appends to the loc_name
751* returns 1 if successful , -1 if not found ,
752* 0 if array element is not a string , -2 if buffer-overflow
753*/
754static int append_key_value(smart_str* loc_name, HashTable* hash_arr, char* key_name)
755{
756    zval**  ele_value   = NULL;
757
758    if(zend_hash_find(hash_arr , key_name , strlen(key_name) + 1 ,(void **)&ele_value ) == SUCCESS ) {
759        if(Z_TYPE_PP(ele_value)!= IS_STRING ){
760            /* element value is not a string */
761            return FAILURE;
762        }
763        if(strcmp(key_name, LOC_LANG_TAG) != 0 &&
764           strcmp(key_name, LOC_GRANDFATHERED_LANG_TAG)!=0 ) {
765            /* not lang or grandfathered tag */
766            smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
767        }
768        smart_str_appendl(loc_name, Z_STRVAL_PP(ele_value) , Z_STRLEN_PP(ele_value));
769        return SUCCESS;
770    }
771
772    return LOC_NOT_FOUND;
773}
774/* }}} */
775
776/* {{{ append_prefix , appends the prefix needed
777* e.g. private adds 'x'
778*/
779static void add_prefix(smart_str* loc_name, char* key_name)
780{
781    if( strncmp(key_name , LOC_PRIVATE_TAG , 7) == 0 ){
782        smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
783        smart_str_appendl(loc_name, PRIVATE_PREFIX , sizeof(PRIVATE_PREFIX)-1);
784    }
785}
786/* }}} */
787
788/* {{{ append_multiple_key_values
789* Internal function which is called from locale_compose
790* gets the multiple values for the key_name and appends to the loc_name
791* used for 'variant','extlang','private'
792* returns 1 if successful , -1 if not found ,
793* 0 if array element is not a string , -2 if buffer-overflow
794*/
795static int append_multiple_key_values(smart_str* loc_name, HashTable* hash_arr, char* key_name TSRMLS_DC)
796{
797    zval**  ele_value       = NULL;
798    int     i       = 0;
799    int     isFirstSubtag   = 0;
800    int     max_value   = 0;
801
802    /* Variant/ Extlang/Private etc. */
803    if( zend_hash_find( hash_arr , key_name , strlen(key_name) + 1 ,(void **)&ele_value ) == SUCCESS ) {
804        if( Z_TYPE_PP(ele_value) == IS_STRING ){
805            add_prefix( loc_name , key_name);
806
807            smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
808            smart_str_appendl(loc_name, Z_STRVAL_PP(ele_value) , Z_STRLEN_PP(ele_value));
809            return SUCCESS;
810        } else if(Z_TYPE_PP(ele_value) == IS_ARRAY ) {
811            HashPosition pos;
812            HashTable *arr = HASH_OF(*ele_value);
813            zval **data = NULL;
814
815            zend_hash_internal_pointer_reset_ex(arr, &pos);
816            while(zend_hash_get_current_data_ex(arr, (void **)&data, &pos) != FAILURE) {
817                if(Z_TYPE_PP(data) != IS_STRING) {
818                    return FAILURE;
819                }
820                if (isFirstSubtag++ == 0){
821                    add_prefix(loc_name , key_name);
822                }
823                smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
824                smart_str_appendl(loc_name, Z_STRVAL_PP(data) , Z_STRLEN_PP(data));
825                zend_hash_move_forward_ex(arr, &pos);
826            }
827            return SUCCESS;
828        } else {
829            return FAILURE;
830        }
831    } else {
832        char cur_key_name[31];
833        /* Decide the max_value: the max. no. of elements allowed */
834        if( strcmp(key_name , LOC_VARIANT_TAG) ==0 ){
835            max_value  = MAX_NO_VARIANT;
836        }
837        if( strcmp(key_name , LOC_EXTLANG_TAG) ==0 ){
838            max_value  = MAX_NO_EXTLANG;
839        }
840        if( strcmp(key_name , LOC_PRIVATE_TAG) ==0 ){
841            max_value  = MAX_NO_PRIVATE;
842        }
843
844        /* Multiple variant values as variant0, variant1 ,variant2 */
845        isFirstSubtag = 0;
846        for( i=0 ; i< max_value; i++ ){
847            snprintf( cur_key_name , 30, "%s%d", key_name , i);
848            if( zend_hash_find( hash_arr , cur_key_name , strlen(cur_key_name) + 1,(void **)&ele_value ) == SUCCESS ){
849                if( Z_TYPE_PP(ele_value)!= IS_STRING ){
850                    /* variant is not a string */
851                    return FAILURE;
852                }
853                /* Add the contents */
854                if (isFirstSubtag++ == 0){
855                    add_prefix(loc_name , cur_key_name);
856                }
857                smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
858                smart_str_appendl(loc_name, Z_STRVAL_PP(ele_value) , Z_STRLEN_PP(ele_value));
859            }
860        } /* end of for */
861    } /* end of else */
862
863    return SUCCESS;
864}
865/* }}} */
866
867/*{{{
868* If applicable sets error message and aborts locale_compose gracefully
869* returns 0  if locale_compose needs to be aborted
870* otherwise returns 1
871*/
872static int handleAppendResult( int result, smart_str* loc_name TSRMLS_DC)
873{
874    intl_error_reset( NULL TSRMLS_CC );
875    if( result == FAILURE) {
876        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
877             "locale_compose: parameter array element is not a string", 0 TSRMLS_CC );
878        smart_str_free(loc_name);
879        return 0;
880    }
881    return 1;
882}
883/* }}} */
884
/* NUL-terminates the smart_str 's' and returns its buffer (not duplicated) as
 * the function's string result. Wrapped in do { } while (0) so the macro
 * behaves as a single statement, e.g. in an unbraced if/else. */
#define RETURN_SMART_STR(s) do { smart_str_0((s)); RETURN_STRINGL((s)->c, (s)->len, 0); } while (0)
886/* {{{ proto static string Locale::composeLocale($array)
887* Creates a locale by combining the parts of locale-ID passed
888* }}} */
889/* {{{ proto static string compose_locale($array)
890* Creates a locale by combining the parts of locale-ID passed
891* }}} */
892PHP_FUNCTION(locale_compose)
893{
894    smart_str       loc_name_s = {0};
895    smart_str *loc_name = &loc_name_s;
896    zval*           arr = NULL;
897    HashTable*      hash_arr = NULL;
898    int             result = 0;
899
900    intl_error_reset( NULL TSRMLS_CC );
901
902    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "a",
903        &arr) == FAILURE)
904    {
905        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
906             "locale_compose: unable to parse input params", 0 TSRMLS_CC );
907        RETURN_FALSE;
908    }
909
910    hash_arr = HASH_OF( arr );
911
912    if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 )
913        RETURN_FALSE;
914
915    /* Check for grandfathered first */
916    result = append_key_value(loc_name, hash_arr,  LOC_GRANDFATHERED_LANG_TAG);
917    if( result == SUCCESS){
918        RETURN_SMART_STR(loc_name);
919    }
920    if( !handleAppendResult( result, loc_name TSRMLS_CC)){
921        RETURN_FALSE;
922    }
923
924    /* Not grandfathered */
925    result = append_key_value(loc_name, hash_arr , LOC_LANG_TAG);
926    if( result == LOC_NOT_FOUND ){
927        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
928        "locale_compose: parameter array does not contain 'language' tag.", 0 TSRMLS_CC );
929        smart_str_free(loc_name);
930        RETURN_FALSE;
931    }
932    if( !handleAppendResult( result, loc_name TSRMLS_CC)){
933        RETURN_FALSE;
934    }
935
936    /* Extlang */
937    result = append_multiple_key_values(loc_name, hash_arr , LOC_EXTLANG_TAG TSRMLS_CC);
938    if( !handleAppendResult( result, loc_name TSRMLS_CC)){
939        RETURN_FALSE;
940    }
941
942    /* Script */
943    result = append_key_value(loc_name, hash_arr , LOC_SCRIPT_TAG);
944    if( !handleAppendResult( result, loc_name TSRMLS_CC)){
945        RETURN_FALSE;
946    }
947
948    /* Region */
949    result = append_key_value( loc_name, hash_arr , LOC_REGION_TAG);
950    if( !handleAppendResult( result, loc_name TSRMLS_CC)){
951        RETURN_FALSE;
952    }
953
954    /* Variant */
955    result = append_multiple_key_values( loc_name, hash_arr , LOC_VARIANT_TAG TSRMLS_CC);
956    if( !handleAppendResult( result, loc_name TSRMLS_CC)){
957        RETURN_FALSE;
958    }
959
960    /* Private */
961    result = append_multiple_key_values( loc_name, hash_arr , LOC_PRIVATE_TAG TSRMLS_CC);
962    if( !handleAppendResult( result, loc_name TSRMLS_CC)){
963        RETURN_FALSE;
964    }
965
966    RETURN_SMART_STR(loc_name);
967}
968/* }}} */
969
970
/*{{{
* Parses the locale and returns the private-use subtags, if any.
* e.g. for locale='en_US-x-prv1-prv2-prv3'
* returns a newly allocated copy of the string 'prv1-prv2-prv3'
*
* loc_name: locale identifier to scan; may be NULL or empty.
* returns:  an emalloc'ed string the caller must efree, or NULL when
*           loc_name is NULL/empty, when no 'x'/'X' singleton is found,
*           or when nothing follows the "x<separator>" introducer.
*/
static char* get_private_subtags(const char* loc_name)
{
    char*           result       = NULL;
    int             singletonPos = 0;
    int             len          = 0;
    const char*     mod_loc_name = loc_name;

    if( !loc_name || loc_name[0] == '\0' ){
        return NULL;
    }

    len = (int)strlen(mod_loc_name);

    /* Walk from singleton to singleton until the private-use one shows up */
    while( (singletonPos = getSingletonPos(mod_loc_name)) != -1 ){
        if( mod_loc_name[singletonPos] == 'x' || mod_loc_name[singletonPos] == 'X' ){
            /* private subtag start found: skip the singleton and the
             * separator after it, and copy the remainder. Nothing is
             * copied when the name stops right after them, so the length
             * handed to estrndup can never be zero or negative. */
            if( singletonPos + 2 < len ){
                result = estrndup(mod_loc_name + singletonPos + 2, len - (singletonPos + 2));
            }
            break;
        }

        if( singletonPos + 1 >= len ){
            /* String end */
            break;
        }

        /* singleton found but not a private subtag, hence check further
         * in the string for the private subtag */
        mod_loc_name += singletonPos + 1;
        len = (int)strlen(mod_loc_name);
    } /* end of while */

    return result;
}
/* }}} */
1019
1020/* {{{ code used by locale_parse
1021*/
1022static int add_array_entry(const char* loc_name, zval* hash_arr, char* key_name TSRMLS_DC)
1023{
1024    char*   key_value   = NULL;
1025    char*   cur_key_name    = NULL;
1026    char*   token           = NULL;
1027    char*   last_ptr    = NULL;
1028
1029    int result      = 0;
1030    int     cur_result      = 0;
1031    int     cnt         = 0;
1032
1033
1034    if( strcmp(key_name , LOC_PRIVATE_TAG)==0 ){
1035        key_value = get_private_subtags( loc_name );
1036        result = 1;
1037    } else {
1038        key_value = get_icu_value_internal( loc_name , key_name , &result,1 );
1039    }
1040    if( (strcmp(key_name , LOC_PRIVATE_TAG)==0) ||
1041        ( strcmp(key_name , LOC_VARIANT_TAG)==0) ){
1042        if( result > 0 && key_value){
1043            /* Tokenize on the "_" or "-"  */
1044            token = php_strtok_r( key_value , DELIMITER ,&last_ptr);
1045            if( cur_key_name ){
1046                efree( cur_key_name);
1047            }
1048            cur_key_name = (char*)ecalloc( 25,  25);
1049            sprintf( cur_key_name , "%s%d", key_name , cnt++);
1050            add_assoc_string( hash_arr, cur_key_name , token ,TRUE );
1051            /* tokenize on the "_" or "-" and stop  at singleton if any */
1052            while( (token = php_strtok_r(NULL , DELIMITER , &last_ptr)) && (strlen(token)>1) ){
1053                sprintf( cur_key_name , "%s%d", key_name , cnt++);
1054                add_assoc_string( hash_arr, cur_key_name , token , TRUE );
1055            }
1056/*
1057            if( strcmp(key_name, LOC_PRIVATE_TAG) == 0 ){
1058            }
1059*/
1060        }
1061    } else {
1062        if( result == 1 ){
1063            add_assoc_string( hash_arr, key_name , key_value , TRUE );
1064            cur_result = 1;
1065        }
1066    }
1067
1068    if( cur_key_name ){
1069        efree( cur_key_name);
1070    }
1071    /*if( key_name != LOC_PRIVATE_TAG && key_value){*/
1072    if( key_value){
1073        efree(key_value);
1074    }
1075    return cur_result;
1076}
1077/* }}} */
1078
1079/* {{{ proto static array Locale::parseLocale($locale)
1080* parses a locale-id into an array the different parts of it
1081 }}} */
1082/* {{{ proto static array parse_locale($locale)
1083* parses a locale-id into an array the different parts of it
1084*/
1085PHP_FUNCTION(locale_parse)
1086{
1087    const char* loc_name        = NULL;
1088    int         loc_name_len    = 0;
1089    int         grOffset        = 0;
1090
1091    intl_error_reset( NULL TSRMLS_CC );
1092
1093    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
1094        &loc_name, &loc_name_len ) == FAILURE)
1095    {
1096        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1097             "locale_parse: unable to parse input params", 0 TSRMLS_CC );
1098
1099        RETURN_FALSE;
1100    }
1101
1102    if(loc_name_len == 0) {
1103        loc_name = intl_locale_get_default(TSRMLS_C);
1104    }
1105
1106    array_init( return_value );
1107
1108    grOffset =  findOffset( LOC_GRANDFATHERED , loc_name );
1109    if( grOffset >= 0 ){
1110        add_assoc_string( return_value , LOC_GRANDFATHERED_LANG_TAG , estrdup(loc_name) ,FALSE );
1111    }
1112    else{
1113        /* Not grandfathered */
1114        add_array_entry( loc_name , return_value , LOC_LANG_TAG TSRMLS_CC);
1115        add_array_entry( loc_name , return_value , LOC_SCRIPT_TAG TSRMLS_CC);
1116        add_array_entry( loc_name , return_value , LOC_REGION_TAG TSRMLS_CC);
1117        add_array_entry( loc_name , return_value , LOC_VARIANT_TAG TSRMLS_CC);
1118        add_array_entry( loc_name , return_value , LOC_PRIVATE_TAG TSRMLS_CC);
1119    }
1120}
1121/* }}} */
1122
1123/* {{{ proto static array Locale::getAllVariants($locale)
1124* gets an array containing the list of variants, or null
1125 }}} */
1126/* {{{ proto static array locale_get_all_variants($locale)
1127* gets an array containing the list of variants, or null
1128*/
1129PHP_FUNCTION(locale_get_all_variants)
1130{
1131    const char*     loc_name        = NULL;
1132    int         loc_name_len    = 0;
1133
1134    int result      = 0;
1135    char*   token       = NULL;
1136    char*   variant     = NULL;
1137    char*   saved_ptr   = NULL;
1138
1139    intl_error_reset( NULL TSRMLS_CC );
1140
1141    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
1142    &loc_name, &loc_name_len ) == FAILURE)
1143    {
1144        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1145         "locale_parse: unable to parse input params", 0 TSRMLS_CC );
1146
1147        RETURN_FALSE;
1148    }
1149
1150    if(loc_name_len == 0) {
1151        loc_name = intl_locale_get_default(TSRMLS_C);
1152    }
1153
1154
1155    array_init( return_value );
1156
1157    /* If the locale is grandfathered, stop, no variants */
1158    if( findOffset( LOC_GRANDFATHERED , loc_name ) >=  0 ){
1159        /* ("Grandfathered Tag. No variants."); */
1160    }
1161    else {
1162    /* Call ICU variant */
1163        variant = get_icu_value_internal( loc_name , LOC_VARIANT_TAG , &result ,0);
1164        if( result > 0 && variant){
1165            /* Tokenize on the "_" or "-" */
1166            token = php_strtok_r( variant , DELIMITER , &saved_ptr);
1167            add_next_index_stringl( return_value, token , strlen(token) ,TRUE );
1168            /* tokenize on the "_" or "-" and stop  at singleton if any */
1169            while( (token = php_strtok_r(NULL , DELIMITER, &saved_ptr)) && (strlen(token)>1) ){
1170                add_next_index_stringl( return_value, token , strlen(token) ,TRUE );
1171            }
1172        }
1173        if( variant ){
1174            efree( variant );
1175        }
1176    }
1177
1178
1179}
1180/* }}} */
1181
/*{{{
* Converts to lower case and also replaces all hyphens with the underscore
*
* str:    NUL-terminated input; may be NULL.
* retstr: output buffer; must have room for strlen(str)+1 bytes.
* returns 1 after writing the converted, NUL-terminated copy to retstr;
* returns 0 (leaving retstr untouched) when str is NULL or empty.
*/
static int strToMatch(const char* str ,char *retstr)
{
    if( (!str) || str[0] == '\0' ){
        return 0;
    }

    for( ; *str != '\0'; str++, retstr++ ){
        if( *str == '-' ){
            *retstr = '_';
        } else {
            /* tolower() is only defined for unsigned char values and EOF,
             * so bytes above 0x7F must not be passed as negative chars */
            *retstr = (char)tolower( (unsigned char)*str );
        }
    }
    *retstr = '\0';

    return 1;
}
/* }}} */
1216
1217/* {{{ proto static boolean Locale::filterMatches(string $langtag, string $locale[, bool $canonicalize])
1218* Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm
1219*/
1220/* }}} */
1221/* {{{ proto boolean locale_filter_matches(string $langtag, string $locale[, bool $canonicalize])
1222* Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm
1223*/
1224PHP_FUNCTION(locale_filter_matches)
1225{
1226    char*           lang_tag        = NULL;
1227    int             lang_tag_len    = 0;
1228    const char*     loc_range       = NULL;
1229    int             loc_range_len   = 0;
1230
1231    int     result      = 0;
1232    char*       token       = 0;
1233    char*       chrcheck    = NULL;
1234
1235    char*           can_lang_tag    = NULL;
1236    char*           can_loc_range   = NULL;
1237
1238    char*           cur_lang_tag    = NULL;
1239    char*           cur_loc_range   = NULL;
1240
1241    zend_bool   boolCanonical   = 0;
1242    UErrorCode  status      = U_ZERO_ERROR;
1243
1244    intl_error_reset( NULL TSRMLS_CC );
1245
1246    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "ss|b",
1247        &lang_tag, &lang_tag_len , &loc_range , &loc_range_len ,
1248        &boolCanonical) == FAILURE)
1249    {
1250        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1251        "locale_filter_matches: unable to parse input params", 0 TSRMLS_CC );
1252
1253        RETURN_FALSE;
1254    }
1255
1256    if(loc_range_len == 0) {
1257        loc_range = intl_locale_get_default(TSRMLS_C);
1258    }
1259
1260    if( strcmp(loc_range,"*")==0){
1261        RETURN_TRUE;
1262    }
1263
1264    if( boolCanonical ){
1265        /* canonicalize loc_range */
1266        can_loc_range=get_icu_value_internal( loc_range , LOC_CANONICALIZE_TAG , &result , 0);
1267        if( result ==0) {
1268            intl_error_set( NULL, status,
1269                "locale_filter_matches : unable to canonicalize loc_range" , 0 TSRMLS_CC );
1270            RETURN_FALSE;
1271        }
1272
1273        /* canonicalize lang_tag */
1274        can_lang_tag = get_icu_value_internal( lang_tag , LOC_CANONICALIZE_TAG , &result ,  0);
1275        if( result ==0) {
1276            intl_error_set( NULL, status,
1277                "locale_filter_matches : unable to canonicalize lang_tag" , 0 TSRMLS_CC );
1278            RETURN_FALSE;
1279        }
1280
1281        /* Convert to lower case for case-insensitive comparison */
1282        cur_lang_tag = ecalloc( 1, strlen(can_lang_tag) + 1);
1283
1284        /* Convert to lower case for case-insensitive comparison */
1285        result = strToMatch( can_lang_tag , cur_lang_tag);
1286        if( result == 0) {
1287            efree( cur_lang_tag );
1288            efree( can_lang_tag );
1289            RETURN_FALSE;
1290        }
1291
1292        cur_loc_range = ecalloc( 1, strlen(can_loc_range) + 1);
1293        result = strToMatch( can_loc_range , cur_loc_range );
1294        if( result == 0) {
1295            efree( cur_lang_tag );
1296            efree( can_lang_tag );
1297            efree( cur_loc_range );
1298            efree( can_loc_range );
1299            RETURN_FALSE;
1300        }
1301
1302        /* check if prefix */
1303        token   = strstr( cur_lang_tag , cur_loc_range );
1304
1305        if( token && (token==cur_lang_tag) ){
1306            /* check if the char. after match is SEPARATOR */
1307            chrcheck = token + (strlen(cur_loc_range));
1308            if( isIDSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
1309                if( cur_lang_tag){
1310                    efree( cur_lang_tag );
1311                }
1312                if( cur_loc_range){
1313                    efree( cur_loc_range );
1314                }
1315                if( can_lang_tag){
1316                    efree( can_lang_tag );
1317                }
1318                if( can_loc_range){
1319                    efree( can_loc_range );
1320                }
1321                RETURN_TRUE;
1322            }
1323        }
1324
1325        /* No prefix as loc_range */
1326        if( cur_lang_tag){
1327            efree( cur_lang_tag );
1328        }
1329        if( cur_loc_range){
1330            efree( cur_loc_range );
1331        }
1332        if( can_lang_tag){
1333            efree( can_lang_tag );
1334        }
1335        if( can_loc_range){
1336            efree( can_loc_range );
1337        }
1338        RETURN_FALSE;
1339
1340    } /* end of if isCanonical */
1341    else{
1342        /* Convert to lower case for case-insensitive comparison */
1343        cur_lang_tag = ecalloc( 1, strlen(lang_tag ) + 1);
1344
1345        result = strToMatch( lang_tag , cur_lang_tag);
1346        if( result == 0) {
1347            efree( cur_lang_tag );
1348            RETURN_FALSE;
1349        }
1350        cur_loc_range = ecalloc( 1, strlen(loc_range ) + 1);
1351        result = strToMatch( loc_range , cur_loc_range );
1352        if( result == 0) {
1353            efree( cur_lang_tag );
1354            efree( cur_loc_range );
1355            RETURN_FALSE;
1356        }
1357
1358        /* check if prefix */
1359        token   = strstr( cur_lang_tag , cur_loc_range );
1360
1361        if( token && (token==cur_lang_tag) ){
1362            /* check if the char. after match is SEPARATOR */
1363            chrcheck = token + (strlen(cur_loc_range));
1364            if( isIDSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
1365                if( cur_lang_tag){
1366                    efree( cur_lang_tag );
1367                }
1368                if( cur_loc_range){
1369                    efree( cur_loc_range );
1370                }
1371                RETURN_TRUE;
1372            }
1373        }
1374
1375        /* No prefix as loc_range */
1376        if( cur_lang_tag){
1377            efree( cur_lang_tag );
1378        }
1379        if( cur_loc_range){
1380            efree( cur_loc_range );
1381        }
1382        RETURN_FALSE;
1383
1384    }
1385}
1386/* }}} */
1387
1388static void array_cleanup( char* arr[] , int arr_size)
1389{
1390    int i=0;
1391    for( i=0; i< arr_size; i++ ){
1392        if( arr[i*2] ){
1393            efree( arr[i*2]);
1394        }
1395    }
1396    efree(arr);
1397}
1398
1399#define LOOKUP_CLEAN_RETURN(value)  array_cleanup(cur_arr, cur_arr_len); return (value)
1400/* {{{
1401* returns the lookup result to lookup_loc_range_src_php
1402* internal function
1403*/
1404static char* lookup_loc_range(const char* loc_range, HashTable* hash_arr, int canonicalize  TSRMLS_DC)
1405{
1406    int i = 0;
1407    int cur_arr_len = 0;
1408    int result = 0;
1409
1410    char* lang_tag = NULL;
1411    zval** ele_value = NULL;
1412    char** cur_arr = NULL;
1413
1414    char* cur_loc_range = NULL;
1415    char* can_loc_range = NULL;
1416    int saved_pos = 0;
1417
1418    char* return_value = NULL;
1419
1420    cur_arr = ecalloc(zend_hash_num_elements(hash_arr)*2, sizeof(char *));
1421    /* convert the array to lowercase , also replace hyphens with the underscore and store it in cur_arr */
1422    for(zend_hash_internal_pointer_reset(hash_arr);
1423        zend_hash_has_more_elements(hash_arr) == SUCCESS;
1424        zend_hash_move_forward(hash_arr)) {
1425
1426        if (zend_hash_get_current_data(hash_arr, (void**)&ele_value) == FAILURE) {
1427            /* Should never actually fail since the key is known to exist.*/
1428            continue;
1429        }
1430        if(Z_TYPE_PP(ele_value)!= IS_STRING) {
1431            /* element value is not a string */
1432            intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: locale array element is not a string", 0 TSRMLS_CC);
1433            LOOKUP_CLEAN_RETURN(NULL);
1434        }
1435        cur_arr[cur_arr_len*2] = estrndup(Z_STRVAL_PP(ele_value), Z_STRLEN_PP(ele_value));
1436        result = strToMatch(Z_STRVAL_PP(ele_value), cur_arr[cur_arr_len*2]);
1437        if(result == 0) {
1438            intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag", 0 TSRMLS_CC);
1439            LOOKUP_CLEAN_RETURN(NULL);
1440        }
1441        cur_arr[cur_arr_len*2+1] = Z_STRVAL_PP(ele_value);
1442        cur_arr_len++ ;
1443    } /* end of for */
1444
1445    /* Canonicalize array elements */
1446    if(canonicalize) {
1447        for(i=0; i<cur_arr_len; i++) {
1448            lang_tag = get_icu_value_internal(cur_arr[i*2], LOC_CANONICALIZE_TAG, &result, 0);
1449            if(result != 1 || lang_tag == NULL || !lang_tag[0]) {
1450                if(lang_tag) {
1451                    efree(lang_tag);
1452                }
1453                intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0 TSRMLS_CC);
1454                LOOKUP_CLEAN_RETURN(NULL);
1455            }
1456            cur_arr[i*2] = erealloc(cur_arr[i*2], strlen(lang_tag)+1);
1457            result = strToMatch(lang_tag, cur_arr[i*2]);
1458            efree(lang_tag);
1459            if(result == 0) {
1460                intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0 TSRMLS_CC);
1461                LOOKUP_CLEAN_RETURN(NULL);
1462            }
1463        }
1464
1465    }
1466
1467    if(canonicalize) {
1468        /* Canonicalize the loc_range */
1469        can_loc_range = get_icu_value_internal(loc_range, LOC_CANONICALIZE_TAG, &result , 0);
1470        if( result != 1 || can_loc_range == NULL || !can_loc_range[0]) {
1471            /* Error */
1472            intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize loc_range" , 0 TSRMLS_CC );
1473            if(can_loc_range) {
1474                efree(can_loc_range);
1475            }
1476            LOOKUP_CLEAN_RETURN(NULL);
1477        } else {
1478            loc_range = can_loc_range;
1479        }
1480    }
1481
1482    cur_loc_range = ecalloc(1, strlen(loc_range)+1);
1483    /* convert to lower and replace hyphens */
1484    result = strToMatch(loc_range, cur_loc_range);
1485    if(can_loc_range) {
1486        efree(can_loc_range);
1487    }
1488    if(result == 0) {
1489        intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0 TSRMLS_CC);
1490        LOOKUP_CLEAN_RETURN(NULL);
1491    }
1492
1493    /* Lookup for the lang_tag match */
1494    saved_pos = strlen(cur_loc_range);
1495    while(saved_pos > 0) {
1496        for(i=0; i< cur_arr_len; i++){
1497            if(cur_arr[i*2] != NULL && strlen(cur_arr[i*2]) == saved_pos && strncmp(cur_loc_range, cur_arr[i*2], saved_pos) == 0) {
1498                /* Match found */
1499                return_value = estrdup(canonicalize?cur_arr[i*2]:cur_arr[i*2+1]);
1500                efree(cur_loc_range);
1501                LOOKUP_CLEAN_RETURN(return_value);
1502            }
1503        }
1504        saved_pos = getStrrtokenPos(cur_loc_range, saved_pos);
1505    }
1506
1507    /* Match not found */
1508    efree(cur_loc_range);
1509    LOOKUP_CLEAN_RETURN(NULL);
1510}
1511/* }}} */
1512
1513/* {{{ proto string Locale::lookup(array $langtag, string $locale[, bool $canonicalize[, string $default = null]])
1514* Searchs the items in $langtag for the best match to the language
1515* range
1516*/
1517/* }}} */
1518/* {{{ proto string locale_lookup(array $langtag, string $locale[, bool $canonicalize[, string $default = null]])
1519* Searchs the items in $langtag for the best match to the language
1520* range
1521*/
1522PHP_FUNCTION(locale_lookup)
1523{
1524    char*       fallback_loc        = NULL;
1525    int         fallback_loc_len    = 0;
1526    const char*     loc_range           = NULL;
1527    int         loc_range_len       = 0;
1528
1529    zval*       arr             = NULL;
1530    HashTable*  hash_arr        = NULL;
1531    zend_bool   boolCanonical   = 0;
1532    char*       result          =NULL;
1533
1534    intl_error_reset( NULL TSRMLS_CC );
1535
1536    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "as|bs", &arr, &loc_range, &loc_range_len,
1537        &boolCanonical, &fallback_loc, &fallback_loc_len) == FAILURE) {
1538        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "locale_lookup: unable to parse input params", 0 TSRMLS_CC );
1539        RETURN_FALSE;
1540    }
1541
1542    if(loc_range_len == 0) {
1543        loc_range = intl_locale_get_default(TSRMLS_C);
1544    }
1545
1546    hash_arr = HASH_OF(arr);
1547
1548    if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 ) {
1549        RETURN_EMPTY_STRING();
1550    }
1551
1552    result = lookup_loc_range(loc_range, hash_arr, boolCanonical TSRMLS_CC);
1553    if(result == NULL || result[0] == '\0') {
1554        if( fallback_loc ) {
1555            result = estrndup(fallback_loc, fallback_loc_len);
1556        } else {
1557            RETURN_EMPTY_STRING();
1558        }
1559    }
1560
1561    RETVAL_STRINGL(result, strlen(result), 0);
1562}
1563/* }}} */
1564
1565/* {{{ proto string Locale::acceptFromHttp(string $http_accept)
1566* Tries to find out best available locale based on HTTP �Accept-Language� header
1567*/
1568/* }}} */
1569/* {{{ proto string locale_accept_from_http(string $http_accept)
1570* Tries to find out best available locale based on HTTP �Accept-Language� header
1571*/
1572PHP_FUNCTION(locale_accept_from_http)
1573{
1574    UEnumeration *available;
1575    char *http_accept = NULL;
1576    int http_accept_len;
1577    UErrorCode status = 0;
1578    int len;
1579    char resultLocale[INTL_MAX_LOCALE_LEN+1];
1580    UAcceptResult outResult;
1581
1582    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s", &http_accept, &http_accept_len) == FAILURE)
1583    {
1584        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1585        "locale_accept_from_http: unable to parse input parameters", 0 TSRMLS_CC );
1586        RETURN_FALSE;
1587    }
1588
1589    available = ures_openAvailableLocales(NULL, &status);
1590    INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to retrieve locale list");
1591    len = uloc_acceptLanguageFromHTTP(resultLocale, INTL_MAX_LOCALE_LEN,
1592                        &outResult, http_accept, available, &status);
1593    uenum_close(available);
1594    INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to find acceptable locale");
1595    if (len < 0 || outResult == ULOC_ACCEPT_FAILED) {
1596        RETURN_FALSE;
1597    }
1598    RETURN_STRINGL(resultLocale, len, 1);
1599}
1600/* }}} */
1601
1602/*
1603 * Local variables:
1604 * tab-width: 4
1605 * c-basic-offset: 4
1606 * End:
1607 * vim600: noet sw=4 ts=4 fdm=marker
 * vim<600: noet sw=4 ts=4
1610*/
1611