1/*
2   +----------------------------------------------------------------------+
3   | PHP Version 5                                                        |
4   +----------------------------------------------------------------------+
5   | This source file is subject to version 3.01 of the PHP license,      |
6   | that is bundled with this package in the file LICENSE, and is        |
7   | available through the world-wide-web at the following url:           |
8   | http://www.php.net/license/3_01.txt                                  |
9   | If you did not receive a copy of the PHP license and are unable to   |
10   | obtain it through the world-wide-web, please send a note to          |
11   | license@php.net so we can mail you a copy immediately.               |
12   +----------------------------------------------------------------------+
13   | Authors: Kirti Velankar <kirtig@yahoo-inc.com>                       |
14   +----------------------------------------------------------------------+
15*/
16
17/* $Id$ */
18
19#ifdef HAVE_CONFIG_H
20#include "config.h"
21#endif
22
23#include <unicode/ustring.h>
24#include <unicode/udata.h>
25#include <unicode/putil.h>
26#include <unicode/ures.h>
27
28#include "php_intl.h"
29#include "locale.h"
30#include "locale_class.h"
31#include "locale_methods.h"
32#include "intl_convert.h"
33#include "intl_data.h"
34
35#include <zend_API.h>
36#include <zend.h>
37#include <php.h>
38#include "main/php_ini.h"
39#include "ext/standard/php_smart_str.h"
40
41ZEND_EXTERN_MODULE_GLOBALS( intl )
42
/* Separators and prefixes used while composing or scanning locale IDs. */
#define SEPARATOR       "_"     /* placed between subtags by the compose helpers */
#define SEPARATOR1      "-"
#define DELIMITER       "-_"
#define EXTLANG_PREFIX  "a"
#define PRIVATE_PREFIX  "x"     /* emitted by add_prefix() ahead of private-use subtags */
#define DISP_NAME       "name"  /* pseudo tag: ask for the full display name */

/* Upper bounds on the numbered array keys (variant0, variant1, ...) that
 * append_multiple_key_values() probes for each kind of subtag. */
#define MAX_NO_VARIANT  15
#define MAX_NO_EXTLANG  3
#define MAX_NO_PRIVATE  15
/* NOTE(review): not referenced in the part of the file reviewed here;
 * presumably a limit for the lookup code - confirm against its user. */
#define MAX_NO_LOOKUP_LANG_TAG  100

/* Returned by append_key_value() when the requested key is absent. */
#define LOC_NOT_FOUND 1

/* Sizes required for key-name strings such as "variant15", "extlang3",
 * "private12". */
#define VARIANT_KEYNAME_LEN  11
#define EXTLANG_KEYNAME_LEN  10
#define PRIVATE_KEYNAME_LEN  11
62
/* Grandfathered language tags, based on the IANA registry at the time this
 * code was written.
 *
 * The order is significant: the leading entries line up index-for-index with
 * LOC_PREFERRED_GRANDFATHERED. The list is NULL-terminated so that it can be
 * scanned with findOffset().
 */
static const char * const LOC_GRANDFATHERED[] = {
    /* entries that have a counterpart in LOC_PREFERRED_GRANDFATHERED */
    "art-lojban",
    "i-klingon",
    "i-lux",
    "i-navajo",
    "no-bok",
    "no-nyn",

    /* entries without a listed preferred value */
    "cel-gaulish",  "en-GB-oed",    "i-ami",        "i-bnn",
    "i-default",    "i-enochian",   "i-mingo",      "i-pwn",
    "i-tao",        "i-tay",        "i-tsu",        "sgn-BE-fr",
    "sgn-BE-nl",    "sgn-CH-de",    "zh-cmn",       "zh-cmn-Hans",
    "zh-cmn-Hant",  "zh-gan",       "zh-guoyu",     "zh-hakka",
    "zh-min",       "zh-min-nan",   "zh-wuu",       "zh-xiang",
    "zh-yue",

    NULL
};
78
/* Based on IANA registry at the time of writing this code.
*  Preferred values for the grandfathered tags, where one is listed.
*  The array is in sync with LOC_GRANDFATHERED: entry N here is the preferred
*  value for entry N there. It is NULL-terminated.
*/
static const char * const   LOC_PREFERRED_GRANDFATHERED[]  = {
    "jbo",          "tlh",          "lb",
    "nv",           "nb",           "nn",
    NULL
};

/* Number of preferred values, not counting the NULL sentinel. Derived from
*  the array itself so the two cannot drift apart when an entry is added.
*/
static const int        LOC_PREFERRED_GRANDFATHERED_LEN =
    (int)(sizeof(LOC_PREFERRED_GRANDFATHERED) / sizeof(LOC_PREFERRED_GRANDFATHERED[0])) - 1;
90
/* Character-class helpers used while scanning locale IDs.
 *
 * Every parameter is parenthesized so that an expression argument
 * (e.g. `c | 0` or `p + 2`) expands with the intended precedence.
 * The argument may still be evaluated more than once, so do not pass
 * expressions with side effects.
 */

/*returns TRUE if a is an ID separator FALSE otherwise*/
#define isIDSeparator(a) ((a) == '_' || (a) == '-')
#define isKeywordSeparator(a) ((a) == '@')
#define isEndOfTag(a) ((a) == '\0')

#define isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/*returns TRUE if one of the special prefixes is here (s=string)
  'x-' or 'i-' */
#define isIDPrefix(s) (isPrefixLetter((s)[0])&&isIDSeparator((s)[1]))
#define isKeywordPrefix(s) ( isKeywordSeparator((s)[0]) )

/* Dot terminates it because of POSIX form  where dot precedes the codepage
 * except for variant */
#define isTerminator(a)  (((a)==0)||((a)=='.')||((a)=='@'))
106
/* {{{ Linear search for 'key' in the NULL-terminated string array 'list'.
 * The comparison is an exact, case-sensitive strcmp().
 * Returns the zero-based index of the first match, or -1 if 'key' is absent. */
static int16_t findOffset(const char* const* list, const char* key)
{
    const char* const* cursor;

    for (cursor = list; *cursor != NULL; cursor++) {
        if (strcmp(key, *cursor) == 0) {
            /* distance from the start of the array is the index */
            return (int16_t)(cursor - list);
        }
    }

    return -1;
}
122/*}}}*/
123
124static char* getPreferredTag(const char* gf_tag)
125{
126    char* result = NULL;
127    int grOffset = 0;
128
129    grOffset = findOffset( LOC_GRANDFATHERED ,gf_tag);
130    if(grOffset < 0) {
131        return NULL;
132    }
133    if( grOffset < LOC_PREFERRED_GRANDFATHERED_LEN ){
134        /* return preferred tag */
135        result = estrdup( LOC_PREFERRED_GRANDFATHERED[grOffset] );
136    } else {
137        /* Return correct grandfathered language tag */
138        result = estrdup( LOC_GRANDFATHERED[grOffset] );
139    }
140    return result;
141}
142
/* {{{
* Reverse-direction strtok_r equivalent used by the lookup code.
*
* Scans backwards from index savedPos-1 for the nearest ID separator
* ('_' or '-'). If the subtag in front of that separator is a single
* character (another separator sits two positions earlier), the position of
* that earlier separator is reported instead, so the singleton is cut off
* together with what follows it.
*
* Returns the separator position, or -1 when there is no separator or the
* position found is 0 (malformed input such as '-x-xyz' or '-sl_Latn').
*/
static int getStrrtokenPos(char* str, int savedPos)
{
    int pos = savedPos - 1;

    /* walk left until a separator is hit or the string is exhausted */
    while( pos >= 0 && !isIDSeparator(str[pos]) ){
        pos--;
    }
    if( pos < 0 ){
        return -1;
    }

    /* step over a singleton subtag sitting right before the separator */
    if( pos >= 2 && isIDSeparator(str[pos - 2]) ){
        pos -= 2;
    }

    /* a cut at index 0 would leave an empty tag: treat as "no token" */
    return (pos < 1) ? -1 : pos;
}
171/* }}} */
172
/* {{{
* Forward search for a singleton (one-character subtag) in a locale ID.
*
* Returns:
*   0   if the first subtag is one character long, i.e. the string has the
*       form "x-avy" or "a-prv1" (separator at index 1);
*   n   (n > 0) the index of the first singleton character found after a
*       separator, e.g. 3 for "en-x-priv";
*   -1  if str is NULL, empty, or contains no singleton.
*/
static int getSingletonPos(const char* str)
{
    int len;
    int i;

    if( str == NULL ){
        return -1;
    }

    len = (int)strlen(str);
    for( i = 0; i < len; i++ ){
        if( !isIDSeparator(str[i]) ){
            continue;
        }
        if( i == 1 ){
            /* string is of the form x-avy or a-prv1 */
            return 0;
        }
        /* A singleton is followed by another separator two positions on.
         * str[i+2] is only inspected while it lies inside the string: for a
         * trailing separator (i == len-1) it would be one byte past the NUL
         * terminator. */
        if( i + 2 < len && isIDSeparator(str[i + 2]) ){
            return i + 1;
        }
    }

    return -1;
}
205/* }}} */
206
207/* {{{ proto static string Locale::getDefault(  )
208   Get default locale */
209/* }}} */
210/* {{{ proto static string locale_get_default( )
211   Get default locale */
212PHP_NAMED_FUNCTION(zif_locale_get_default)
213{
214    RETURN_STRING( intl_locale_get_default( TSRMLS_C ) );
215}
216
217/* }}} */
218
219/* {{{ proto static string Locale::setDefault( string $locale )
220   Set default locale */
221/* }}} */
222/* {{{ proto static string locale_set_default( string $locale )
223   Set default locale */
224PHP_NAMED_FUNCTION(zif_locale_set_default)
225{
226    char* locale_name = NULL;
227    size_t   len = 0;
228    zend_string *ini_name;
229
230    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC,  "s",
231        &locale_name ,&len ) == FAILURE)
232    {
233        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
234                "locale_set_default: unable to parse input params", 0 TSRMLS_CC );
235
236        RETURN_FALSE;
237    }
238
239    if(len == 0) {
240        locale_name =  (char *)uloc_getDefault() ;
241        len = strlen(locale_name);
242    }
243
244    ini_name = zend_string_init(LOCALE_INI_NAME, sizeof(LOCALE_INI_NAME) - 1, 0);
245    zend_alter_ini_entry(ini_name, locale_name, len, PHP_INI_USER, PHP_INI_STAGE_RUNTIME);
246    zend_string_release(ini_name);
247
248    RETURN_TRUE;
249}
250/* }}} */
251
252/* {{{
253* Gets the value from ICU
254* common code shared by get_primary_language,get_script or get_region or get_variant
255* result = 0 if error, 1 if successful , -1 if no value
256*/
257static char* get_icu_value_internal( const char* loc_name , char* tag_name, int* result , int fromParseLocale)
258{
259    char*       tag_value   = NULL;
260    int32_t         tag_value_len   = 512;
261
262    int     singletonPos    = 0;
263    char*           mod_loc_name    = NULL;
264    int         grOffset    = 0;
265
266    int32_t         buflen          = 512;
267    UErrorCode      status          = U_ZERO_ERROR;
268
269
270    if( strcmp(tag_name, LOC_CANONICALIZE_TAG) != 0 ){
271        /* Handle  grandfathered languages */
272        grOffset =  findOffset( LOC_GRANDFATHERED , loc_name );
273        if( grOffset >= 0 ){
274            if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
275                return estrdup(loc_name);
276            } else {
277                /* Since Grandfathered , no value , do nothing , retutn NULL */
278                return NULL;
279            }
280        }
281
282    if( fromParseLocale==1 ){
283        /* Handle singletons */
284        if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
285            if( strlen(loc_name)>1 && (isIDPrefix(loc_name) == 1) ){
286                return estrdup(loc_name);
287            }
288        }
289
290        singletonPos = getSingletonPos( loc_name );
291        if( singletonPos == 0){
292            /* singleton at start of script, region , variant etc.
293             * or invalid singleton at start of language */
294            return NULL;
295        } else if( singletonPos > 0 ){
296            /* singleton at some position except at start
297             * strip off the singleton and rest of the loc_name */
298            mod_loc_name = estrndup ( loc_name , singletonPos-1);
299        }
300    } /* end of if fromParse */
301
302    } /* end of if != LOC_CANONICAL_TAG */
303
304    if( mod_loc_name == NULL){
305        mod_loc_name = estrdup(loc_name );
306    }
307
308    /* Proceed to ICU */
309    do{
310        tag_value = erealloc( tag_value , buflen  );
311        tag_value_len = buflen;
312
313        if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
314            buflen = uloc_getScript ( mod_loc_name ,tag_value , tag_value_len , &status);
315        }
316        if( strcmp(tag_name , LOC_LANG_TAG )==0 ){
317            buflen = uloc_getLanguage ( mod_loc_name ,tag_value , tag_value_len , &status);
318        }
319        if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
320            buflen = uloc_getCountry ( mod_loc_name ,tag_value , tag_value_len , &status);
321        }
322        if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
323            buflen = uloc_getVariant ( mod_loc_name ,tag_value , tag_value_len , &status);
324        }
325        if( strcmp(tag_name , LOC_CANONICALIZE_TAG)==0 ){
326            buflen = uloc_canonicalize ( mod_loc_name ,tag_value , tag_value_len , &status);
327        }
328
329        if( U_FAILURE( status ) ) {
330            if( status == U_BUFFER_OVERFLOW_ERROR ) {
331                status = U_ZERO_ERROR;
332                continue;
333            }
334
335            /* Error in retriving data */
336            *result = 0;
337            if( tag_value ){
338                efree( tag_value );
339            }
340            if( mod_loc_name ){
341                efree( mod_loc_name);
342            }
343            return NULL;
344        }
345    } while( buflen > tag_value_len );
346
347    if(  buflen ==0 ){
348        /* No value found */
349        *result = -1;
350        if( tag_value ){
351            efree( tag_value );
352        }
353        if( mod_loc_name ){
354            efree( mod_loc_name);
355        }
356        return NULL;
357    } else {
358        *result = 1;
359    }
360
361    if( mod_loc_name ){
362        efree( mod_loc_name);
363    }
364    return tag_value;
365}
366/* }}} */
367
368/* {{{
369* Gets the value from ICU , called when PHP userspace function is called
370* common code shared by get_primary_language,get_script or get_region or get_variant
371*/
372static void get_icu_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
373{
374
375    const char* loc_name            = NULL;
376    size_t         loc_name_len     = 0;
377
378    char*       tag_value       = NULL;
379    char*       empty_result    = "";
380
381    int         result          = 0;
382    char*       msg             = NULL;
383
384    UErrorCode  status              = U_ZERO_ERROR;
385
386    intl_error_reset( NULL TSRMLS_CC );
387
388    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
389    &loc_name ,&loc_name_len ) == FAILURE) {
390        spprintf(&msg , 0, "locale_get_%s : unable to parse input params", tag_name );
391        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,  msg , 1 TSRMLS_CC );
392        efree(msg);
393
394        RETURN_FALSE;
395    }
396
397    if(loc_name_len == 0) {
398        loc_name = intl_locale_get_default(TSRMLS_C);
399    }
400
401    /* Call ICU get */
402    tag_value = get_icu_value_internal( loc_name , tag_name , &result ,0);
403
404    /* No value found */
405    if( result == -1 ) {
406        if( tag_value){
407            efree( tag_value);
408        }
409        RETURN_STRING( empty_result);
410    }
411
412    /* value found */
413    if( tag_value){
414        RETVAL_STRING( tag_value );
415        //???
416        efree(tag_value);
417        return;
418    }
419
420    /* Error encountered while fetching the value */
421    if( result ==0) {
422        spprintf(&msg , 0, "locale_get_%s : unable to get locale %s", tag_name , tag_name );
423        intl_error_set( NULL, status, msg , 1 TSRMLS_CC );
424        efree(msg);
425        RETURN_NULL();
426    }
427
428}
429/* }}} */
430
431/* {{{ proto static string Locale::getScript($locale)
432 * gets the script for the $locale
433 }}} */
434/* {{{ proto static string locale_get_script($locale)
435 * gets the script for the $locale
436 */
437PHP_FUNCTION( locale_get_script )
438{
439    get_icu_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
440}
441/* }}} */
442
443/* {{{ proto static string Locale::getRegion($locale)
444 * gets the region for the $locale
445 }}} */
446/* {{{ proto static string locale_get_region($locale)
447 * gets the region for the $locale
448 */
449PHP_FUNCTION( locale_get_region )
450{
451    get_icu_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
452}
453/* }}} */
454
455/* {{{ proto static string Locale::getPrimaryLanguage($locale)
456 * gets the primary language for the $locale
457 }}} */
458/* {{{ proto static string locale_get_primary_language($locale)
459 * gets the primary language for the $locale
460 */
461PHP_FUNCTION(locale_get_primary_language )
462{
463    get_icu_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
464}
465/* }}} */
466
467
468/* {{{
469 * common code shared by display_xyz functions to  get the value from ICU
470 }}} */
471static void get_icu_disp_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS)
472{
473    const char* loc_name            = NULL;
474    size_t         loc_name_len     = 0;
475
476    const char* disp_loc_name       = NULL;
477    size_t         disp_loc_name_len   = 0;
478    int         free_loc_name       = 0;
479
480    UChar*      disp_name       = NULL;
481    int32_t     disp_name_len   = 0;
482
483    char*       mod_loc_name        = NULL;
484
485    int32_t     buflen              = 512;
486    UErrorCode  status              = U_ZERO_ERROR;
487
488    char*       utf8value       = NULL;
489    int         utf8value_len       = 0;
490
491    char*       msg                 = NULL;
492    int         grOffset        = 0;
493
494    intl_error_reset( NULL TSRMLS_CC );
495
496    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s|s",
497        &loc_name, &loc_name_len ,
498        &disp_loc_name ,&disp_loc_name_len ) == FAILURE)
499    {
500        spprintf(&msg , 0, "locale_get_display_%s : unable to parse input params", tag_name );
501        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,  msg , 1 TSRMLS_CC );
502        efree(msg);
503        RETURN_FALSE;
504    }
505
506    if(loc_name_len > ULOC_FULLNAME_CAPACITY) {
507        /* See bug 67397: overlong locale names cause trouble in uloc_getDisplayName */
508        spprintf(&msg , 0, "locale_get_display_%s : name too long", tag_name );
509        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,  msg , 1 TSRMLS_CC );
510        efree(msg);
511        RETURN_FALSE;
512    }
513
514    if(loc_name_len == 0) {
515        loc_name = intl_locale_get_default(TSRMLS_C);
516    }
517
518    if( strcmp(tag_name, DISP_NAME) != 0 ){
519        /* Handle grandfathered languages */
520        grOffset = findOffset( LOC_GRANDFATHERED , loc_name );
521        if( grOffset >= 0 ){
522            if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
523                mod_loc_name = getPreferredTag( loc_name );
524            } else {
525                /* Since Grandfathered, no value, do nothing, retutn NULL */
526                RETURN_FALSE;
527            }
528        }
529    } /* end of if != LOC_CANONICAL_TAG */
530
531    if( mod_loc_name==NULL ){
532        mod_loc_name = estrdup( loc_name );
533    }
534
535    /* Check if disp_loc_name passed , if not use default locale */
536    if( !disp_loc_name){
537        disp_loc_name = estrdup(intl_locale_get_default(TSRMLS_C));
538        free_loc_name = 1;
539    }
540
541    /* Get the disp_value for the given locale */
542    do{
543        disp_name = erealloc( disp_name , buflen * sizeof(UChar)  );
544        disp_name_len = buflen;
545
546        if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
547            buflen = uloc_getDisplayLanguage ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
548        } else if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
549            buflen = uloc_getDisplayScript ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
550        } else if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
551            buflen = uloc_getDisplayCountry ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
552        } else if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
553            buflen = uloc_getDisplayVariant ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
554        } else if( strcmp(tag_name , DISP_NAME)==0 ){
555            buflen = uloc_getDisplayName ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
556        }
557
558        /* U_STRING_NOT_TERMINATED_WARNING is admissible here; don't look for it */
559        if( U_FAILURE( status ) )
560        {
561            if( status == U_BUFFER_OVERFLOW_ERROR )
562            {
563                status = U_ZERO_ERROR;
564                continue;
565            }
566
567            spprintf(&msg, 0, "locale_get_display_%s : unable to get locale %s", tag_name , tag_name );
568            intl_error_set( NULL, status, msg , 1 TSRMLS_CC );
569            efree(msg);
570            if( disp_name){
571                efree( disp_name );
572            }
573            if( mod_loc_name){
574                efree( mod_loc_name );
575            }
576            if (free_loc_name) {
577                efree((void *)disp_loc_name);
578                disp_loc_name = NULL;
579            }
580            RETURN_FALSE;
581        }
582    } while( buflen > disp_name_len );
583
584    if( mod_loc_name){
585        efree( mod_loc_name );
586    }
587    if (free_loc_name) {
588        efree((void *)disp_loc_name);
589        disp_loc_name = NULL;
590    }
591    /* Convert display locale name from UTF-16 to UTF-8. */
592    intl_convert_utf16_to_utf8( &utf8value, &utf8value_len, disp_name, buflen, &status );
593    efree( disp_name );
594    if( U_FAILURE( status ) )
595    {
596        spprintf(&msg, 0, "locale_get_display_%s :error converting display name for %s to UTF-8", tag_name , tag_name );
597        intl_error_set( NULL, status, msg , 1 TSRMLS_CC );
598        efree(msg);
599        RETURN_FALSE;
600    }
601
602    RETVAL_STRINGL( utf8value, utf8value_len );
603    //????
604    efree(utf8value);
605
606}
607/* }}} */
608
609/* {{{ proto static string Locale::getDisplayName($locale[, $in_locale = null])
610* gets the name for the $locale in $in_locale or default_locale
611 }}} */
/* {{{ proto static string locale_get_display_name($locale[, $in_locale = null])
* gets the name for the $locale in $in_locale or default_locale
*/
615PHP_FUNCTION(locale_get_display_name)
616{
617    get_icu_disp_value_src_php( DISP_NAME , INTERNAL_FUNCTION_PARAM_PASSTHRU );
618}
619/* }}} */
620
621/* {{{ proto static string Locale::getDisplayLanguage($locale[, $in_locale = null])
622* gets the language for the $locale in $in_locale or default_locale
623 }}} */
/* {{{ proto static string locale_get_display_language($locale[, $in_locale = null])
* gets the language for the $locale in $in_locale or default_locale
*/
627PHP_FUNCTION(locale_get_display_language)
628{
629    get_icu_disp_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
630}
631/* }}} */
632
633/* {{{ proto static string Locale::getDisplayScript($locale, $in_locale = null)
634* gets the script for the $locale in $in_locale or default_locale
635 }}} */
/* {{{ proto static string locale_get_display_script($locale, $in_locale = null)
* gets the script for the $locale in $in_locale or default_locale
*/
639PHP_FUNCTION(locale_get_display_script)
640{
641    get_icu_disp_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
642}
643/* }}} */
644
645/* {{{ proto static string Locale::getDisplayRegion($locale, $in_locale = null)
646* gets the region for the $locale in $in_locale or default_locale
647 }}} */
/* {{{ proto static string locale_get_display_region($locale, $in_locale = null)
* gets the region for the $locale in $in_locale or default_locale
*/
651PHP_FUNCTION(locale_get_display_region)
652{
653    get_icu_disp_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
654}
655/* }}} */
656
657/* {{{
658* proto static string Locale::getDisplayVariant($locale, $in_locale = null)
659* gets the variant for the $locale in $in_locale or default_locale
660 }}} */
/* {{{
* proto static string locale_get_display_variant($locale, $in_locale = null)
* gets the variant for the $locale in $in_locale or default_locale
*/
665PHP_FUNCTION(locale_get_display_variant)
666{
667    get_icu_disp_value_src_php( LOC_VARIANT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
668}
669/* }}} */
670
671 /* {{{ proto static array getKeywords(string $locale) {
672 * return an associative array containing keyword-value
673 * pairs for this locale. The keys are keys to the array (doh!)
674 * }}}*/
675 /* {{{ proto static array locale_get_keywords(string $locale) {
676 * return an associative array containing keyword-value
677 * pairs for this locale. The keys are keys to the array (doh!)
678 */
679PHP_FUNCTION( locale_get_keywords )
680{
681    UEnumeration*   e        = NULL;
682    UErrorCode      status   = U_ZERO_ERROR;
683
684    const char*     kw_key        = NULL;
685    int32_t         kw_key_len    = 0;
686
687    const char*         loc_name        = NULL;
688    size_t              loc_name_len    = 0;
689
690/*
691    ICU expects the buffer to be allocated  before calling the function
692    and so the buffer size has been explicitly specified
693    ICU uloc.h #define  ULOC_KEYWORD_AND_VALUES_CAPACITY   100
694    hence the kw_value buffer size is 100
695*/
696    char*       kw_value        = NULL;
697    int32_t     kw_value_len    = 100;
698
699    intl_error_reset( NULL TSRMLS_CC );
700
701    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
702        &loc_name, &loc_name_len ) == FAILURE)
703    {
704        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
705             "locale_get_keywords: unable to parse input params", 0 TSRMLS_CC );
706
707        RETURN_FALSE;
708    }
709
710    if(loc_name_len == 0) {
711        loc_name = intl_locale_get_default(TSRMLS_C);
712    }
713
714    /* Get the keywords */
715    e = uloc_openKeywords( loc_name, &status );
716    if( e != NULL )
717    {
718        /* Traverse it, filling the return array. */
719        array_init( return_value );
720
721        while( ( kw_key = uenum_next( e, &kw_key_len, &status ) ) != NULL ){
722            kw_value = ecalloc( 1 , kw_value_len  );
723
724            /* Get the keyword value for each keyword */
725            kw_value_len=uloc_getKeywordValue( loc_name,kw_key, kw_value, kw_value_len ,  &status );
726            if (status == U_BUFFER_OVERFLOW_ERROR) {
727                status = U_ZERO_ERROR;
728                kw_value = erealloc( kw_value , kw_value_len+1);
729                kw_value_len=uloc_getKeywordValue( loc_name,kw_key, kw_value, kw_value_len+1 ,  &status );
730            } else if(!U_FAILURE(status)) {
731                kw_value = erealloc( kw_value , kw_value_len+1);
732            }
733            if (U_FAILURE(status)) {
734                    intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "locale_get_keywords: Error encountered while getting the keyword  value for the  keyword", 0 TSRMLS_CC );
735                if( kw_value){
736                    efree( kw_value );
737                }
738                zval_dtor(return_value);
739                RETURN_FALSE;
740            }
741
742            // TODO: avoid reallocation ???
743            add_assoc_stringl( return_value, (char *)kw_key, kw_value , kw_value_len);
744            efree(kw_value);
745        } /* end of while */
746
747    } /* end of if e!=NULL */
748
749    uenum_close( e );
750}
751/* }}} */
752
753 /* {{{ proto static string Locale::canonicalize($locale)
754 * @return string the canonicalized locale
755 * }}} */
 /* {{{ proto static string locale_canonicalize(string $locale)
 * @param string $locale    The locale string to canonicalize
 */
759PHP_FUNCTION(locale_canonicalize)
760{
761    get_icu_value_src_php( LOC_CANONICALIZE_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
762}
763/* }}} */
764
765/* {{{ append_key_value
766* Internal function which is called from locale_compose
767* gets the value for the key_name and appends to the loc_name
768* returns 1 if successful , -1 if not found ,
769* 0 if array element is not a string , -2 if buffer-overflow
770*/
771static int append_key_value(smart_str* loc_name, HashTable* hash_arr, char* key_name)
772{
773    zval *ele_value;
774
775    if ((ele_value = zend_hash_str_find(hash_arr , key_name, strlen(key_name))) != NULL ) {
776        if(Z_TYPE_P(ele_value)!= IS_STRING ){
777            /* element value is not a string */
778            return FAILURE;
779        }
780        if(strcmp(key_name, LOC_LANG_TAG) != 0 &&
781           strcmp(key_name, LOC_GRANDFATHERED_LANG_TAG)!=0 ) {
782            /* not lang or grandfathered tag */
783            smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
784        }
785        smart_str_appendl(loc_name, Z_STRVAL_P(ele_value) , Z_STRLEN_P(ele_value));
786        return SUCCESS;
787    }
788
789    return LOC_NOT_FOUND;
790}
791/* }}} */
792
793/* {{{ append_prefix , appends the prefix needed
794* e.g. private adds 'x'
795*/
796static void add_prefix(smart_str* loc_name, char* key_name)
797{
798    if( strncmp(key_name , LOC_PRIVATE_TAG , 7) == 0 ){
799        smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
800        smart_str_appendl(loc_name, PRIVATE_PREFIX , sizeof(PRIVATE_PREFIX)-1);
801    }
802}
803/* }}} */
804
805/* {{{ append_multiple_key_values
806* Internal function which is called from locale_compose
807* gets the multiple values for the key_name and appends to the loc_name
808* used for 'variant','extlang','private'
809* returns 1 if successful , -1 if not found ,
810* 0 if array element is not a string , -2 if buffer-overflow
811*/
812static int append_multiple_key_values(smart_str* loc_name, HashTable* hash_arr, char* key_name TSRMLS_DC)
813{
814    zval    *ele_value;
815    int     i       = 0;
816    int     isFirstSubtag   = 0;
817    int     max_value   = 0;
818
819    /* Variant/ Extlang/Private etc. */
820    if ((ele_value = zend_hash_str_find( hash_arr , key_name , strlen(key_name))) != NULL) {
821        if( Z_TYPE_P(ele_value) == IS_STRING ){
822            add_prefix( loc_name , key_name);
823
824            smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
825            smart_str_appendl(loc_name, Z_STRVAL_P(ele_value) , Z_STRLEN_P(ele_value));
826            return SUCCESS;
827        } else if(Z_TYPE_P(ele_value) == IS_ARRAY ) {
828            HashTable *arr = HASH_OF(ele_value);
829            zval *data;
830
831            ZEND_HASH_FOREACH_VAL(arr, data) {
832                if(Z_TYPE_P(data) != IS_STRING) {
833                    return FAILURE;
834                }
835                if (isFirstSubtag++ == 0){
836                    add_prefix(loc_name , key_name);
837                }
838                smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
839                smart_str_appendl(loc_name, Z_STRVAL_P(data) , Z_STRLEN_P(data));
840            } ZEND_HASH_FOREACH_END();
841            return SUCCESS;
842        } else {
843            return FAILURE;
844        }
845    } else {
846        char cur_key_name[31];
847        /* Decide the max_value: the max. no. of elements allowed */
848        if( strcmp(key_name , LOC_VARIANT_TAG) ==0 ){
849            max_value  = MAX_NO_VARIANT;
850        }
851        if( strcmp(key_name , LOC_EXTLANG_TAG) ==0 ){
852            max_value  = MAX_NO_EXTLANG;
853        }
854        if( strcmp(key_name , LOC_PRIVATE_TAG) ==0 ){
855            max_value  = MAX_NO_PRIVATE;
856        }
857
858        /* Multiple variant values as variant0, variant1 ,variant2 */
859        isFirstSubtag = 0;
860        for( i=0 ; i< max_value; i++ ){
861            snprintf( cur_key_name , 30, "%s%d", key_name , i);
862            if ((ele_value = zend_hash_str_find( hash_arr , cur_key_name , strlen(cur_key_name))) != NULL) {
863                if( Z_TYPE_P(ele_value)!= IS_STRING ){
864                    /* variant is not a string */
865                    return FAILURE;
866                }
867                /* Add the contents */
868                if (isFirstSubtag++ == 0){
869                    add_prefix(loc_name , cur_key_name);
870                }
871                smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
872                smart_str_appendl(loc_name, Z_STRVAL_P(ele_value) , Z_STRLEN_P(ele_value));
873            }
874        } /* end of for */
875    } /* end of else */
876
877    return SUCCESS;
878}
879/* }}} */
880
881/*{{{
882* If applicable sets error message and aborts locale_compose gracefully
883* returns 0  if locale_compose needs to be aborted
884* otherwise returns 1
885*/
886static int handleAppendResult( int result, smart_str* loc_name TSRMLS_DC)
887{
888    intl_error_reset( NULL TSRMLS_CC );
889    if( result == FAILURE) {
890        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
891             "locale_compose: parameter array element is not a string", 0 TSRMLS_CC );
892        smart_str_free(loc_name);
893        return 0;
894    }
895    return 1;
896}
897/* }}} */
898
/* NUL-terminates the smart_str and returns its string from the enclosing
 * PHP function. Wrapped in do { } while (0) so that both steps stay together
 * when the macro is the body of an unbraced if/else. */
#define RETURN_SMART_STR(str) do { smart_str_0((str)); RETURN_STR((str)->s); } while (0)
900/* {{{ proto static string Locale::composeLocale($array)
901* Creates a locale by combining the parts of locale-ID passed
902* }}} */
/* {{{ proto static string locale_compose($array)
* Creates a locale by combining the parts of locale-ID passed
* }}} */
906PHP_FUNCTION(locale_compose)
907{
908    smart_str       loc_name_s = {0};
909    smart_str *loc_name = &loc_name_s;
910    zval*           arr = NULL;
911    HashTable*      hash_arr = NULL;
912    int             result = 0;
913
914    intl_error_reset( NULL TSRMLS_CC );
915
916    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "a",
917        &arr) == FAILURE)
918    {
919        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
920             "locale_compose: unable to parse input params", 0 TSRMLS_CC );
921        RETURN_FALSE;
922    }
923
924    hash_arr = HASH_OF( arr );
925
926    if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 )
927        RETURN_FALSE;
928
929    /* Check for grandfathered first */
930    result = append_key_value(loc_name, hash_arr,  LOC_GRANDFATHERED_LANG_TAG);
931    if( result == SUCCESS){
932        RETURN_SMART_STR(loc_name);
933    }
934    if( !handleAppendResult( result, loc_name TSRMLS_CC)){
935        RETURN_FALSE;
936    }
937
938    /* Not grandfathered */
939    result = append_key_value(loc_name, hash_arr , LOC_LANG_TAG);
940    if( result == LOC_NOT_FOUND ){
941        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
942        "locale_compose: parameter array does not contain 'language' tag.", 0 TSRMLS_CC );
943        smart_str_free(loc_name);
944        RETURN_FALSE;
945    }
946    if( !handleAppendResult( result, loc_name TSRMLS_CC)){
947        RETURN_FALSE;
948    }
949
950    /* Extlang */
951    result = append_multiple_key_values(loc_name, hash_arr , LOC_EXTLANG_TAG TSRMLS_CC);
952    if( !handleAppendResult( result, loc_name TSRMLS_CC)){
953        RETURN_FALSE;
954    }
955
956    /* Script */
957    result = append_key_value(loc_name, hash_arr , LOC_SCRIPT_TAG);
958    if( !handleAppendResult( result, loc_name TSRMLS_CC)){
959        RETURN_FALSE;
960    }
961
962    /* Region */
963    result = append_key_value( loc_name, hash_arr , LOC_REGION_TAG);
964    if( !handleAppendResult( result, loc_name TSRMLS_CC)){
965        RETURN_FALSE;
966    }
967
968    /* Variant */
969    result = append_multiple_key_values( loc_name, hash_arr , LOC_VARIANT_TAG TSRMLS_CC);
970    if( !handleAppendResult( result, loc_name TSRMLS_CC)){
971        RETURN_FALSE;
972    }
973
974    /* Private */
975    result = append_multiple_key_values( loc_name, hash_arr , LOC_PRIVATE_TAG TSRMLS_CC);
976    if( !handleAppendResult( result, loc_name TSRMLS_CC)){
977        RETURN_FALSE;
978    }
979
980    RETURN_SMART_STR(loc_name);
981}
982/* }}} */
983
984
/*{{{
* Parses the locale and returns private subtags  if existing
* else returns NULL
* e.g. for locale='en_US-x-prv1-prv2-prv3'
* returns a newly allocated copy of the string 'prv1-prv2-prv3'
*
* The result is created with estrndup(); the caller owns it and must
* efree() it. NULL is returned for a NULL or empty loc_name, when no
* 'x'/'X' singleton is found, or when nothing follows the singleton and
* its separator.
*/
static char* get_private_subtags(const char* loc_name)
{
    char*       result       = NULL;
    int         singletonPos = 0;
    int         len          = 0;
    const char* mod_loc_name = loc_name;

    if( !loc_name || loc_name[0] == '\0' ){
        return NULL;
    }

    len = strlen(mod_loc_name);
    while( (singletonPos = getSingletonPos(mod_loc_name)) != -1 ){
        if( mod_loc_name[singletonPos] == 'x' || mod_loc_name[singletonPos] == 'X' ){
            /* Private subtag start found. Copy only when something follows
             * the singleton and its separator; otherwise (loc_name ends with
             * '-x-') leave result NULL. The '<' comparison also keeps the
             * length handed to estrndup() from ever going negative. */
            if( singletonPos + 2 < len ){
                result = estrndup(mod_loc_name + singletonPos + 2, len - (singletonPos + 2));
            }
            break;
        }

        if( singletonPos + 1 >= len ){
            /* String end */
            break;
        }

        /* Singleton found but not a private one: keep scanning the rest
         * of the string for the private subtag. */
        mod_loc_name += singletonPos + 1;
        len = strlen(mod_loc_name);
    }

    return result;
}
1032/* }}} */
1033
1034/* {{{ code used by locale_parse
1035*/
1036static int add_array_entry(const char* loc_name, zval* hash_arr, char* key_name TSRMLS_DC)
1037{
1038    char*   key_value   = NULL;
1039    char*   cur_key_name    = NULL;
1040    char*   token           = NULL;
1041    char*   last_ptr    = NULL;
1042
1043    int result      = 0;
1044    int     cur_result      = 0;
1045    int     cnt         = 0;
1046
1047
1048    if( strcmp(key_name , LOC_PRIVATE_TAG)==0 ){
1049        key_value = get_private_subtags( loc_name );
1050        result = 1;
1051    } else {
1052        key_value = get_icu_value_internal( loc_name , key_name , &result,1 );
1053    }
1054    if( (strcmp(key_name , LOC_PRIVATE_TAG)==0) ||
1055        ( strcmp(key_name , LOC_VARIANT_TAG)==0) ){
1056        if( result > 0 && key_value){
1057            /* Tokenize on the "_" or "-"  */
1058            token = php_strtok_r( key_value , DELIMITER ,&last_ptr);
1059            if( cur_key_name ){
1060                efree( cur_key_name);
1061            }
1062            cur_key_name = (char*)ecalloc( 25,  25);
1063            sprintf( cur_key_name , "%s%d", key_name , cnt++);
1064            add_assoc_string( hash_arr, cur_key_name , token);
1065            /* tokenize on the "_" or "-" and stop  at singleton if any */
1066            while( (token = php_strtok_r(NULL , DELIMITER , &last_ptr)) && (strlen(token)>1) ){
1067                sprintf( cur_key_name , "%s%d", key_name , cnt++);
1068                add_assoc_string( hash_arr, cur_key_name , token);
1069            }
1070/*
1071            if( strcmp(key_name, LOC_PRIVATE_TAG) == 0 ){
1072            }
1073*/
1074        }
1075    } else {
1076        if( result == 1 ){
1077            add_assoc_string( hash_arr, key_name , key_value);
1078            cur_result = 1;
1079        }
1080    }
1081
1082    if( cur_key_name ){
1083        efree( cur_key_name);
1084    }
1085    /*if( key_name != LOC_PRIVATE_TAG && key_value){*/
1086    if( key_value){
1087        efree(key_value);
1088    }
1089    return cur_result;
1090}
1091/* }}} */
1092
1093/* {{{ proto static array Locale::parseLocale($locale)
1094* parses a locale-id into an array the different parts of it
1095 }}} */
1096/* {{{ proto static array parse_locale($locale)
1097* parses a locale-id into an array the different parts of it
1098*/
1099PHP_FUNCTION(locale_parse)
1100{
1101    const char* loc_name        = NULL;
1102    size_t         loc_name_len    = 0;
1103    int         grOffset        = 0;
1104
1105    intl_error_reset( NULL TSRMLS_CC );
1106
1107    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
1108        &loc_name, &loc_name_len ) == FAILURE)
1109    {
1110        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1111             "locale_parse: unable to parse input params", 0 TSRMLS_CC );
1112
1113        RETURN_FALSE;
1114    }
1115
1116    if(loc_name_len == 0) {
1117        loc_name = intl_locale_get_default(TSRMLS_C);
1118    }
1119
1120    array_init( return_value );
1121
1122    grOffset =  findOffset( LOC_GRANDFATHERED , loc_name );
1123    if( grOffset >= 0 ){
1124        add_assoc_string( return_value , LOC_GRANDFATHERED_LANG_TAG, (char *)loc_name);
1125    }
1126    else{
1127        /* Not grandfathered */
1128        add_array_entry( loc_name , return_value , LOC_LANG_TAG TSRMLS_CC);
1129        add_array_entry( loc_name , return_value , LOC_SCRIPT_TAG TSRMLS_CC);
1130        add_array_entry( loc_name , return_value , LOC_REGION_TAG TSRMLS_CC);
1131        add_array_entry( loc_name , return_value , LOC_VARIANT_TAG TSRMLS_CC);
1132        add_array_entry( loc_name , return_value , LOC_PRIVATE_TAG TSRMLS_CC);
1133    }
1134}
1135/* }}} */
1136
1137/* {{{ proto static array Locale::getAllVariants($locale)
1138* gets an array containing the list of variants, or null
1139 }}} */
1140/* {{{ proto static array locale_get_all_variants($locale)
1141* gets an array containing the list of variants, or null
1142*/
1143PHP_FUNCTION(locale_get_all_variants)
1144{
1145    const char*     loc_name        = NULL;
1146    size_t          loc_name_len    = 0;
1147
1148    int result      = 0;
1149    char*   token       = NULL;
1150    char*   variant     = NULL;
1151    char*   saved_ptr   = NULL;
1152
1153    intl_error_reset( NULL TSRMLS_CC );
1154
1155    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
1156    &loc_name, &loc_name_len ) == FAILURE)
1157    {
1158        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1159         "locale_parse: unable to parse input params", 0 TSRMLS_CC );
1160
1161        RETURN_FALSE;
1162    }
1163
1164    if(loc_name_len == 0) {
1165        loc_name = intl_locale_get_default(TSRMLS_C);
1166    }
1167
1168
1169    array_init( return_value );
1170
1171    /* If the locale is grandfathered, stop, no variants */
1172    if( findOffset( LOC_GRANDFATHERED , loc_name ) >=  0 ){
1173        /* ("Grandfathered Tag. No variants."); */
1174    }
1175    else {
1176    /* Call ICU variant */
1177        variant = get_icu_value_internal( loc_name , LOC_VARIANT_TAG , &result ,0);
1178        if( result > 0 && variant){
1179            /* Tokenize on the "_" or "-" */
1180            token = php_strtok_r( variant , DELIMITER , &saved_ptr);
1181            add_next_index_stringl( return_value, token , strlen(token));
1182            /* tokenize on the "_" or "-" and stop  at singleton if any */
1183            while( (token = php_strtok_r(NULL , DELIMITER, &saved_ptr)) && (strlen(token)>1) ){
1184                add_next_index_stringl( return_value, token , strlen(token));
1185            }
1186        }
1187        if( variant ){
1188            efree( variant );
1189        }
1190    }
1191
1192
1193}
1194/* }}} */
1195
/*{{{
* Converts to lower case and also replaces all hyphens with the underscore
*
* str     input string; may be NULL or empty
* retstr  output buffer; must have room for strlen(str)+1 bytes
*
* Returns 1 after writing the NUL-terminated converted string to retstr.
* Returns 0 without touching retstr when str is NULL or empty.
*/
static int strToMatch(const char* str ,char *retstr)
{
    if( (!str) || str[0] == '\0' ){
        return 0;
    }

    for( ; *str != '\0'; str++, retstr++ ){
        if( *str == '-' ){
            *retstr = '_';
        } else {
            /* tolower() is only defined for values representable as
             * unsigned char (or EOF); a plain char holding a byte >= 0x80
             * may be negative, so convert before the call. */
            *retstr = (char)tolower( (unsigned char)*str );
        }
    }
    *retstr = '\0';

    return 1;
}
1229/* }}} */
1230
1231/* {{{ proto static boolean Locale::filterMatches(string $langtag, string $locale[, bool $canonicalize])
1232* Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm
1233*/
1234/* }}} */
1235/* {{{ proto boolean locale_filter_matches(string $langtag, string $locale[, bool $canonicalize])
1236* Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm
1237*/
1238PHP_FUNCTION(locale_filter_matches)
1239{
1240    char*           lang_tag        = NULL;
1241    size_t          lang_tag_len    = 0;
1242    const char*     loc_range       = NULL;
1243    size_t          loc_range_len   = 0;
1244
1245    int     result      = 0;
1246    char*       token       = 0;
1247    char*       chrcheck    = NULL;
1248
1249    char*           can_lang_tag    = NULL;
1250    char*           can_loc_range   = NULL;
1251
1252    char*           cur_lang_tag    = NULL;
1253    char*           cur_loc_range   = NULL;
1254
1255    zend_bool   boolCanonical   = 0;
1256    UErrorCode  status      = U_ZERO_ERROR;
1257
1258    intl_error_reset( NULL TSRMLS_CC );
1259
1260    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "ss|b",
1261        &lang_tag, &lang_tag_len , &loc_range , &loc_range_len ,
1262        &boolCanonical) == FAILURE)
1263    {
1264        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1265        "locale_filter_matches: unable to parse input params", 0 TSRMLS_CC );
1266
1267        RETURN_FALSE;
1268    }
1269
1270    if(loc_range_len == 0) {
1271        loc_range = intl_locale_get_default(TSRMLS_C);
1272    }
1273
1274    if( strcmp(loc_range,"*")==0){
1275        RETURN_TRUE;
1276    }
1277
1278    if( boolCanonical ){
1279        /* canonicalize loc_range */
1280        can_loc_range=get_icu_value_internal( loc_range , LOC_CANONICALIZE_TAG , &result , 0);
1281        if( result ==0) {
1282            intl_error_set( NULL, status,
1283                "locale_filter_matches : unable to canonicalize loc_range" , 0 TSRMLS_CC );
1284            RETURN_FALSE;
1285        }
1286
1287        /* canonicalize lang_tag */
1288        can_lang_tag = get_icu_value_internal( lang_tag , LOC_CANONICALIZE_TAG , &result ,  0);
1289        if( result ==0) {
1290            intl_error_set( NULL, status,
1291                "locale_filter_matches : unable to canonicalize lang_tag" , 0 TSRMLS_CC );
1292            RETURN_FALSE;
1293        }
1294
1295        /* Convert to lower case for case-insensitive comparison */
1296        cur_lang_tag = ecalloc( 1, strlen(can_lang_tag) + 1);
1297
1298        /* Convert to lower case for case-insensitive comparison */
1299        result = strToMatch( can_lang_tag , cur_lang_tag);
1300        if( result == 0) {
1301            efree( cur_lang_tag );
1302            efree( can_lang_tag );
1303            RETURN_FALSE;
1304        }
1305
1306        cur_loc_range = ecalloc( 1, strlen(can_loc_range) + 1);
1307        result = strToMatch( can_loc_range , cur_loc_range );
1308        if( result == 0) {
1309            efree( cur_lang_tag );
1310            efree( can_lang_tag );
1311            efree( cur_loc_range );
1312            efree( can_loc_range );
1313            RETURN_FALSE;
1314        }
1315
1316        /* check if prefix */
1317        token   = strstr( cur_lang_tag , cur_loc_range );
1318
1319        if( token && (token==cur_lang_tag) ){
1320            /* check if the char. after match is SEPARATOR */
1321            chrcheck = token + (strlen(cur_loc_range));
1322            if( isIDSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
1323                if( cur_lang_tag){
1324                    efree( cur_lang_tag );
1325                }
1326                if( cur_loc_range){
1327                    efree( cur_loc_range );
1328                }
1329                if( can_lang_tag){
1330                    efree( can_lang_tag );
1331                }
1332                if( can_loc_range){
1333                    efree( can_loc_range );
1334                }
1335                RETURN_TRUE;
1336            }
1337        }
1338
1339        /* No prefix as loc_range */
1340        if( cur_lang_tag){
1341            efree( cur_lang_tag );
1342        }
1343        if( cur_loc_range){
1344            efree( cur_loc_range );
1345        }
1346        if( can_lang_tag){
1347            efree( can_lang_tag );
1348        }
1349        if( can_loc_range){
1350            efree( can_loc_range );
1351        }
1352        RETURN_FALSE;
1353
1354    } /* end of if isCanonical */
1355    else{
1356        /* Convert to lower case for case-insensitive comparison */
1357        cur_lang_tag = ecalloc( 1, strlen(lang_tag ) + 1);
1358
1359        result = strToMatch( lang_tag , cur_lang_tag);
1360        if( result == 0) {
1361            efree( cur_lang_tag );
1362            RETURN_FALSE;
1363        }
1364        cur_loc_range = ecalloc( 1, strlen(loc_range ) + 1);
1365        result = strToMatch( loc_range , cur_loc_range );
1366        if( result == 0) {
1367            efree( cur_lang_tag );
1368            efree( cur_loc_range );
1369            RETURN_FALSE;
1370        }
1371
1372        /* check if prefix */
1373        token   = strstr( cur_lang_tag , cur_loc_range );
1374
1375        if( token && (token==cur_lang_tag) ){
1376            /* check if the char. after match is SEPARATOR */
1377            chrcheck = token + (strlen(cur_loc_range));
1378            if( isIDSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){
1379                if( cur_lang_tag){
1380                    efree( cur_lang_tag );
1381                }
1382                if( cur_loc_range){
1383                    efree( cur_loc_range );
1384                }
1385                RETURN_TRUE;
1386            }
1387        }
1388
1389        /* No prefix as loc_range */
1390        if( cur_lang_tag){
1391            efree( cur_lang_tag );
1392        }
1393        if( cur_loc_range){
1394            efree( cur_loc_range );
1395        }
1396        RETURN_FALSE;
1397
1398    }
1399}
1400/* }}} */
1401
1402static void array_cleanup( char* arr[] , int arr_size)
1403{
1404    int i=0;
1405    for( i=0; i< arr_size; i++ ){
1406        if( arr[i*2] ){
1407            efree( arr[i*2]);
1408        }
1409    }
1410    efree(arr);
1411}
1412
1413#define LOOKUP_CLEAN_RETURN(value)  array_cleanup(cur_arr, cur_arr_len); return (value)
1414/* {{{
1415* returns the lookup result to lookup_loc_range_src_php
1416* internal function
1417*/
1418static char* lookup_loc_range(const char* loc_range, HashTable* hash_arr, int canonicalize  TSRMLS_DC)
1419{
1420    int i = 0;
1421    int cur_arr_len = 0;
1422    int result = 0;
1423
1424    char* lang_tag = NULL;
1425    zval* ele_value = NULL;
1426    char** cur_arr = NULL;
1427
1428    char* cur_loc_range = NULL;
1429    char* can_loc_range = NULL;
1430    int saved_pos = 0;
1431
1432    char* return_value = NULL;
1433
1434    cur_arr = ecalloc(zend_hash_num_elements(hash_arr)*2, sizeof(char *));
1435    ZEND_HASH_FOREACH_VAL(hash_arr, ele_value) {
1436    /* convert the array to lowercase , also replace hyphens with the underscore and store it in cur_arr */
1437        if(Z_TYPE_P(ele_value)!= IS_STRING) {
1438            /* element value is not a string */
1439            intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: locale array element is not a string", 0 TSRMLS_CC);
1440            LOOKUP_CLEAN_RETURN(NULL);
1441        }
1442        cur_arr[cur_arr_len*2] = estrndup(Z_STRVAL_P(ele_value), Z_STRLEN_P(ele_value));
1443        result = strToMatch(Z_STRVAL_P(ele_value), cur_arr[cur_arr_len*2]);
1444        if(result == 0) {
1445            intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag", 0 TSRMLS_CC);
1446            LOOKUP_CLEAN_RETURN(NULL);
1447        }
1448        cur_arr[cur_arr_len*2+1] = Z_STRVAL_P(ele_value);
1449        cur_arr_len++ ;
1450    } ZEND_HASH_FOREACH_END(); /* end of for */
1451
1452    /* Canonicalize array elements */
1453    if(canonicalize) {
1454        for(i=0; i<cur_arr_len; i++) {
1455            lang_tag = get_icu_value_internal(cur_arr[i*2], LOC_CANONICALIZE_TAG, &result, 0);
1456            if(result != 1 || lang_tag == NULL || !lang_tag[0]) {
1457                if(lang_tag) {
1458                    efree(lang_tag);
1459                }
1460                intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0 TSRMLS_CC);
1461                LOOKUP_CLEAN_RETURN(NULL);
1462            }
1463            cur_arr[i*2] = erealloc(cur_arr[i*2], strlen(lang_tag)+1);
1464            result = strToMatch(lang_tag, cur_arr[i*2]);
1465            efree(lang_tag);
1466            if(result == 0) {
1467                intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0 TSRMLS_CC);
1468                LOOKUP_CLEAN_RETURN(NULL);
1469            }
1470        }
1471
1472    }
1473
1474    if(canonicalize) {
1475        /* Canonicalize the loc_range */
1476        can_loc_range = get_icu_value_internal(loc_range, LOC_CANONICALIZE_TAG, &result , 0);
1477        if( result != 1 || can_loc_range == NULL || !can_loc_range[0]) {
1478            /* Error */
1479            intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize loc_range" , 0 TSRMLS_CC );
1480            if(can_loc_range) {
1481                efree(can_loc_range);
1482            }
1483            LOOKUP_CLEAN_RETURN(NULL);
1484        } else {
1485            loc_range = can_loc_range;
1486        }
1487    }
1488
1489    cur_loc_range = ecalloc(1, strlen(loc_range)+1);
1490    /* convert to lower and replace hyphens */
1491    result = strToMatch(loc_range, cur_loc_range);
1492    if(can_loc_range) {
1493        efree(can_loc_range);
1494    }
1495    if(result == 0) {
1496        intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0 TSRMLS_CC);
1497        LOOKUP_CLEAN_RETURN(NULL);
1498    }
1499
1500    /* Lookup for the lang_tag match */
1501    saved_pos = strlen(cur_loc_range);
1502    while(saved_pos > 0) {
1503        for(i=0; i< cur_arr_len; i++){
1504            if(cur_arr[i*2] != NULL && strlen(cur_arr[i*2]) == saved_pos && strncmp(cur_loc_range, cur_arr[i*2], saved_pos) == 0) {
1505                /* Match found */
1506                return_value = estrdup(canonicalize?cur_arr[i*2]:cur_arr[i*2+1]);
1507                efree(cur_loc_range);
1508                LOOKUP_CLEAN_RETURN(return_value);
1509            }
1510        }
1511        saved_pos = getStrrtokenPos(cur_loc_range, saved_pos);
1512    }
1513
1514    /* Match not found */
1515    efree(cur_loc_range);
1516    LOOKUP_CLEAN_RETURN(NULL);
1517}
1518/* }}} */
1519
1520/* {{{ proto string Locale::lookup(array $langtag, string $locale[, bool $canonicalize[, string $default = null]])
1521* Searchs the items in $langtag for the best match to the language
1522* range
1523*/
1524/* }}} */
1525/* {{{ proto string locale_lookup(array $langtag, string $locale[, bool $canonicalize[, string $default = null]])
1526* Searchs the items in $langtag for the best match to the language
1527* range
1528*/
1529PHP_FUNCTION(locale_lookup)
1530{
1531    char*       fallback_loc        = NULL;
1532    size_t          fallback_loc_len    = 0;
1533    const char*     loc_range           = NULL;
1534    size_t          loc_range_len       = 0;
1535
1536    zval*       arr             = NULL;
1537    HashTable*  hash_arr        = NULL;
1538    zend_bool   boolCanonical   = 0;
1539    char*       result          =NULL;
1540
1541    intl_error_reset( NULL TSRMLS_CC );
1542
1543    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "as|bs", &arr, &loc_range, &loc_range_len,
1544        &boolCanonical, &fallback_loc, &fallback_loc_len) == FAILURE) {
1545        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "locale_lookup: unable to parse input params", 0 TSRMLS_CC );
1546        RETURN_FALSE;
1547    }
1548
1549    if(loc_range_len == 0) {
1550        loc_range = intl_locale_get_default(TSRMLS_C);
1551    }
1552
1553    hash_arr = HASH_OF(arr);
1554
1555    if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 ) {
1556        RETURN_EMPTY_STRING();
1557    }
1558
1559    result = lookup_loc_range(loc_range, hash_arr, boolCanonical TSRMLS_CC);
1560    if(result == NULL || result[0] == '\0') {
1561        if( fallback_loc ) {
1562            result = estrndup(fallback_loc, fallback_loc_len);
1563        } else {
1564            RETURN_EMPTY_STRING();
1565        }
1566    }
1567
1568    RETVAL_STRINGL(result, strlen(result));
1569    //????
1570    efree(result);
1571}
1572/* }}} */
1573
1574/* {{{ proto string Locale::acceptFromHttp(string $http_accept)
1575* Tries to find out best available locale based on HTTP �Accept-Language� header
1576*/
1577/* }}} */
1578/* {{{ proto string locale_accept_from_http(string $http_accept)
1579* Tries to find out best available locale based on HTTP �Accept-Language� header
1580*/
1581PHP_FUNCTION(locale_accept_from_http)
1582{
1583    UEnumeration *available;
1584    char *http_accept = NULL;
1585    size_t http_accept_len;
1586    UErrorCode status = 0;
1587    int len;
1588    char resultLocale[INTL_MAX_LOCALE_LEN+1];
1589    UAcceptResult outResult;
1590
1591    if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s", &http_accept, &http_accept_len) == FAILURE)
1592    {
1593        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1594        "locale_accept_from_http: unable to parse input parameters", 0 TSRMLS_CC );
1595        RETURN_FALSE;
1596    }
1597
1598    available = ures_openAvailableLocales(NULL, &status);
1599    INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to retrieve locale list");
1600    len = uloc_acceptLanguageFromHTTP(resultLocale, INTL_MAX_LOCALE_LEN,
1601                        &outResult, http_accept, available, &status);
1602    uenum_close(available);
1603    INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to find acceptable locale");
1604    if (len < 0 || outResult == ULOC_ACCEPT_FAILED) {
1605        RETURN_FALSE;
1606    }
1607    RETURN_STRINGL(resultLocale, len);
1608}
1609/* }}} */
1610
1611/*
1612 * Local variables:
1613 * tab-width: 4
1614 * c-basic-offset: 4
1615 * End:
1616 * vim600: noet sw=4 ts=4 fdm=marker
1617 * vim<600: noet sw=4 ts=4
1618 *can_loc_len
1619*/
1620