Changeset 647cc79956895cfa8c8baf95d58e0f000a3e1a5b

Show
Ignore:
Timestamp:
03/21/06 17:42:34 (2 years ago)
Author:
Rémi Denis-Courmont <rem@videolan.org>
git-committer:
Rémi Denis-Courmont <rem@videolan.org> 1142959354 +0000
git-parent:

[e65878a6ef6e405098b77db28e02eebfae65b650]

git-author:
Rémi Denis-Courmont <rem@videolan.org> 1142959354 +0000
Message:

- FromWide(): converts a wchar_t * to UTF-8 char *

(if local charset is UTF-8, this is similar to wcstombs())

- FromUTF16(): converts a host-order UTF-16 byte sequence to UTF-8

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • include/charset.h

    r3fdc34d r647cc79  
    4848VLC_EXPORT( char *, EnsureUTF8, ( char * ) ); 
    4949VLC_EXPORT( char *, FromUTF32, ( const uint32_t * ) ); 
     50VLC_EXPORT( char *, FromUTF16, ( const uint16_t * ) ); 
     51 
     52static inline char *FromWide( const wchar_t *in ) 
     53{ 
     54    return (sizeof( wchar_t ) == 2) 
     55        ? FromUTF16( (const uint16_t *)in ) 
     56        : FromUTF32( (const uint32_t *)in ); 
     57} 
     58 
     59 
    5060VLC_EXPORT( char *, __vlc_fix_readdir_charset, ( vlc_object_t *, const char * ) ); 
    5161#define vlc_fix_readdir_charset(a,b) __vlc_fix_readdir_charset(VLC_OBJECT(a),b) 
  • include/vlc_symbols.h

    r3fdc34d r647cc79  
    485485    char * (*decode_encoded_URI_duplicate_inner) (const char *psz); 
    486486    void (*resolve_xml_special_chars_inner) (char *psz_value); 
     487    char * (*FromUTF16_inner) (const uint16_t *); 
    487488}; 
    488489# if defined (__PLUGIN__) 
     
    949950#  define decode_encoded_URI_duplicate (p_symbols)->decode_encoded_URI_duplicate_inner 
    950951#  define resolve_xml_special_chars (p_symbols)->resolve_xml_special_chars_inner 
     952#  define FromUTF16 (p_symbols)->FromUTF16_inner 
    951953# elif defined (HAVE_DYNAMIC_PLUGINS) && !defined (__BUILTIN__) 
    952954/****************************************************************** 
     
    14161418    ((p_symbols)->decode_encoded_URI_duplicate_inner) = decode_encoded_URI_duplicate; \ 
    14171419    ((p_symbols)->resolve_xml_special_chars_inner) = resolve_xml_special_chars; \ 
     1420    ((p_symbols)->FromUTF16_inner) = FromUTF16; \ 
    14181421    (p_symbols)->net_ConvertIPv4_deprecated = NULL; \ 
    14191422    (p_symbols)->__stats_CounterGet_deprecated = NULL; \ 
  • src/misc/unicode.c

    r3fdc34d r647cc79  
    11/***************************************************************************** 
    2  * unicode.c: UTF8 <-> locale functions 
     2 * unicode.c: Unicode <-> locale functions 
    33 ***************************************************************************** 
    44 * Copyright (C) 2005-2006 the VideoLAN team 
     
    66 * 
    77 * Authors: Rémi Denis-Courmont <rem # videolan.org> 
     8 * 
     9 * UTF16toUTF8() adapted from Perl 5 (also GPL'd) 
     10 * Copyright (C) 1998-2002, Larry Wall 
    811 * 
    912 * This program is free software; you can redistribute it and/or modify 
     
    656659 
    657660/** 
    658  * UTF32toUTF8(): converts an array from UTF-32 to UTF-8. 
    659  * 
    660  * @param src the UTF32 table to be converted 
     661 * UTF32toUTF8(): converts an array from UTF-32 (host byte order) 
     662 * to UTF-8. 
     663 * 
     664 * @param src the UTF-32 table to be converted 
    661665 * @param len the number of code points to be converted from src 
    662666 * (ie. the number of uint32_t in the table pointed to by src) 
     
    667671 * or NULL on error (in that case, *newlen is undefined). 
    668672 */ 
    669 char *UTF32toUTF8( const uint32_t *src, size_t len, size_t *newlen ) 
     673static char * 
     674UTF32toUTF8( const uint32_t *src, size_t len, size_t *newlen ) 
    670675{ 
    671676    char *res, *out; 
     
    726731 * FromUTF32(): converts an UTF-32 string to UTF-8. 
    727732 * 
      733 * @param src UTF-32 sequence, aligned on a 32-bit boundary. 
     734 * 
    728735 * @return the result of the conversion (must be free()'d), 
    729736 * or NULL in case of error. 
     
    731738char *FromUTF32( const uint32_t *src ) 
    732739{ 
     740    const uint32_t *in; 
    733741    size_t len; 
    734     const uint32_t *in; 
    735742 
    736743    /* determine the size of the string */ 
    737     for( len = 1, in = src; GetWBE( in ); len++ ) 
     744    for( len = 1, in = src; *in; len++ ) 
    738745        in++; 
    739746 
    740747    return UTF32toUTF8( src, len, NULL ); 
    741748} 
     749 
     750/** 
     751 * UTF16toUTF8: converts UTF-16 (host byte order) to UTF-8 
     752 * 
     753 * @param src UTF-16 bytes sequence, aligned on a 16-bits boundary 
     754 * @param len number of uint16_t to convert 
     755 */ 
     756static char * 
     757UTF16toUTF8( const uint16_t *in, size_t len, size_t *newlen ) 
     758{ 
     759    char *res, *out; 
     760 
     761    /* allocate memory */ 
     762    out = res = (char *)malloc( 3 * len ); 
     763    if( res == NULL ) 
     764        return NULL; 
     765 
     766    while( len > 0 ) 
     767    { 
     768        uint32_t uv = *in; 
     769 
     770        in++; 
     771        len--; 
     772 
     773        if( uv < 0x80 ) 
     774        { 
     775            *out++ = uv; 
     776            continue; 
     777        } 
     778        if( uv < 0x800 ) 
     779        { 
     780            *out++ = (( uv >>  6)         | 0xc0); 
     781            *out++ = (( uv        & 0x3f) | 0x80); 
     782            continue; 
     783        } 
     784        if( (uv >= 0xd800) && (uv < 0xdbff) ) 
     785        {   /* surrogates */ 
     786            uint16_t low = GetWBE( in ); 
     787            in++; 
     788            len--; 
     789 
     790            if( (low < 0xdc00) || (low >= 0xdfff) ) 
     791            { 
     792                *out++ = '?'; /* Malformed surrogate */ 
     793                continue; 
     794            } 
     795            else 
     796                uv = ((uv - 0xd800) << 10) + (low - 0xdc00) + 0x10000; 
     797        } 
     798        if( uv < 0x10000 ) 
     799        { 
     800            *out++ = (( uv >> 12)         | 0xe0); 
     801            *out++ = (((uv >>  6) & 0x3f) | 0x80); 
     802            *out++ = (( uv        & 0x3f) | 0x80); 
     803            continue; 
     804        } 
     805        else 
     806        { 
     807            *out++ = (( uv >> 18)         | 0xf0); 
     808            *out++ = (((uv >> 12) & 0x3f) | 0x80); 
     809            *out++ = (((uv >>  6) & 0x3f) | 0x80); 
     810            *out++ = (( uv        & 0x3f) | 0x80); 
     811            continue; 
     812        } 
     813    } 
     814    len = out - res; 
     815    res = realloc( res, len ); 
     816    if( newlen != NULL ) 
     817        *newlen = len; 
     818    return res; 
     819} 
     820 
     821 
     822/** 
     823 * FromUTF16(): converts an UTF-16 string to UTF-8. 
     824 * 
     825 * @param src UTF-16 bytes sequence, aligned on a 16-bits boundary. 
     826 * 
     827 * @return the result of the conversion (must be free()'d), 
     828 * or NULL in case of error. 
     829 */ 
     830char *FromUTF16( const uint16_t *src ) 
     831{ 
     832    const uint16_t *in; 
     833    size_t len; 
     834 
     835    /* determine the size of the string */ 
     836    for( len = 1, in = src; *in; len++ ) 
     837        in += 2; 
     838 
     839    return UTF16toUTF8( src, len, NULL ); 
     840}