Changeset 647cc79956895cfa8c8baf95d58e0f000a3e1a5b
- Timestamp:
- 03/21/06 17:42:34 (2 years ago)
- git-parent:
- Files:
-
- include/charset.h (modified) (1 diff)
- include/vlc_symbols.h (modified) (3 diffs)
- src/misc/unicode.c (modified) (6 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
include/charset.h
r3fdc34d r647cc79 48 48 VLC_EXPORT( char *, EnsureUTF8, ( char * ) ); 49 49 VLC_EXPORT( char *, FromUTF32, ( const uint32_t * ) ); 50 VLC_EXPORT( char *, FromUTF16, ( const uint16_t * ) ); 51 52 static inline char *FromWide( const wchar_t *in ) 53 { 54 return (sizeof( wchar_t ) == 2) 55 ? FromUTF16( (const uint16_t *)in ) 56 : FromUTF32( (const uint32_t *)in ); 57 } 58 59 50 60 VLC_EXPORT( char *, __vlc_fix_readdir_charset, ( vlc_object_t *, const char * ) ); 51 61 #define vlc_fix_readdir_charset(a,b) __vlc_fix_readdir_charset(VLC_OBJECT(a),b) include/vlc_symbols.h
r3fdc34d r647cc79 485 485 char * (*decode_encoded_URI_duplicate_inner) (const char *psz); 486 486 void (*resolve_xml_special_chars_inner) (char *psz_value); 487 char * (*FromUTF16_inner) (const uint16_t *); 487 488 }; 488 489 # if defined (__PLUGIN__) … … 949 950 # define decode_encoded_URI_duplicate (p_symbols)->decode_encoded_URI_duplicate_inner 950 951 # define resolve_xml_special_chars (p_symbols)->resolve_xml_special_chars_inner 952 # define FromUTF16 (p_symbols)->FromUTF16_inner 951 953 # elif defined (HAVE_DYNAMIC_PLUGINS) && !defined (__BUILTIN__) 952 954 /****************************************************************** … … 1416 1418 ((p_symbols)->decode_encoded_URI_duplicate_inner) = decode_encoded_URI_duplicate; \ 1417 1419 ((p_symbols)->resolve_xml_special_chars_inner) = resolve_xml_special_chars; \ 1420 ((p_symbols)->FromUTF16_inner) = FromUTF16; \ 1418 1421 (p_symbols)->net_ConvertIPv4_deprecated = NULL; \ 1419 1422 (p_symbols)->__stats_CounterGet_deprecated = NULL; \ src/misc/unicode.c
r3fdc34d r647cc79 1 1 /***************************************************************************** 2 * unicode.c: U TF8<-> locale functions2 * unicode.c: Unicode <-> locale functions 3 3 ***************************************************************************** 4 4 * Copyright (C) 2005-2006 the VideoLAN team … … 6 6 * 7 7 * Authors: Rémi Denis-Courmont <rem # videolan.org> 8 * 9 * UTF16toUTF8() adapted from Perl 5 (also GPL'd) 10 * Copyright (C) 1998-2002, Larry Wall 8 11 * 9 12 * This program is free software; you can redistribute it and/or modify … … 656 659 657 660 /** 658 * UTF32toUTF8(): converts an array from UTF-32 to UTF-8. 659 * 660 * @param src the UTF32 table to be converted 661 * UTF32toUTF8(): converts an array from UTF-32 (host byte order) 662 * to UTF-8. 663 * 664 * @param src the UTF-32 table to be converted 661 665 * @param len the number of code points to be converted from src 662 666 * (ie. the number of uint32_t in the table pointed to by src) … … 667 671 * or NULL on error (in that case, *newlen is undefined). 668 672 */ 669 char *UTF32toUTF8( const uint32_t *src, size_t len, size_t *newlen ) 673 static char * 674 UTF32toUTF8( const uint32_t *src, size_t len, size_t *newlen ) 670 675 { 671 676 char *res, *out; … … 726 731 * FromUTF32(): converts an UTF-32 string to UTF-8. 727 732 * 733 * @param src UTF-32 bytes sequence, aligned on a 32-bits boundary. 734 * 728 735 * @return the result of the conversion (must be free()'d), 729 736 * or NULL in case of error. 
char *FromUTF32( const uint32_t *src )
{
    const uint32_t *in;
    size_t len;

    /* determine the size of the string; the terminating zero is counted
     * too, so it is handed to the converter with the rest of the string */
    for( len = 1, in = src; *in; len++ )
        in++;

    return UTF32toUTF8( src, len, NULL );
}

/**
 * UTF16toUTF8: converts UTF-16 (host byte order) to UTF-8
 *
 * A well-formed surrogate pair is combined into a single code point.
 * A surrogate that is not part of a well-formed pair (a high surrogate
 * that is not followed by a low surrogate, or a low surrogate on its own)
 * is replaced with '?'; the code unit that follows it is then converted
 * normally instead of being dropped.
 *
 * @param in UTF-16 code units, aligned on a 16-bits boundary
 * @param len number of uint16_t to convert
 * @param newlen if not NULL, receives the number of bytes written
 *
 * @return the result of the conversion (must be free()'d),
 * or NULL if memory could not be allocated.
 */
static char *
UTF16toUTF8( const uint16_t *in, size_t len, size_t *newlen )
{
    char *res, *out, *shrunk;
    size_t outlen;

    /* One code unit expands to 3 bytes at most (a surrogate pair is two
     * units for 4 bytes). Refuse lengths whose size would overflow, and
     * never ask for a zero-sized block. */
    if( len > ((size_t)-1) / 3 )
        return NULL;

    out = res = malloc( len ? 3 * len : 1 );
    if( res == NULL )
        return NULL;

    while( len > 0 )
    {
        uint32_t uv = *in;

        in++;
        len--;

        if( uv < 0x80 )
        {
            *out++ = (char)uv;
            continue;
        }
        if( uv < 0x800 )
        {
            *out++ = (char)(( uv >>  6)         | 0xc0);
            *out++ = (char)(( uv        & 0x3f) | 0x80);
            continue;
        }
        if( (uv >= 0xd800) && (uv <= 0xdbff) )
        {   /* high surrogate: a low surrogate must come right after it */
            if( (len == 0) || (*in < 0xdc00) || (*in > 0xdfff) )
            {
                *out++ = '?'; /* Malformed surrogate */
                continue;     /* next unit (if any) is converted on its own */
            }

            /* same byte order as every other unit: read it directly */
            uv = ((uv - 0xd800) << 10) + (*in - 0xdc00) + 0x10000;
            in++;
            len--;
        }
        else if( (uv >= 0xdc00) && (uv <= 0xdfff) )
        {   /* low surrogate without a preceding high surrogate */
            *out++ = '?';
            continue;
        }

        if( uv < 0x10000 )
        {
            *out++ = (char)(( uv >> 12)         | 0xe0);
            *out++ = (char)(((uv >>  6) & 0x3f) | 0x80);
            *out++ = (char)(( uv        & 0x3f) | 0x80);
        }
        else
        {
            *out++ = (char)(( uv >> 18)         | 0xf0);
            *out++ = (char)(((uv >> 12) & 0x3f) | 0x80);
            *out++ = (char)(((uv >>  6) & 0x3f) | 0x80);
            *out++ = (char)(( uv        & 0x3f) | 0x80);
        }
    }

    outlen = (size_t)(out - res);

    /* Give the unused tail back. If shrinking fails, the original (larger)
     * block is still valid, so keep it rather than losing the result. */
    shrunk = realloc( res, outlen ? outlen : 1 );
    if( shrunk != NULL )
        res = shrunk;

    if( newlen != NULL )
        *newlen = outlen;
    return res;
}


/**
 * FromUTF16(): converts an UTF-16 string to UTF-8.
 *
 * @param src UTF-16 bytes sequence, aligned on a 16-bits boundary,
 * terminated by a zero code unit.
 *
 * @return the result of the conversion (must be free()'d),
 * or NULL in case of error.
 */
char *FromUTF16( const uint16_t *src )
{
    const uint16_t *in;
    size_t len;

    /* determine the size of the string, terminating zero included;
     * advance one code unit per counted unit */
    for( len = 1, in = src; *in; len++ )
        in++;

    return UTF16toUTF8( src, len, NULL );
}
