diff options
Diffstat (limited to 'firmware/common/unicode.c')
-rw-r--r-- | firmware/common/unicode.c | 451 |
1 files changed, 313 insertions, 138 deletions
diff --git a/firmware/common/unicode.c b/firmware/common/unicode.c
index 3ff1814c4b..954ad47e1d 100644
--- a/firmware/common/unicode.c
+++ b/firmware/common/unicode.c
@@ -28,161 +28,227 @@
 
 #include <stdio.h>
 #include "config.h"
+#include "system.h"
+#include "thread.h"
 #include "file.h"
 #include "debug.h"
 #include "rbunicode.h"
 #include "rbpaths.h"
+#include "pathfuncs.h"
+#include "core_alloc.h"
 
 #ifndef O_BINARY
 #define O_BINARY 0
 #endif
+#ifndef O_NOISODECODE
+#define O_NOISODECODE 0
+#endif
 
-static int default_codepage = 0;
-static int loaded_cp_table = 0;
-
-#ifdef HAVE_LCD_BITMAP
+#define getle16(p) ((p)[0] | ((p)[1] << 8))   /* p[0] is the low byte  */
+#define getbe16(p) (((p)[0] << 8) | (p)[1])   /* p[0] is the high byte */
 
-#define MAX_CP_TABLE_SIZE 32768
-#define NUM_TABLES 5
+#if !defined (__PCTOOL__) && (CONFIG_PLATFORM & PLATFORM_NATIVE)
+/* Because file scanning uses the default CP table when matching entries,
+   on-demand loading is not feasible; we also must use the filesystem lock */
+#include "file_internal.h"
+#else /* APPLICATION */
+#ifdef __PCTOOL__
+#define yield()
+#endif
+#define open_noiso_internal open
+#endif /* !APPLICATION */
+
+#if 0 /* not needed just now (will probably end up a spinlock) */
+#include "mutex.h"
+static struct mutex cp_mutex SHAREDBSS_ATTR;
+#define cp_lock_init()  mutex_init(&cp_mutex)
+#define cp_lock_enter() mutex_lock(&cp_mutex)
+#define cp_lock_leave() mutex_unlock(&cp_mutex)
+#else
+#define cp_lock_init()  do {} while (0)
+#define cp_lock_enter() asm volatile ("")
+#define cp_lock_leave() asm volatile ("")
+#endif
 
-static const char * const filename[NUM_TABLES] =
+enum cp_tid
 {
-    CODEPAGE_DIR"/iso.cp",
-    CODEPAGE_DIR"/932.cp",  /* SJIS    */
-    CODEPAGE_DIR"/936.cp",  /* GB2312  */
-    CODEPAGE_DIR"/949.cp",  /* KSX1001 */
-    CODEPAGE_DIR"/950.cp"   /* BIG5    */
+    CP_TID_NONE = -1,
+    CP_TID_ISO,
+    CP_TID_932,
+    CP_TID_936,
+    CP_TID_949,
+    CP_TID_950,
 };
 
-static const char cp_2_table[NUM_CODEPAGES] =
+struct cp_info
 {
-    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 5, 0
+    int8_t     tid;
+    const char *filename;
+    const char *name;
 };
 
-static const char * const name_codepages[NUM_CODEPAGES+1] =
-{
-    "ISO-8859-1",
-    "ISO-8859-7",
-    "ISO-8859-8",
-    "CP1251",
-    "ISO-8859-11",
-    "CP1256",
-    "ISO-8859-9",
-    "ISO-8859-2",
-    "CP1250",
-    "CP1252",
-    "SJIS",
-    "GB-2312",
-    "KSX-1001",
-    "BIG5",
-    "UTF-8",
-    "unknown"
-};
+#ifdef HAVE_LCD_BITMAP
 
-#if defined(APPLICATION) && defined(__linux__)
-static const char * const name_codepages_linux[NUM_CODEPAGES+1] =
-{
-    /* "ISO-8859-1" */ "iso8859-1",
-    /* "ISO-8859-7" */ "iso8859-7",
-    /* "ISO-8859-8" */ "iso8859-8",
-    /* "CP1251"     */ "cp1251",
-    /* "ISO-8859-11"*/ "iso8859-11",
-    /* "CP1256"     */ "cp1256",
-    /* "ISO-8859-9" */ "iso8859-9",
-    /* "ISO-8859-2" */ "iso8859-2",
-    /* "CP1250"     */ "cp1250",
-    /* "CP1252"     */ "iso8859-15", /* closest, linux doesnt have a codepage named cp1252 */
-    /* "SJIS"       */ "cp932",
-    /* "GB-2312"    */ "cp936",
-    /* "KSX-1001"   */ "cp949",
-    /* "BIG5"       */ "cp950",
-    /* "UTF-8"      */ "utf8",
-    /* "unknown"    */ "cp437"
-};
+#define MAX_CP_TABLE_SIZE  32768
 
-const char *get_current_codepage_name_linux(void)
+#define CPF_ISO "iso.cp"
+#define CPF_932 "932.cp"  /* SJIS    */
+#define CPF_936 "936.cp"  /* GB2312  */
+#define CPF_949 "949.cp"  /* KSX1001 */
+#define CPF_950 "950.cp"  /* BIG5    */
+
+static const struct cp_info cp_info[NUM_CODEPAGES+1] =
 {
-    if (default_codepage < 0 || default_codepage >= NUM_CODEPAGES)
-        return name_codepages_linux[NUM_CODEPAGES];
-    return name_codepages_linux[default_codepage];
-}
-#endif
+    [0 ... NUM_CODEPAGES] = { CP_TID_NONE, NULL   , "unknown"     },
+    [ISO_8859_1]          = { CP_TID_NONE, NULL   , "ISO-8859-1"  },
+    [ISO_8859_7]          = { CP_TID_ISO , CPF_ISO, "ISO-8859-7"  },
+    [ISO_8859_8]          = { CP_TID_ISO , CPF_ISO, "ISO-8859-8"  },
+    [WIN_1251]            = { CP_TID_ISO , CPF_ISO, "CP1251"      },
+    [ISO_8859_11]         = { CP_TID_ISO , CPF_ISO, "ISO-8859-11" },
+    [WIN_1256]            = { CP_TID_ISO , CPF_ISO, "CP1256"      },
+    [ISO_8859_9]          = { CP_TID_ISO , CPF_ISO, "ISO-8859-9"  },
+    [ISO_8859_2]          = { CP_TID_ISO , CPF_ISO, "ISO-8859-2"  },
+    [WIN_1250]            = { CP_TID_ISO , CPF_ISO, "CP1250"      },
+    [WIN_1252]            = { CP_TID_ISO , CPF_ISO, "CP1252"      },
+    [SJIS]                = { CP_TID_932 , CPF_932, "SJIS"        },
+    [GB_2312]             = { CP_TID_936 , CPF_936, "GB-2312"     },
+    [KSX_1001]            = { CP_TID_949 , CPF_949, "KSX-1001"    },
+    [BIG_5]               = { CP_TID_950 , CPF_950, "BIG5"        },
+    [UTF_8]               = { CP_TID_NONE, NULL   , "UTF-8"       },
+};
 
 #else /* !HAVE_LCD_BITMAP, reduced support */
 
 #define MAX_CP_TABLE_SIZE  768
-#define NUM_TABLES           1
 
-static const char * const filename[NUM_TABLES] = {
-    CODEPAGE_DIR"/isomini.cp"
-};
+#define CPF_ISOMINI "isomini.cp"
 
-static const char cp_2_table[NUM_CODEPAGES] =
+static const struct cp_info cp_info[NUM_CODEPAGES+1] =
 {
-    0, 1, 1, 1, 1, 1, 1, 0
+    [0 ... NUM_CODEPAGES] = { CP_TID_NONE, NULL       , "unknown"    },
+    [ISO_8859_1]          = { CP_TID_NONE, NULL       , "ISO-8859-1" },
+    [ISO_8859_7]          = { CP_TID_ISO , CPF_ISOMINI, "ISO-8859-7" },
+    [WIN_1251]            = { CP_TID_ISO , CPF_ISOMINI, "CP1251"     },
+    [ISO_8859_9]          = { CP_TID_ISO , CPF_ISOMINI, "ISO-8859-9" },
+    [ISO_8859_2]          = { CP_TID_ISO , CPF_ISOMINI, "ISO-8859-2" },
+    [WIN_1250]            = { CP_TID_ISO , CPF_ISOMINI, "CP1250"     },
+    [WIN_1252]            = { CP_TID_ISO , CPF_ISOMINI, "CP1252"     },
+    [UTF_8]               = { CP_TID_NONE, NULL       , "UTF-8"      },
 };
 
-static const char * const name_codepages[NUM_CODEPAGES+1] =
+#endif /* HAVE_LCD_BITMAP */
+
+static int default_cp = INIT_CODEPAGE;
+static int default_cp_tid = CP_TID_NONE;
+static int default_cp_handle = 0;
+static int volatile default_cp_table_ref = 0;
+
+static int loaded_cp_tid = CP_TID_NONE;
+static int volatile cp_table_ref = 0;
+#define CP_LOADING BIT_N(sizeof(int)*8-1) /* guard against multi loaders */
+
+/* non-default codepage table buffer (cannot be bufalloced! playback itself
+   may be making the load request) */
+static unsigned short codepage_table[MAX_CP_TABLE_SIZE+1];
+
+#if defined(APPLICATION) && defined(__linux__)
+static const char * const name_codepages_linux[NUM_CODEPAGES+1] =
 {
-    "ISO-8859-1",
-    "ISO-8859-7",
-    "CP1251",
-    "ISO-8859-9",
-    "ISO-8859-2",
-    "CP1250",
-    "CP1252",
-    "UTF-8",
-    "unknown"
+    [0 ... NUM_CODEPAGES] = "unknown",
+    [ISO_8859_1]          = "iso8859-1",
+    [ISO_8859_7]          = "iso8859-7",
+    [ISO_8859_8]          = "iso8859-8",
+    [WIN_1251]            = "cp1251",
+    [ISO_8859_11]         = "iso8859-11",
+    [WIN_1256]            = "cp1256",
+    [ISO_8859_9]          = "iso8859-9",
+    [ISO_8859_2]          = "iso8859-2",
+    [WIN_1250]            = "cp1250",
+    /* iso8859-15 is closest, linux doesnt have a codepage named cp1252 */
+    [WIN_1252]            = "iso8859-15",
+    [SJIS]                = "cp932",
+    [GB_2312]             = "cp936",
+    [KSX_1001]            = "cp949",
+    [BIG_5]               = "cp950",
+    [UTF_8]               = "utf8",
 };
 
-#endif
-
-static unsigned short codepage_table[MAX_CP_TABLE_SIZE];
+const char *get_current_codepage_name_linux(void)
+{
+    int cp = default_cp;
+    if (cp < 0 || cp >= NUM_CODEPAGES)
+        cp = NUM_CODEPAGES;
+    return name_codepages_linux[cp];
+}
+#endif /* defined(APPLICATION) && defined(__linux__) */
 
 static const unsigned char utf8comp[6] =
 {
     0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC
 };
 
-/* Load codepage file into memory */
-static int load_cp_table(int cp)
+static inline void cptable_tohw16(uint16_t *buf, unsigned int count)
 {
-    int i = 0;
-    int table = cp_2_table[cp];
-    int file, tablesize;
-    unsigned char tmp[2];
+#ifdef ROCKBOX_BIG_ENDIAN
+    for (unsigned int i = 0; i < count; i++)
+        buf[i] = letoh16(buf[i]);
+#endif
+    (void)buf; (void)count;
+}
 
-    if (table == 0 || table == loaded_cp_table)
-        return 1;
+static int move_callback(int handle, void *current, void *new)
+{
+    (void)current; (void)new;
+    /* we don't keep a pointer but we have to stop it if this applies to a
+       buffer not yet swapped-in since it will likely be in use in an I/O
+       call */
+    return (handle != default_cp_handle || default_cp_table_ref != 0) ?
+                BUFLIB_CB_CANNOT_MOVE : BUFLIB_CB_OK;
+}
 
-    file = open(filename[table-1], O_RDONLY|O_BINARY);
+static int alloc_and_load_cp_table(int cp, void *buf)
+{
+    static struct buflib_callbacks ops =
+        { .move_callback = move_callback };
 
-    if (file < 0) {
-        DEBUGF("Can't open codepage file: %s.cp\n", filename[table-1]);
+    /* alloc and read only if there is an associated file */
+    const char *filename = cp_info[cp].filename;
+    if (!filename)
         return 0;
+
+    char path[MAX_PATH];
+    if (path_append(path, CODEPAGE_DIR, filename, sizeof (path))
+            >= sizeof (path)) {
+        return -1;
     }
 
-    tablesize = filesize(file) / 2;
+    /* must be opened without a chance of reentering from FS code */
+    int fd = open_noiso_internal(path, O_RDONLY);
+    if (fd < 0)
+        return -1;
 
-    if (tablesize > MAX_CP_TABLE_SIZE) {
-        DEBUGF("Invalid codepage file: %s.cp\n", filename[table-1]);
-        close(file);
-        return 0;
-    }
+    off_t size = filesize(fd);
 
-    while (i < tablesize) {
-        if (!read(file, tmp, 2)) {
-            DEBUGF("Can't read from codepage file: %s.cp\n",
-                   filename[table-1]);
-            loaded_cp_table = 0;
-            return 0;
+    if (size > 0 && size <= MAX_CP_TABLE_SIZE*2 &&
+        !(size % (off_t)sizeof (uint16_t))) {
+
+        /* if the buffer is provided, use that but don't alloc */
+        int handle = buf ? 0 : core_alloc_ex(filename, size, &ops);
+        if (handle > 0)
+            buf = core_get_data(handle);
+
+        if (buf && read(fd, buf, size) == size) {
+            close(fd);
+            cptable_tohw16(buf, size / sizeof (uint16_t));
+            return handle;
         }
-        codepage_table[i++] = (tmp[1] << 8) | tmp[0];
+
+        if (handle > 0)
+            core_free(handle);
     }
 
-    loaded_cp_table = table;
-    close(file);
-    return 1;
+    close(fd);
+    return -1;
 }
 
 /* Encode a UCS value as UTF-8 and return a pointer after this UTF-8 char. */
@@ -205,47 +271,96 @@ unsigned char* utf8encode(unsigned long ucs, unsigned char *utf8)
 unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8,
                           int cp, int count)
 {
-    unsigned short ucs, tmp;
+    uint16_t *table = NULL;
+
+    cp_lock_enter();
+
+    if (cp < 0 || cp >= NUM_CODEPAGES)
+        cp = default_cp;
 
-    if (cp == -1) /* use default codepage */
-        cp = default_codepage;
+    int tid = cp_info[cp].tid;
 
-    if (!load_cp_table(cp)) cp = 0;
+    while (1) {
+        if (tid == default_cp_tid) {
+            /* use default table */
+            if (default_cp_handle > 0) {
+                table = core_get_data(default_cp_handle);
+                default_cp_table_ref++;
+            }
+
+            break;
+        }
+
+        bool load = false;
+
+        if (tid == loaded_cp_tid) {
+            /* use loaded table */
+            if (!(cp_table_ref & CP_LOADING)) {
+                if (tid != CP_TID_NONE) {
+                    table = codepage_table;
+                    cp_table_ref++;
+                }
+
+                break;
+            }
+        } else if (cp_table_ref == 0) {
+            load = true;
+            cp_table_ref |= CP_LOADING;
+        }
+
+        /* alloc and load must be done outside the lock */
+        cp_lock_leave();
+
+        if (!load) {
+            yield();
+        } else if (alloc_and_load_cp_table(cp, codepage_table) < 0) {
+            cp = INIT_CODEPAGE; /* table may be clobbered now */
+            tid = cp_info[cp].tid;
+        }
+
+        cp_lock_enter();
+
+        if (load) {
+            loaded_cp_tid = tid;
+            cp_table_ref &= ~CP_LOADING;
+        }
+    }
+
+    cp_lock_leave();
 
     while (count--) {
+        unsigned short ucs, tmp;
+
         if (*iso < 128 || cp == UTF_8) /* Already UTF-8 */
             *utf8++ = *iso++;
 
         else {
-
-            /* cp tells us which codepage to convert from */
-            switch (cp) {
-                case ISO_8859_7:  /* Greek */
-                case WIN_1252:    /* Western European */
-                case WIN_1251:    /* Cyrillic */
-                case ISO_8859_9:  /* Turkish */
-                case ISO_8859_2:  /* Latin Extended */
-                case WIN_1250:    /* Central European */
-#ifdef HAVE_LCD_BITMAP
-                case ISO_8859_8:  /* Hebrew */
-                case ISO_8859_11: /* Thai */
-                case WIN_1256:    /* Arabic */
-#endif
+            /* tid tells us which table to use and how */
+            switch (tid) {
+            case CP_TID_ISO: /* Greek */
+                             /* Hebrew */
+                             /* Cyrillic */
+                             /* Thai */
+                             /* Arabic */
+                             /* Turkish */
+                             /* Latin Extended */
+                             /* Central European */
+                             /* Western European */
                     tmp = ((cp-1)*128) + (*iso++ - 128);
-                    ucs = codepage_table[tmp];
+                    ucs = table[tmp];
                     break;
 
 #ifdef HAVE_LCD_BITMAP
-                case SJIS: /* Japanese */
+            case CP_TID_932: /* Japanese */
                     if (*iso > 0xA0 && *iso < 0xE0) {
                         tmp = *iso++ | (0xA100 - 0x8000);
-                        ucs = codepage_table[tmp];
+                        ucs = table[tmp];
                         break;
                     }
 
-                case GB_2312:  /* Simplified Chinese */
-                case KSX_1001: /* Korean */
-                case BIG_5:    /* Traditional Chinese */
+            case CP_TID_936: /* Simplified Chinese */
+            case CP_TID_949: /* Korean */
+            case CP_TID_950: /* Traditional Chinese */
                     if (count < 1 || !iso[1]) {
                         ucs = *iso++;
                         break;
@@ -256,7 +371,7 @@ unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8,
                     tmp = *iso++ << 8;
                     tmp |= *iso++;
                     tmp -= 0x8000;
-                    ucs = codepage_table[tmp];
+                    ucs = table[tmp];
                     count--;
                     break;
 #endif /* HAVE_LCD_BITMAP */
@@ -271,6 +386,17 @@ unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8,
             utf8 = utf8encode(ucs, utf8);
         }
     }
+
+    if (table) {
+        cp_lock_enter();
+        if (table == codepage_table) {
+            cp_table_ref--;
+        } else {
+            default_cp_table_ref--;
+        }
+        cp_lock_leave();
+    }
+
     return utf8;
 }
 
@@ -288,7 +414,7 @@ unsigned char* utf16LEdecode(const unsigned char *utf16, unsigned char *utf8,
             utf16 += 4;
             count -= 2;
         } else {
-            ucs = (utf16[0] | (utf16[1] << 8));
+            ucs = getle16(utf16);
             utf16 += 2;
             count -= 1;
         }
@@ -310,7 +436,7 @@ unsigned char* utf16BEdecode(const unsigned char *utf16, unsigned char *utf8,
             utf16 += 4;
             count -= 2;
         } else {
-            ucs = (utf16[0] << 8) | utf16[1];
+            ucs = getbe16(utf16);
             utf16 += 2;
             count -= 1;
         }
@@ -400,8 +526,50 @@ const unsigned char* utf8decode(const unsigned char *utf8, unsigned short *ucs)
 
 void set_codepage(int cp)
 {
-    default_codepage = cp;
-    return;
+    if (cp < 0 || cp >= NUM_CODEPAGES)
+        cp = NUM_CODEPAGES;
+
+    /* load first then swap if load is successful, else just leave it; if
+       handle is 0 then we just free the current one; this won't happen often
+       thus we don't worry about reusing it and consequently avoid possible
+       clobbering of the existing one */
+
+    int handle = -1;
+    int tid = cp_info[cp].tid;
+
+    while (1) {
+        cp_lock_enter();
+
+        if (default_cp_tid == tid)
+            break;
+
+        if (handle >= 0 && default_cp_table_ref == 0) {
+            int hold = default_cp_handle;
+            default_cp_handle = handle;
+            handle = hold;
+            default_cp_tid = tid;
+            break;
+        }
+
+        /* alloc and load must be done outside the lock */
+        cp_lock_leave();
+
+        if (handle < 0 && (handle = alloc_and_load_cp_table(cp, NULL)) < 0)
+            return; /* OOM; change nothing */
+
+        yield();
+    }
+
+    default_cp = cp;
+    cp_lock_leave();
+
+    if (handle > 0)
+        core_free(handle);
+}
+
+int get_codepage(void)
+{
+    return default_cp;
 }
 
 /* seek to a given char in a utf8 string and
@@ -418,9 +586,16 @@ int utf8seek(const unsigned char* utf8, int offset)
     return pos;
 }
 
-const char* get_codepage_name(int cp)
+const char * get_codepage_name(int cp)
 {
-    if (cp < 0 || cp>= NUM_CODEPAGES)
-        return name_codepages[NUM_CODEPAGES];
-    return name_codepages[cp];
+    if (cp < 0 || cp >= NUM_CODEPAGES)
+        cp = NUM_CODEPAGES;
+    return cp_info[cp].name;
 }
+
+#if 0 /* not needed just now */
+void unicode_init(void)
+{
+    cp_lock_init();
+}
+#endif