summary | refs | log | tree | commit | diff
path: root/firmware/common/unicode.c
diff options
context:
space:
mode:
Diffstat (limited to 'firmware/common/unicode.c')
-rw-r--r-- firmware/common/unicode.c 451
1 files changed, 313 insertions, 138 deletions
diff --git a/firmware/common/unicode.c b/firmware/common/unicode.c
index 3ff1814c4b..954ad47e1d 100644
--- a/firmware/common/unicode.c
+++ b/firmware/common/unicode.c
@@ -28,161 +28,227 @@
#include <stdio.h>
#include "config.h"
+#include "system.h"
+#include "thread.h"
#include "file.h"
#include "debug.h"
#include "rbunicode.h"
#include "rbpaths.h"
+#include "pathfuncs.h"
+#include "core_alloc.h"
#ifndef O_BINARY
#define O_BINARY 0
#endif
+#ifndef O_NOISODECODE
+#define O_NOISODECODE 0
+#endif
-static int default_codepage = 0;
-static int loaded_cp_table = 0;
-
-#ifdef HAVE_LCD_BITMAP
+#define getle16(p) ((p)[0] | ((p)[1] << 8)) /* 16-bit value, p[0] is the low byte (little-endian) */
+#define getbe16(p) (((p)[0] << 8) | (p)[1]) /* 16-bit value, p[0] is the high byte (big-endian) */
-#define MAX_CP_TABLE_SIZE 32768
-#define NUM_TABLES 5
+#if !defined (__PCTOOL__) && (CONFIG_PLATFORM & PLATFORM_NATIVE)
+/* Because file scanning uses the default CP table when matching entries,
+ on-demand loading is not feasible; we also must use the filesystem lock */
+#include "file_internal.h"
+#else /* APPLICATION */
+#ifdef __PCTOOL__
+#define yield()
+#endif
+#define open_noiso_internal open
+#endif /* !APPLICATION */
+
+#if 0 /* not needed just now (will probably end up a spinlock) */
+#include "mutex.h"
+static struct mutex cp_mutex SHAREDBSS_ATTR;
+#define cp_lock_init() mutex_init(&cp_mutex)
+#define cp_lock_enter() mutex_lock(&cp_mutex)
+#define cp_lock_leave() mutex_unlock(&cp_mutex)
+#else
+#define cp_lock_init() do {} while (0)
+#define cp_lock_enter() asm volatile ("")
+#define cp_lock_leave() asm volatile ("")
+#endif
-static const char * const filename[NUM_TABLES] =
+enum cp_tid /* identifies which table file a codepage is decoded with */
{
- CODEPAGE_DIR"/iso.cp",
- CODEPAGE_DIR"/932.cp", /* SJIS */
- CODEPAGE_DIR"/936.cp", /* GB2312 */
- CODEPAGE_DIR"/949.cp", /* KSX1001 */
- CODEPAGE_DIR"/950.cp" /* BIG5 */
+ CP_TID_NONE = -1, /* no conversion table */
+ CP_TID_ISO, /* table shared by the 8-bit codepages (iso.cp or isomini.cp) */
+ CP_TID_932, /* 932.cp, SJIS */
+ CP_TID_936, /* 936.cp, GB2312 */
+ CP_TID_949, /* 949.cp, KSX1001 */
+ CP_TID_950, /* 950.cp, BIG5 */
};
-static const char cp_2_table[NUM_CODEPAGES] =
+struct cp_info /* per-codepage entry of the cp_info[] lookup table */
{
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 5, 0
+ int8_t tid; /* enum cp_tid value selecting the table to decode with */
+ const char *filename; /* table file name, appended to CODEPAGE_DIR when loading; NULL if none */
+ const char *name; /* name returned by get_codepage_name() */
};
-static const char * const name_codepages[NUM_CODEPAGES+1] =
-{
- "ISO-8859-1",
- "ISO-8859-7",
- "ISO-8859-8",
- "CP1251",
- "ISO-8859-11",
- "CP1256",
- "ISO-8859-9",
- "ISO-8859-2",
- "CP1250",
- "CP1252",
- "SJIS",
- "GB-2312",
- "KSX-1001",
- "BIG5",
- "UTF-8",
- "unknown"
-};
+#ifdef HAVE_LCD_BITMAP
-#if defined(APPLICATION) && defined(__linux__)
-static const char * const name_codepages_linux[NUM_CODEPAGES+1] =
-{
- /* "ISO-8859-1" */ "iso8859-1",
- /* "ISO-8859-7" */ "iso8859-7",
- /* "ISO-8859-8" */ "iso8859-8",
- /* "CP1251" */ "cp1251",
- /* "ISO-8859-11"*/ "iso8859-11",
- /* "CP1256" */ "cp1256",
- /* "ISO-8859-9" */ "iso8859-9",
- /* "ISO-8859-2" */ "iso8859-2",
- /* "CP1250" */ "cp1250",
- /* "CP1252" */ "iso8859-15", /* closest, linux doesnt have a codepage named cp1252 */
- /* "SJIS" */ "cp932",
- /* "GB-2312" */ "cp936",
- /* "KSX-1001" */ "cp949",
- /* "BIG5" */ "cp950",
- /* "UTF-8" */ "utf8",
- /* "unknown" */ "cp437"
-};
+#define MAX_CP_TABLE_SIZE 32768
-const char *get_current_codepage_name_linux(void)
+#define CPF_ISO "iso.cp"
+#define CPF_932 "932.cp" /* SJIS */
+#define CPF_936 "936.cp" /* GB2312 */
+#define CPF_949 "949.cp" /* KSX1001 */
+#define CPF_950 "950.cp" /* BIG5 */
+
+static const struct cp_info cp_info[NUM_CODEPAGES+1] =
{
- if (default_codepage < 0 || default_codepage >= NUM_CODEPAGES)
- return name_codepages_linux[NUM_CODEPAGES];
- return name_codepages_linux[default_codepage];
-}
-#endif
+ [0 ... NUM_CODEPAGES] = { CP_TID_NONE, NULL , "unknown" },
+ [ISO_8859_1] = { CP_TID_NONE, NULL , "ISO-8859-1" },
+ [ISO_8859_7] = { CP_TID_ISO , CPF_ISO, "ISO-8859-7" },
+ [ISO_8859_8] = { CP_TID_ISO , CPF_ISO, "ISO-8859-8" },
+ [WIN_1251] = { CP_TID_ISO , CPF_ISO, "CP1251" },
+ [ISO_8859_11] = { CP_TID_ISO , CPF_ISO, "ISO-8859-11" },
+ [WIN_1256] = { CP_TID_ISO , CPF_ISO, "CP1256" },
+ [ISO_8859_9] = { CP_TID_ISO , CPF_ISO, "ISO-8859-9" },
+ [ISO_8859_2] = { CP_TID_ISO , CPF_ISO, "ISO-8859-2" },
+ [WIN_1250] = { CP_TID_ISO , CPF_ISO, "CP1250" },
+ [WIN_1252] = { CP_TID_ISO , CPF_ISO, "CP1252" },
+ [SJIS] = { CP_TID_932 , CPF_932, "SJIS" },
+ [GB_2312] = { CP_TID_936 , CPF_936, "GB-2312" },
+ [KSX_1001] = { CP_TID_949 , CPF_949, "KSX-1001" },
+ [BIG_5] = { CP_TID_950 , CPF_950, "BIG5" },
+ [UTF_8] = { CP_TID_NONE, NULL , "UTF-8" },
+};
#else /* !HAVE_LCD_BITMAP, reduced support */
#define MAX_CP_TABLE_SIZE 768
-#define NUM_TABLES 1
-static const char * const filename[NUM_TABLES] = {
- CODEPAGE_DIR"/isomini.cp"
-};
+#define CPF_ISOMINI "isomini.cp"
-static const char cp_2_table[NUM_CODEPAGES] =
+static const struct cp_info cp_info[NUM_CODEPAGES+1] = /* reduced set: every table-backed codepage shares isomini.cp */
{
- 0, 1, 1, 1, 1, 1, 1, 0
+ [0 ... NUM_CODEPAGES] = { CP_TID_NONE, NULL , "unknown" },
+ [ISO_8859_1] = { CP_TID_NONE, NULL , "ISO-8859-1" },
+ [ISO_8859_7] = { CP_TID_ISO , CPF_ISOMINI, "ISO-8859-7" },
+ [WIN_1251] = { CP_TID_ISO , CPF_ISOMINI, "CP1251" },
+ [ISO_8859_9] = { CP_TID_ISO , CPF_ISOMINI, "ISO-8859-9" },
+ [ISO_8859_2] = { CP_TID_ISO , CPF_ISOMINI, "ISO-8859-2" },
+ [WIN_1250] = { CP_TID_ISO , CPF_ISOMINI, "CP1250" },
+ [WIN_1252] = { CP_TID_ISO , CPF_ISOMINI, "CP1252" },
+ /* UTF-8 has no table file, so it must be CP_TID_NONE: a NULL filename
+ paired with CP_TID_ISO would let set_codepage()/iso_decode() record
+ the ISO table as present although nothing was ever loaded */
+ [UTF_8] = { CP_TID_NONE, NULL , "UTF-8" },
};
-static const char * const name_codepages[NUM_CODEPAGES+1] =
+#endif /* HAVE_LCD_BITMAP */
+
+static int default_cp = INIT_CODEPAGE; /* default codepage; written by set_codepage() */
+static int default_cp_tid = CP_TID_NONE; /* table id of the installed default table */
+static int default_cp_handle = 0; /* core_alloc handle of the default table; only used when > 0 */
+static int volatile default_cp_table_ref = 0; /* iso_decode() calls currently using the default table */
+
+static int loaded_cp_tid = CP_TID_NONE; /* table id last loaded into codepage_table */
+static int volatile cp_table_ref = 0; /* iso_decode() calls using codepage_table, plus the CP_LOADING flag */
+#define CP_LOADING BIT_N(sizeof(int)*8-1) /* guard against multi loaders */
+
+/* non-default codepage table buffer (cannot be bufalloced! playback itself
+ may be making the load request) */
+static unsigned short codepage_table[MAX_CP_TABLE_SIZE+1];
+
+#if defined(APPLICATION) && defined(__linux__)
+static const char * const name_codepages_linux[NUM_CODEPAGES+1] =
{
- "ISO-8859-1",
- "ISO-8859-7",
- "CP1251",
- "ISO-8859-9",
- "ISO-8859-2",
- "CP1250",
- "CP1252",
- "UTF-8",
- "unknown"
+ [0 ... NUM_CODEPAGES] = "unknown",
+ [ISO_8859_1] = "iso8859-1",
+ [ISO_8859_7] = "iso8859-7",
+ [ISO_8859_8] = "iso8859-8",
+ [WIN_1251] = "cp1251",
+ [ISO_8859_11] = "iso8859-11",
+ [WIN_1256] = "cp1256",
+ [ISO_8859_9] = "iso8859-9",
+ [ISO_8859_2] = "iso8859-2",
+ [WIN_1250] = "cp1250",
+ /* iso8859-15 is closest, linux doesnt have a codepage named cp1252 */
+ [WIN_1252] = "iso8859-15",
+ [SJIS] = "cp932",
+ [GB_2312] = "cp936",
+ [KSX_1001] = "cp949",
+ [BIG_5] = "cp950",
+ [UTF_8] = "utf8",
};
-#endif
-
-static unsigned short codepage_table[MAX_CP_TABLE_SIZE];
+const char *get_current_codepage_name_linux(void) /* Linux-style name of the default codepage */
+{
+ int cp = default_cp;
+ if (cp < 0 || cp>= NUM_CODEPAGES)
+ cp = NUM_CODEPAGES; /* out of range: use the extra last slot of the name table */
+ return name_codepages_linux[cp];
+}
+#endif /* defined(APPLICATION) && defined(__linux__) */
static const unsigned char utf8comp[6] =
{
0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC
};
-/* Load codepage file into memory */
-static int load_cp_table(int cp)
+static inline void cptable_tohw16(uint16_t *buf, unsigned int count) /* convert count table entries in buf in place */
{
- int i = 0;
- int table = cp_2_table[cp];
- int file, tablesize;
- unsigned char tmp[2];
+#ifdef ROCKBOX_BIG_ENDIAN
+ for (unsigned int i = 0; i < count; i++)
+ buf[i] = letoh16(buf[i]); /* presumably little-endian file order to host order - confirm letoh16() */
+#endif
+ (void)buf; (void)count; /* keeps the parameters used when the loop is compiled out */
+}
- if (table == 0 || table == loaded_cp_table)
- return 1;
+static int move_callback(int handle, void *current, void *new)
+{
+ /* Move callback installed through the buflib_callbacks struct that
+ alloc_and_load_cp_table() hands to core_alloc_ex().
+
+ we don't keep a pointer but we have to stop it if this applies to a
+ buffer not yet swapped-in since it will likely be in use in an I/O
+ call
+
+ The move is therefore refused unless the handle is the installed
+ default table (default_cp_handle) and no iso_decode() call currently
+ holds a reference to it (default_cp_table_ref == 0). */
+ return (handle != default_cp_handle || default_cp_table_ref != 0) ?
+ BUFLIB_CB_CANNOT_MOVE : BUFLIB_CB_OK;
+ (void)current; (void)new; /* placed after the return: only marks the parameters as used */
+}
- file = open(filename[table-1], O_RDONLY|O_BINARY);
+static int alloc_and_load_cp_table(int cp, void *buf) /* Load the table file of
+ codepage cp into buf when buf is given, otherwise into a buffer obtained
+ from core_alloc_ex(). Returns 0 when cp has no table file or when the
+ load into the caller's buf succeeded, a positive handle when a buffer
+ was allocated and filled, and -1 on any failure (path too long, open
+ failed, bad file size, allocation failed or short read). */
+{
+ static struct buflib_callbacks ops =
+ { .move_callback = move_callback };
- if (file < 0) {
- DEBUGF("Can't open codepage file: %s.cp\n", filename[table-1]);
+ /* alloc and read only if there is an associated file */
+ const char *filename = cp_info[cp].filename;
+ if (!filename)
return 0;
+
+ char path[MAX_PATH];
+ if (path_append(path, CODEPAGE_DIR, filename, sizeof (path))
+ >= sizeof (path)) {
+ return -1; /* joined path does not fit in path[] */
}
- tablesize = filesize(file) / 2;
+ /* must be opened without a chance of reentering from FS code */
+ int fd = open_noiso_internal(path, O_RDONLY);
+ if (fd < 0)
+ return -1;
- if (tablesize > MAX_CP_TABLE_SIZE) {
- DEBUGF("Invalid codepage file: %s.cp\n", filename[table-1]);
- close(file);
- return 0;
- }
+ off_t size = filesize(fd);
- while (i < tablesize) {
- if (!read(file, tmp, 2)) {
- DEBUGF("Can't read from codepage file: %s.cp\n",
- filename[table-1]);
- loaded_cp_table = 0;
- return 0;
+ if (size > 0 && size <= MAX_CP_TABLE_SIZE*2 && /* non-empty, at most MAX_CP_TABLE_SIZE entries, */
+ !(size % (off_t)sizeof (uint16_t))) { /* and a whole number of 16-bit entries */
+
+ /* if the buffer is provided, use that but don't alloc */
+ int handle = buf ? 0 : core_alloc_ex(filename, size, &ops);
+ if (handle > 0)
+ buf = core_get_data(handle);
+
+ if (buf && read(fd, buf, size) == size) { /* buf is still NULL here if the allocation failed */
+ close(fd);
+ cptable_tohw16(buf, size / sizeof (uint16_t));
+ return handle;
}
- codepage_table[i++] = (tmp[1] << 8) | tmp[0];
+
+ if (handle > 0)
+ core_free(handle); /* read failed: release the buffer allocated above */
}
- loaded_cp_table = table;
- close(file);
- return 1;
+ close(fd);
+ return -1;
}
/* Encode a UCS value as UTF-8 and return a pointer after this UTF-8 char. */
@@ -205,47 +271,96 @@ unsigned char* utf8encode(unsigned long ucs, unsigned char *utf8)
unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8,
int cp, int count)
{
- unsigned short ucs, tmp;
+ uint16_t *table = NULL;
+
+ cp_lock_enter();
+
+ if (cp < 0 || cp >= NUM_CODEPAGES)
+ cp = default_cp;
- if (cp == -1) /* use default codepage */
- cp = default_codepage;
+ int tid = cp_info[cp].tid;
- if (!load_cp_table(cp)) cp = 0;
+ while (1) {
+ if (tid == default_cp_tid) {
+ /* use default table */
+ if (default_cp_handle > 0) {
+ table = core_get_data(default_cp_handle);
+ default_cp_table_ref++;
+ }
+
+ break;
+ }
+
+ bool load = false;
+
+ if (tid == loaded_cp_tid) {
+ /* use loaded table */
+ if (!(cp_table_ref & CP_LOADING)) {
+ if (tid != CP_TID_NONE) {
+ table = codepage_table;
+ cp_table_ref++;
+ }
+
+ break;
+ }
+ } else if (cp_table_ref == 0) {
+ load = true;
+ cp_table_ref |= CP_LOADING;
+ }
+
+ /* alloc and load must be done outside the lock */
+ cp_lock_leave();
+
+ if (!load) {
+ yield();
+ } else if (alloc_and_load_cp_table(cp, codepage_table) < 0) {
+ cp = INIT_CODEPAGE; /* table may be clobbered now */
+ tid = cp_info[cp].tid;
+ }
+
+ cp_lock_enter();
+
+ if (load) {
+ loaded_cp_tid = tid;
+ cp_table_ref &= ~CP_LOADING;
+ }
+ }
+
+ cp_lock_leave();
while (count--) {
+ unsigned short ucs, tmp;
+
if (*iso < 128 || cp == UTF_8) /* Already UTF-8 */
*utf8++ = *iso++;
else {
-
- /* cp tells us which codepage to convert from */
- switch (cp) {
- case ISO_8859_7: /* Greek */
- case WIN_1252: /* Western European */
- case WIN_1251: /* Cyrillic */
- case ISO_8859_9: /* Turkish */
- case ISO_8859_2: /* Latin Extended */
- case WIN_1250: /* Central European */
-#ifdef HAVE_LCD_BITMAP
- case ISO_8859_8: /* Hebrew */
- case ISO_8859_11: /* Thai */
- case WIN_1256: /* Arabic */
-#endif
+ /* tid tells us which table to use and how */
+ switch (tid) {
+ case CP_TID_ISO: /* Greek */
+ /* Hebrew */
+ /* Cyrillic */
+ /* Thai */
+ /* Arabic */
+ /* Turkish */
+ /* Latin Extended */
+ /* Central European */
+ /* Western European */
tmp = ((cp-1)*128) + (*iso++ - 128);
- ucs = codepage_table[tmp];
+ ucs = table[tmp];
break;
#ifdef HAVE_LCD_BITMAP
- case SJIS: /* Japanese */
+ case CP_TID_932: /* Japanese */
if (*iso > 0xA0 && *iso < 0xE0) {
tmp = *iso++ | (0xA100 - 0x8000);
- ucs = codepage_table[tmp];
+ ucs = table[tmp];
break;
}
- case GB_2312: /* Simplified Chinese */
- case KSX_1001: /* Korean */
- case BIG_5: /* Traditional Chinese */
+ case CP_TID_936: /* Simplified Chinese */
+ case CP_TID_949: /* Korean */
+ case CP_TID_950: /* Traditional Chinese */
if (count < 1 || !iso[1]) {
ucs = *iso++;
break;
@@ -256,7 +371,7 @@ unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8,
tmp = *iso++ << 8;
tmp |= *iso++;
tmp -= 0x8000;
- ucs = codepage_table[tmp];
+ ucs = table[tmp];
count--;
break;
#endif /* HAVE_LCD_BITMAP */
@@ -271,6 +386,17 @@ unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8,
utf8 = utf8encode(ucs, utf8);
}
}
+
+ if (table) {
+ cp_lock_enter();
+ if (table == codepage_table) {
+ cp_table_ref--;
+ } else {
+ default_cp_table_ref--;
+ }
+ cp_lock_leave();
+ }
+
return utf8;
}
@@ -288,7 +414,7 @@ unsigned char* utf16LEdecode(const unsigned char *utf16, unsigned char *utf8,
utf16 += 4;
count -= 2;
} else {
- ucs = (utf16[0] | (utf16[1] << 8));
+ ucs = getle16(utf16);
utf16 += 2;
count -= 1;
}
@@ -310,7 +436,7 @@ unsigned char* utf16BEdecode(const unsigned char *utf16, unsigned char *utf8,
utf16 += 4;
count -= 2;
} else {
- ucs = (utf16[0] << 8) | utf16[1];
+ ucs = getbe16(utf16);
utf16 += 2;
count -= 1;
}
@@ -400,8 +526,50 @@ const unsigned char* utf8decode(const unsigned char *utf8, unsigned short *ucs)
void set_codepage(int cp)
{
- default_codepage = cp;
- return;
+ if (cp < 0 || cp >= NUM_CODEPAGES)
+ cp = NUM_CODEPAGES; /* out of range: use the extra last cp_info[] entry */
+
+ /* Make cp the default codepage and install its table as the default one.
+
+ load first then swap if load is successful, else just leave it; if
+ handle is 0 then we just free the current one; this won't happen often
+ thus we don't worry about reusing it and consequently avoid possible
+ clobbering of the existing one */
+
+ int handle = -1; /* -1: nothing loaded yet; afterwards the new (or, once swapped, the old) handle */
+ int tid = cp_info[cp].tid;
+
+ while (1) {
+ cp_lock_enter();
+
+ if (default_cp_tid == tid)
+ break; /* requested table is already the default one */
+
+ if (handle >= 0 && default_cp_table_ref == 0) {
+ /* new table is loaded and nobody uses the old one: swap them */
+ int hold = default_cp_handle;
+ default_cp_handle = handle;
+ handle = hold;
+ default_cp_tid = tid;
+ break;
+ }
+
+ /* alloc and load must be done outside the lock */
+ cp_lock_leave();
+
+ if (handle < 0 && (handle = alloc_and_load_cp_table(cp, NULL)) < 0)
+ return; /* load failed (OOM, or the file could not be opened/read); change nothing */
+
+ yield(); /* let current users of the default table finish before retrying */
+ }
+
+ default_cp = cp; /* both break paths arrive here with the lock still held */
+ cp_lock_leave();
+
+ if (handle > 0)
+ core_free(handle); /* the swapped-out old table, or a new one that was not installed */
+}
+
+int get_codepage(void) /* return the default codepage as stored by set_codepage() */
+{
+ return default_cp;
}
/* seek to a given char in a utf8 string and
@@ -418,9 +586,16 @@ int utf8seek(const unsigned char* utf8, int offset)
return pos;
}
-const char* get_codepage_name(int cp)
+const char * get_codepage_name(int cp) /* name string of codepage cp, taken from cp_info[] */
{
- if (cp < 0 || cp>= NUM_CODEPAGES)
- return name_codepages[NUM_CODEPAGES];
- return name_codepages[cp];
+ if (cp < 0 || cp >= NUM_CODEPAGES)
+ cp = NUM_CODEPAGES; /* out of range: use the extra last cp_info[] entry */
+ return cp_info[cp].name;
}
+
+#if 0 /* not needed just now */
+void unicode_init(void)
+{
+ cp_lock_init();
+}
+#endif