summaryrefslogtreecommitdiff
path: root/src/utf8.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/utf8.c')
-rw-r--r--src/utf8.c87
1 files changed, 87 insertions, 0 deletions
diff --git a/src/utf8.c b/src/utf8.c
new file mode 100644
index 000000000..aa427e99b
--- /dev/null
+++ b/src/utf8.c
@@ -0,0 +1,87 @@
+#include "utf8.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+unsigned char * asciiToUtf8(unsigned char c) {
+ static unsigned char utf8[3];
+
+ memset(utf8,0,3);
+
+ if(c < 128) utf8[0] = c;
+ else if(c<192) {
+ utf8[0] = 194;
+ utf8[1] = c;
+ }
+ else {
+ utf8[0] = 195;
+ utf8[1] = c-64;
+ }
+
+ return utf8;
+}
+
+unsigned char * asciiStrToUtf8Dup(unsigned char * ascii) {
+ /* utf8 should have at most two char's per ascii char */
+ int len = strlen(ascii)*2+1;
+ unsigned char * ret = malloc(len);
+ unsigned char * cp = ret;
+ unsigned char * utf8;
+
+ memset(ret,0,len);
+
+ len = 0;
+
+ while(*ascii) {
+ utf8 = asciiToUtf8(*ascii);
+ while(*utf8) {
+ *(cp++) = *(utf8++);
+ len++;
+ }
+ ascii++;
+ }
+
+ return realloc(ret,len+1);
+}
+
+unsigned char utf8ToAscii(unsigned char * utf8) {
+ unsigned char c = 0;
+
+ if(utf8[0]<128) return utf8[0];
+ else if(utf8[0]==195) c+=64;
+ else if(utf8[0]!=194) return '?';
+ return c+utf8[1];
+}
+
+int validateUtf8Char(unsigned char * utf8Char) {
+ if(utf8Char[0]<0x80) return 1;
+
+ if(utf8Char[0]>=0xC0 && utf8Char[0]<=0xFD) {
+ int count = 1;
+ unsigned char t = 0x20;
+ int i;
+ while(count < 6 && (t & utf8Char[0])) {
+ t = (t >> 1);
+ count++;
+ }
+ if(count > 5) return 0;
+ for(i=1;i<=count;i++) {
+ if(utf8Char[i] < 0x80 || utf8Char[i] > 0xBF) return 0;
+ }
+ return count;
+ }
+ else return 0;
+}
+
+int validUtf8String(unsigned char * string) {
+ int ret;
+
+ while(*string) {
+ ret = validateUtf8Char(string);
+ if(!ret) return 0;
+ string+= ret;
+ }
+
+ return 1;
+}