forked from hmgle/led_fan
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathencoding_convert.c
177 lines (161 loc) · 3.45 KB
/
encoding_convert.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
#include "encoding_convert.h"
#include <stdlib.h>
#include <string.h>
#include <assert.h>
/* Size in bytes of the line buffer that mem_gb2312() reads table lines into. */
#ifndef MAX_LINE
#define MAX_LINE 1024
#endif
/*
 * Size in bytes of the table allocated by mem_gb2312(). Each entry is stored
 * as two uint16_t values (4 bytes), so the default leaves room for 7580
 * entries.
 */
#ifndef GB2312_MEM_SIZE
#define GB2312_MEM_SIZE (7580 * 4)
#endif
/*
 * Mapping data, bound to the symbol _binary_GB2312_start through an asm label.
 * mem_gb2312() casts it to a C string and parses it line by line.
 * NOTE(review): the symbol name looks like an objcopy/ld-embedded file, and the
 * string functions used to parse it need a terminating NUL -- confirm the
 * embedded data provides one.
 */
extern uint8_t MEM_GB2312_UNICODE[] asm("_binary_GB2312_start");
/*
 * Classify the lead byte of a UTF-8 sequence.
 *
 * src: pointer to the first byte of the sequence (only that byte is read).
 *
 * Returns the sequence length in bytes (1 to 4) implied by the lead byte, or
 * -1 when the byte cannot start a sequence (a continuation byte 0x80-0xBF or
 * a value of 0xF8 and above).
 */
int
get_utf8_length(const uint8_t *src)
{
	const uint8_t lead = *src;

	if ((lead & 0x80) == 0x00)	/* 0xxxxxxx */
		return 1;
	if ((lead & 0xE0) == 0xC0)	/* 110xxxxx */
		return 2;
	if ((lead & 0xF0) == 0xE0)	/* 1110xxxx */
		return 3;
	if ((lead & 0xF8) == 0xF0)	/* 11110xxx */
		return 4;
	return -1;
}
/*
 * Decode one UTF-8 sequence of up to three bytes into a 16-bit code point.
 *
 * src: start of the UTF-8 sequence.
 * dst: receives the code point as two bytes, little-endian (dst[0] is the low
 *      byte, dst[1] the high byte). Left untouched on failure.
 *
 * Returns the number of source bytes consumed (1 to 3), or -1 when the lead
 * byte is invalid, a continuation byte is not of the form 10xxxxxx, or the
 * sequence is four bytes long (not supported).
 *
 * Continuation bytes are checked one at a time, left to right, so a sequence
 * cut short by the string terminator is rejected before any byte beyond the
 * terminator is read.
 */
static int
utf8tounicode(const uint8_t *src, uint8_t *dst)
{
	uint8_t lo;
	uint8_t hi;
	const int length = get_utf8_length(src);

	switch (length) {
	case 1:
		lo = src[0];
		hi = 0;
		break;
	case 2:
		if ((src[1] & 0xC0) != 0x80)
			return -1;
		/* 110aaabb 10cccccc -> high = 00000aaa, low = bbcccccc */
		lo = (uint8_t)((src[1] & 0x3F) | ((src[0] & 0x03) << 6));
		hi = (uint8_t)((src[0] & 0x1C) >> 2);
		break;
	case 3:
		if ((src[1] & 0xC0) != 0x80 || (src[2] & 0xC0) != 0x80)
			return -1;
		/* 1110aaaa 10bbbbcc 10dddddd -> high = aaaabbbb, low = ccdddddd */
		lo = (uint8_t)((src[2] & 0x3F) | ((src[1] & 0x03) << 6));
		hi = (uint8_t)(((src[1] & 0x3C) >> 2) | ((src[0] & 0x0F) << 4));
		break;
	default:
		/* invalid lead byte, or a 4-byte sequence (not supported) */
		return -1;
	}

	dst[0] = lo;
	dst[1] = hi;
	return length;
}
/*
 * Look up the GB2312 code for a 16-bit Unicode code point.
 *
 * unicode:    code point to search for.
 * mem_gb2312: table of gb2312_num pairs laid out as
 *             { unicode, gb2312, unicode, gb2312, ... }.
 * gb2312_num: number of pairs in the table.
 *
 * Returns the GB2312 value of the first matching pair. When no pair matches
 * the function does not return: it trips an assertion and, if assertions are
 * compiled out, terminates the process with exit status 1.
 */
static uint16_t
unicode_to_gb2312(uint16_t unicode, const uint16_t *mem_gb2312, int gb2312_num)
{
	const uint16_t *pair = mem_gb2312;
	int remaining;

	for (remaining = gb2312_num; remaining > 0; remaining--) {
		if (pair[0] == unicode)
			return pair[1];
		pair += 2;
	}

	assert(0);
	exit(1);
}
/*
 * Unicode -> GB2312 lookup table. NULL until get_gb2312_by_utf8() builds it
 * with mem_gb2312() on its first call.
 */
static uint16_t *MEM_GB2312;
/* Number of entries in MEM_GB2312, as reported by mem_gb2312(). */
static int GB2312_NUM;
/*
 * Convert one hexadecimal digit character to its numeric value.
 *
 * hex_ch: '0'-'9', 'a'-'f' or 'A'-'F'.
 *
 * Returns 0-15 for a valid digit. Any other character yields (uint8_t)-1,
 * i.e. 255.
 */
static inline uint8_t
hex_ch_to_val(char hex_ch)
{
	uint8_t value = (uint8_t)-1;

	if ('a' <= hex_ch && hex_ch <= 'f')
		value = (uint8_t)(hex_ch - 'a' + 10);
	else if ('A' <= hex_ch && hex_ch <= 'F')
		value = (uint8_t)(hex_ch - 'A' + 10);
	else if ('0' <= hex_ch && hex_ch <= '9')
		value = (uint8_t)(hex_ch - '0');

	return value;
}
/*
 * Return the next line of an in-memory, NUL-terminated text buffer.
 *
 * Not thread-safe and not reusable: the read position lives in static
 * variables. `from` is only honoured on the very first call; every later call
 * continues from where the previous one stopped, whatever is passed.
 *
 * from: text to read from (used on the first call only).
 * to:   destination buffer of at least MAX_LINE bytes. At most MAX_LINE - 1
 *       characters are stored, followed by a NUL; the remainder of a longer
 *       line is dropped instead of being written past the end of the buffer.
 *
 * Returns `to` when a line was copied (the newline is not included), or NULL
 * when the text is exhausted or the next line is empty. An empty line does
 * not advance the read position, so it ends the iteration.
 */
#ifndef MAX_LINE
#define MAX_LINE 1024	/* fallback; normally defined near the top of the file */
#endif
static char *
buf_getline(const char *from, char *to)
{
	static int start_flag = 1;
	static const char *start;
	size_t line_len;
	size_t copy_len;

	if (start_flag) {
		start_flag = 0;
		start = from;
	}
	if (start == NULL)
		return NULL;

	line_len = strcspn(start, "\n");
	if (line_len == 0)
		return NULL;

	copy_len = line_len;
	if (copy_len > (size_t)(MAX_LINE - 1))
		copy_len = (size_t)(MAX_LINE - 1);
	memcpy(to, start, copy_len);
	to[copy_len] = '\0';

	/* Step over the newline, or mark the text as finished if there is none. */
	if (start[line_len] == '\n')
		start += line_len + 1;
	else
		start = NULL;

	return to;
}
/*
 * Build the Unicode -> GB2312 lookup table from the embedded mapping text.
 *
 * Lines of MEM_GB2312_UNICODE that contain "/x" are parsed as follows: the
 * four hex digits after the first 'U' give the Unicode code point, and the
 * "/xHH" (one byte) or "/xHH/xHH" (two bytes) that follows gives the GB2312
 * code. A two-byte code is stored with its first byte in the low 8 bits and
 * its second byte in the high 8 bits. Lines without "/x", and lines too short
 * or otherwise malformed for these fixed-offset reads, are skipped.
 *
 * gb2312_num: receives the number of entries stored.
 *
 * Returns a heap block of GB2312_MEM_SIZE bytes laid out as
 * { unicode, gb2312 } uint16_t pairs; the caller owns it. The process exits
 * with status 1 if the allocation fails. Parsing stops, with a message on
 * stderr, once the block is full, so the table is never written out of bounds.
 */
static uint16_t *
mem_gb2312(int *gb2312_num)
{
	const int capacity = (int)(GB2312_MEM_SIZE / (2 * sizeof(uint16_t)));
	uint16_t *ptrmem;
	char *ptrch;
	char buf[MAX_LINE];
	int i = 0;

	ptrmem = malloc(GB2312_MEM_SIZE);
	if (!ptrmem) {
		perror("malloc");
		exit(1);
	}
	memset(ptrmem, 0, GB2312_MEM_SIZE);

	while (buf_getline((const char *)MEM_GB2312_UNICODE, buf) != NULL) {
		uint16_t ucs;
		uint16_t gb;

		if (strstr(buf, "/x") == NULL)
			continue;

		/* unicode: four hex digits right after the first 'U' */
		ptrch = strchr(buf, 'U');
		if (ptrch == NULL)
			continue;
		ptrch++;
		if (strlen(ptrch) < 4)
			continue;
		ucs = hex_ch_to_val(ptrch[0]) * 0x1000
			+ hex_ch_to_val(ptrch[1]) * 0x100
			+ hex_ch_to_val(ptrch[2]) * 0x10
			+ hex_ch_to_val(ptrch[3]);

		/* gb2312: the first "/x" escape after the code point */
		ptrch = strstr(ptrch, "/x");
		if (ptrch == NULL)
			continue;
		ptrch += 2;
		if (strlen(ptrch) < 2)
			continue;
		if (ptrch[2] != '/') { /* single byte */
			gb = hex_ch_to_val(ptrch[1])
				+ hex_ch_to_val(ptrch[0]) * 0x10;
		} else { /* two bytes: "HH/xHH" */
			if (strlen(ptrch) < 6)
				continue;
			gb = hex_ch_to_val(ptrch[5]) * 0x100
				+ hex_ch_to_val(ptrch[4]) * 0x1000
				+ hex_ch_to_val(ptrch[1])
				+ hex_ch_to_val(ptrch[0]) * 0x10;
		}

		if (i >= capacity) {
			fprintf(stderr,
				"mem_gb2312(): more than %d entries, table truncated\n",
				capacity);
			break;
		}
		ptrmem[2 * i] = ucs;
		ptrmem[2 * i + 1] = gb;
		i++;
	} /* i should be 7573 */

	*gb2312_num = i;
	return ptrmem;
}
/*
 * Convert the UTF-8 character at `utf8` to its GB2312 code.
 *
 * The lookup table is built by mem_gb2312() on the first call and kept in
 * MEM_GB2312 / GB2312_NUM for later calls.
 *
 * utf8: start of a UTF-8 sequence; only the first character is converted.
 *
 * Returns the GB2312 code found by unicode_to_gb2312(). The function does not
 * return on failure: a sequence that utf8tounicode() rejects trips an
 * assertion and, when assertions are compiled out, exits with status 1
 * instead of looking up a zeroed code point.
 */
uint16_t
get_gb2312_by_utf8(const uint8_t *utf8)
{
	uint8_t unicode[2] = {0};
	int ret;

	if (MEM_GB2312 == NULL) {
		MEM_GB2312 = mem_gb2312(&GB2312_NUM);
		if (MEM_GB2312 == NULL) {
			fprintf(stderr, "mem_gb2312() failed!\n");
			exit(1);
		}
	}

	ret = utf8tounicode(utf8, unicode);
	assert(ret > 0);
	if (ret <= 0) {
		/* Only reached with NDEBUG: same assert-then-exit pattern as unicode_to_gb2312(). */
		fprintf(stderr, "get_gb2312_by_utf8(): invalid UTF-8 sequence\n");
		exit(1);
	}

	/* unicode[] is little-endian: [0] is the low byte, [1] the high byte. */
	return unicode_to_gb2312(unicode[0] + unicode[1] * 0x100,
				 MEM_GB2312, GB2312_NUM);
}