本帖最后由 tomzbj 于 2018-2-16 15:38 编辑
需要在点阵LCD之类场合使用少量汉字时一般是做成小字库.
以前用纯c做过, 这次用python3重新实现了一遍, gb2312和utf-8编码的源文件都可以识别.
- import sys
- import chardet
- target = "AVR" # Build target: "AVR" makes the script emit PROGMEM and the <avr/pgmspace.h> include; any other value emits plain const tables.
def GetContent(path):
    """Read the file at *path* in binary mode and return its whole content as bytes."""
    with open(path, "rb") as stream:
        payload = stream.read()
    return payload
- # main
UTF8_BOM = b"\xef\xbb\xbf"  # 3-byte signature some editors prepend to UTF-8 files
GLYPH_SIZE = 32             # bytes per glyph record in the font file
ROW_INDENT = "    "         # indentation of every row inside a generated C array


def collect_hanzi(text):
    """Return the distinct non-ASCII characters of *text*, sorted by code point.

    Every character above 127 is assumed to be a glyph that must be present
    in the generated font. Sorting keeps the output deterministic.
    """
    return sorted({ch for ch in text if ord(ch) > 127})


def build_tables(chars, ziku):
    """Build the lookup tables for *chars* from the raw font data *ziku*.

    Each character is encoded on its own, so a character whose UTF-8 form is
    not exactly three bytes, or that has no GB2312 code, cannot shift the
    entries of the characters that follow it.

    Returns a tuple ``(gb_codes, utf8_codes, mask, skipped)``:
      * gb_codes   -- GB2312 code of each kept character (high byte * 256 + low byte)
      * utf8_codes -- UTF-8 bytes of each kept character packed big-endian into an int
      * mask       -- the concatenated 32-byte glyphs, as bytes
      * skipped    -- characters that have no GB2312 encoding and were left out

    Raises ValueError when the font data is too short to contain a glyph.
    """
    gb_codes = []
    utf8_codes = []
    mask = bytearray()
    skipped = []
    for ch in chars:
        try:
            gb = ch.encode("gb2312")
        except UnicodeEncodeError:
            skipped.append(ch)
            continue
        if len(gb) != 2:
            skipped.append(ch)
            continue
        # The font file stores 94 glyphs per zone; zone and position both start at 0xA1.
        offset = ((gb[0] - 0xA1) * 94 + (gb[1] - 0xA1)) * GLYPH_SIZE
        glyph = ziku[offset:offset + GLYPH_SIZE]
        if len(glyph) != GLYPH_SIZE:
            raise ValueError(
                "font file too short: no glyph for %r at offset %d" % (ch, offset))
        gb_codes.append(gb[0] * 256 + gb[1])
        utf8_codes.append(int.from_bytes(ch.encode("utf-8"), "big"))
        mask += glyph
    return gb_codes, utf8_codes, bytes(mask), skipped


def format_c_array(declaration, values, fmt, per_row):
    """Return ``declaration = { ... };`` with *values* formatted by *fmt*.

    Values are laid out *per_row* to a line. Joining the cells avoids any
    trailing-separator trimming, so an empty *values* still yields a
    well-formed (empty) brace pair instead of a corrupted header.
    """
    cells = [fmt % value for value in values]
    rows = [", ".join(cells[i:i + per_row]) for i in range(0, len(cells), per_row)]
    body = ",\n".join(ROW_INDENT + row for row in rows)
    return "%s = {\n%s\n};\n" % (declaration, body)


def render_c_source(enc, gb_codes, utf8_codes, mask, target_name):
    """Return the complete text of the generated C file.

    For the "AVR" target the tables carry PROGMEM and the file includes
    <avr/pgmspace.h>, which declares that keyword; other targets get plain
    const tables.
    """
    if target_name == "AVR":
        qualifier = " PROGMEM"
        header = "#include <avr/pgmspace.h>\n\n"
    else:
        qualifier = ""
        header = ""
    parts = [
        header,
        "const int cfont_num = %d;\n" % len(gb_codes),
        # The encoding name must be a quoted C string literal.
        "const char cfont_source_encoding[] = \"%s\";\n" % enc,
        format_c_array("const unsigned short cfont_icodes_gb2312[]" + qualifier,
                       gb_codes, "0x%04x", 9),
        format_c_array("const unsigned long cfont_icodes_utf8[]" + qualifier,
                       utf8_codes, "0x%06x", 7),
        format_c_array("const unsigned char cfont_mask[]" + qualifier,
                       mask, "0x%02x", 12),
    ]
    return "".join(parts)


def main(argv):
    """Generate the small font file; return the process exit status.

    argv[1] is the font file, argv[2] the source file holding the string
    constants, argv[3] the C file to write.
    """
    if len(argv) < 4:
        print("usage: %s <font file> <source file> <output file>" % argv[0],
              file=sys.stderr)
        return 1

    try:
        ziku = GetContent(argv[1])    # whole font file in memory
        source = GetContent(argv[2])  # source file containing the strings
    except OSError as err:
        print("cannot read input: %s" % err, file=sys.stderr)
        return 1

    # Drop the UTF-8 signature so it is neither decoded nor counted as a character.
    if source.startswith(UTF8_BOM):
        source = source[len(UTF8_BOM):]
    enc = chardet.detect(source)["encoding"]
    if enc is None:
        print("cannot detect the encoding of %s" % argv[2], file=sys.stderr)
        return 1
    enc = enc.upper()
    try:
        text = source.decode(encoding=enc)
    except (LookupError, UnicodeDecodeError) as err:
        print("cannot decode %s as %s: %s" % (argv[2], enc, err), file=sys.stderr)
        return 1

    try:
        gb_codes, utf8_codes, mask, skipped = build_tables(collect_hanzi(text), ziku)
    except ValueError as err:
        print(err, file=sys.stderr)
        return 1
    if skipped:
        print("skipped %d character(s) without a GB2312 code: %s"
              % (len(skipped), "".join(skipped)), file=sys.stderr)
    if not gb_codes:
        print("no usable non-ASCII characters found in %s" % argv[2], file=sys.stderr)
        return 1

    output = render_c_source(enc, gb_codes, utf8_codes, mask, target)
    try:
        with open(argv[3], "w") as f:  # write the target file
            f.write(output)
    except OSError as err:
        print("err 3: %s" % err, file=sys.stderr)
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
复制代码
使用方法: font_gen.py HZK16V cstring.c cfont.c
其中HZK16V是16点阵/纵向取模的字库文件, 见附件:
把所有需要用到的汉字字符串常量放进cstring.c, 比如:
- unsigned char STRING_1[] = "浔阳江头夜送客";
- unsigned char STRING_2[] = "枫叶荻花秋瑟瑟";
复制代码
cfont.c是输出文件名, 上面例子的效果:
- #include <avr/pgmspace.h>
- const int cfont_num = 13;
- const char cfont_source_encoding[] = "UTF-8";
- const unsigned short cfont_icodes_gb2312[] PROGMEM = {
- 0xd2b6, 0xd2b9, 0xcdb7, 0xbfcd, 0xb7e3, 0xbdad, 0xe4b1, 0xc9aa, 0xc7ef,
- 0xbba8, 0xddb6, 0xcbcd, 0xd1f4
- };
- const unsigned long cfont_icodes_utf8[] PROGMEM = {
- 0xe58fb6, 0xe5a49c, 0xe5a4b4, 0xe5aea2, 0xe69eab, 0xe6b19f, 0xe6b594,
- 0xe7919f, 0xe7a78b, 0xe88ab1, 0xe88dbb, 0xe98081, 0xe998b3
- };
- const unsigned char cfont_mask[] PROGMEM = {
- 0x00, 0x00, 0xfc, 0x1f, 0x04, 0x08, 0x04, 0x08, 0xfe, 0x1f, 0x44, 0x00,
- 0x40, 0x00, 0x40, 0x00, 0x40, 0x00, 0xff, 0xff, 0x40, 0x00, 0x40, 0x00,
- 0x40, 0x00, 0x60, 0x00, 0x40, 0x00, 0x00, 0x00, 0x04, 0x02, 0x04, 0x01,
- 0x84, 0x00, 0xe4, 0xff, 0x1c, 0x82, 0x04, 0x81, 0x85, 0x41, 0x46, 0x22,
- 0xbc, 0x14, 0x24, 0x09, 0x24, 0x14, 0x24, 0x23, 0xe4, 0x40, 0x06, 0xc0,
- 0x04, 0x40, 0x00, 0x00, 0x00, 0x01, 0x00, 0x81, 0x10, 0x81, 0x20, 0x41,
- 0x64, 0x41, 0x08, 0x21, 0x18, 0x11, 0x00, 0x0d, 0xff, 0x03, 0x00, 0x09,
- 0x00, 0x09, 0x00, 0x11, 0x00, 0x61, 0x80, 0xc1, 0x00, 0x01, 0x00, 0x00,
- 0x10, 0x04, 0x0c, 0x04, 0x84, 0x02, 0x44, 0x02, 0x3c, 0xfd, 0x54, 0x45,
- 0x95, 0x44, 0x96, 0x44, 0x94, 0x44, 0x54, 0x45, 0x34, 0xfd, 0x14, 0x02,
- 0x04, 0x02, 0x14, 0x06, 0x0c, 0x02, 0x00, 0x00, 0x10, 0x04, 0x10, 0x03,
- 0xd0, 0x00, 0xff, 0xff, 0x50, 0x80, 0x90, 0x60, 0xfc, 0x1f, 0x44, 0x08,
- 0x84, 0x04, 0x04, 0x03, 0x84, 0x04, 0x44, 0x08, 0xfe, 0x7f, 0x04, 0x80,
- 0x00, 0xe0, 0x00, 0x00, 0x10, 0x04, 0x21, 0x04, 0x62, 0xfe, 0x06, 0x01,
- 0x80, 0x20, 0x04, 0x20, 0x04, 0x20, 0x04, 0x20, 0x04, 0x20, 0xfc, 0x3f,
- 0x04, 0x20, 0x04, 0x20, 0x06, 0x20, 0x04, 0x30, 0x00, 0x20, 0x00, 0x00,
- 0x10, 0x04, 0x62, 0x04, 0x04, 0xfe, 0x8c, 0x01, 0x60, 0x02, 0x02, 0x02,
- 0x92, 0x06, 0x92, 0x1a, 0x92, 0x02, 0x92, 0x42, 0x92, 0x82, 0x92, 0x7f,
- 0xff, 0x02, 0x02, 0x03, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x82, 0x98,
- 0x92, 0x80, 0x92, 0x7c, 0xfe, 0xc0, 0x93, 0xa0, 0x92, 0xa1, 0x00, 0x96,
- 0x92, 0x88, 0x92, 0x84, 0xfe, 0x82, 0x92, 0x80, 0xd3, 0xe0, 0x82, 0x08,
- 0x00, 0x30, 0x00, 0x00, 0x24, 0x08, 0x24, 0x06, 0xa4, 0x01, 0xfe, 0xff,
- 0x23, 0x81, 0x22, 0x42, 0x80, 0x20, 0x70, 0x18, 0x00, 0x06, 0xff, 0x01,
- 0x00, 0x06, 0x40, 0x08, 0x20, 0x30, 0x30, 0x60, 0x00, 0x20, 0x00, 0x00,
- 0x04, 0x04, 0x04, 0x02, 0x04, 0x01, 0xc4, 0xff, 0x3f, 0x00, 0x04, 0x20,
- 0x04, 0x10, 0x04, 0x08, 0xe4, 0x3f, 0x04, 0x42, 0x1f, 0x41, 0x84, 0x40,
- 0xc4, 0x40, 0x06, 0x40, 0x04, 0x70, 0x00, 0x00, 0x04, 0x10, 0x04, 0x11,
- 0x14, 0x49, 0xa4, 0x84, 0x44, 0x42, 0xaf, 0x3f, 0x04, 0x80, 0x04, 0x42,
- 0xc4, 0x31, 0x0f, 0x0c, 0xf4, 0x03, 0x04, 0x1c, 0x04, 0x61, 0xc6, 0xc0,
- 0x04, 0x40, 0x00, 0x00, 0x40, 0x00, 0x42, 0x40, 0x44, 0x20, 0xcc, 0x1f,
- 0x80, 0x20, 0x88, 0x40, 0x89, 0xa0, 0x8a, 0x90, 0x8c, 0x8c, 0xf8, 0x83,
- 0x8c, 0x88, 0x8a, 0x90, 0x89, 0xb0, 0xc8, 0xc0, 0x80, 0x40, 0x00, 0x00,
- 0x00, 0x00, 0xfe, 0xff, 0x02, 0x08, 0x22, 0x10, 0xda, 0x08, 0x06, 0x07,
- 0x00, 0x00, 0xfe, 0xff, 0x82, 0x40, 0x82, 0x40, 0x82, 0x40, 0x82, 0x40,
- 0x82, 0x40, 0xff, 0xff, 0x02, 0x00, 0x00, 0x00
- };
复制代码
在程序中调用时先判断cfont_source_encoding是"GB2312"还是"UTF-8". 如果是前者, 则依次在cfont_icodes_gb2312[]查找需要显示的汉字的内码, 找到序号后从cfont_mask[]里读取相应的字模, 之后源字符串指针+=2. 如果是"UTF-8"则在cfont_icodes_utf8[]里查找, 之后源字符串指针要+=3.
把这个py文件加入到工程中, 编译前先执行, 这样就实现了修改cstring.c里的汉字字符串常量之后自动更新cfont.c里的字库.
每个汉字需要32+2+4=38字节的存储空间. 如果不需要自动判断源程序的编码, 还可以去掉cfont_icodes_gb2312和cfont_icodes_utf8其中之一, 可以再节约几个字节.
这里没有把内码和字模按内码排序, 查找时需要逐个比较, 效率比较低. 如果需要显示的汉字稍微多一些, 可以把内码和字模按其中一种内码排序, 读取时可以用二分查找, 速度就快多了. 不过不知道GB2312和UTF-8的汉字排列顺序是不是一致, 如果不一致的话就不容易实现自动兼容两者了.
如果需要显示的汉字很多... 还是外挂一片spi flash把整个字库存进去比较好. ps. GT23/GT30系列的字库IC, 其实就是一片GD25Q16. |