写了个自动制作小字库的程序

tomzbj · 发表于 2018-2-16 00:45:22

本帖最后由 tomzbj 于 2018-2-16 15:38 编辑

需要在点阵LCD之类场合使用少量汉字时一般是做成小字库.
以前用纯c做过, 这次用python3重新实现了一遍, gb2312和utf-8编码的源文件都可以识别.

import sys
import chardet
# Build target. "AVR" makes the generator include <avr/pgmspace.h> and tag
# every table with PROGMEM; any other value emits plain const arrays.
target = "AVR"
def GetContent(path):
    """Return the entire contents of the file at ``path`` as ``bytes``.

    The file is opened in binary mode, so the data is returned exactly as
    stored on disk (no decoding, no newline translation).

    Raises:
        OSError: if the file cannot be opened or read.
    """
    with open(path, mode="rb") as f:
        return f.read()
# Size in bytes of one 16x16 glyph in the HZK16 font file.
_GLYPH_BYTES = 32
# Indentation used for the rows of the generated C initialisers.
_ROW_INDENT = "    "


def _format_c_array(decl, items, per_line, suffix):
    """Render one C array definition.

    ``decl`` is everything left of the optional ``suffix`` (for example
    ``"const unsigned char cfont_mask[]"``), ``items`` the already formatted
    initialiser strings and ``per_line`` how many of them go on one row.
    """
    head = "%s %s = {\n" % (decl, suffix) if suffix else "%s = {\n" % decl
    if not items:
        # An empty initialiser is not valid standard C; emit one dummy entry
        # so the generated file still compiles when no glyph was collected.
        items = ["0"]
    rows = [
        _ROW_INDENT + ", ".join(items[i:i + per_line])
        for i in range(0, len(items), per_line)
    ]
    return head + ",\n".join(rows) + "\n};\n"


def _decode_source(raw):
    """Detect the encoding of ``raw`` and return ``(text, encoding_name)``.

    The encoding name is upper-cased so the generated C constant can be
    compared against "GB2312" / "UTF-8" regardless of how chardet spells it.
    Raises ``ValueError`` when the encoding cannot be detected or the data
    cannot be decoded with it.
    """
    enc = chardet.detect(raw)["encoding"]
    if enc is None:
        raise ValueError("cannot detect the encoding of the source file")
    enc = enc.upper()
    if enc == "UTF-8-SIG":
        # Some UTF-8 files start with a 3-byte signature (BOM); drop it.
        raw = raw[3:]
        enc = "UTF-8"
    return raw.decode(encoding=enc), enc


def build_cfont_source(text, ziku, enc, target="AVR"):
    """Build the C source of the small font for the characters in ``text``.

    Args:
        text: decoded contents of the file holding the string constants.
        ziku: raw bytes of the HZK16 font file (32 bytes per glyph).
        enc: name of the source encoding, written to cfont_source_encoding.
        target: "AVR" adds the <avr/pgmspace.h> include and the PROGMEM
            keyword; any other value emits plain const arrays.

    Returns:
        The generated C source as a string.

    Every distinct non-ASCII character is assumed to be a wanted glyph.
    Characters are emitted in Unicode code point order. Characters that do
    not exist in GB2312 have no glyph in the font file; they are skipped
    with a warning on stderr. Each UTF-8 code is the character's own UTF-8
    byte sequence (it may be shorter than 3 bytes), so the tables stay
    aligned for any input.

    Raises:
        ValueError: if the font file is too short to contain a glyph.
    """
    is_avr = target == "AVR"
    # On AVR the tables need PROGMEM to be placed in flash.
    suffix = "PROGMEM" if is_avr else ""

    icodes_gb2312 = []
    icodes_utf8 = []
    mask = bytearray()
    # Deduplicate, then sort so the output does not depend on set ordering.
    for ch in sorted({c for c in text if ord(c) > 127}):
        try:
            gb = ch.encode("gb2312")
        except UnicodeEncodeError:
            print("warning: U+%04X is not in GB2312, skipped" % ord(ch),
                  file=sys.stderr)
            continue
        # Offset of the glyph in the HZK16 file, derived from the GB2312
        # code: 94 glyphs per row, rows and columns both start at 0xA1.
        ofs = ((gb[0] - 0xA1) * 94 + (gb[1] - 0xA1)) * _GLYPH_BYTES
        glyph = ziku[ofs:ofs + _GLYPH_BYTES]
        if len(glyph) != _GLYPH_BYTES:
            raise ValueError(
                "font file too short: no glyph at offset %d for U+%04X"
                % (ofs, ord(ch)))
        icodes_gb2312.append(gb[0] * 256 + gb[1])
        icodes_utf8.append(int.from_bytes(ch.encode("utf-8"), "big"))
        mask += glyph

    parts = []
    if is_avr:
        # Without this header the compiler does not know PROGMEM.
        parts.append("#include <avr/pgmspace.h>\n\n")
    parts.append("const int cfont_num = %d;\n" % len(icodes_gb2312))
    parts.append('const char cfont_source_encoding[] = "%s";\n' % enc)
    parts.append(_format_c_array(
        "const unsigned short cfont_icodes_gb2312[]",
        ["0x%04x" % code for code in icodes_gb2312], 9, suffix))
    parts.append(_format_c_array(
        "const unsigned long cfont_icodes_utf8[]",
        ["0x%06x" % code for code in icodes_utf8], 7, suffix))
    parts.append(_format_c_array(
        "const unsigned char cfont_mask[]",
        ["0x%02x" % b for b in mask], 12, suffix))
    return "".join(parts)


def main(argv):
    """Command line entry point: ``font_gen.py HZK16V cstring.c cfont.c``.

    Returns the process exit status (0 on success, 1 on any error).
    """
    if len(argv) < 4:
        print("usage: %s HZK16_FILE SOURCE_FILE OUTPUT_FILE" % argv[0],
              file=sys.stderr)
        return 1
    try:
        ziku = GetContent(argv[1])  # whole font file in memory
        # Source file containing the Chinese string constants.
        text, enc = _decode_source(GetContent(argv[2]))
        output = build_cfont_source(text, ziku, enc, target)
    except (OSError, ValueError) as err:
        print("error: %s" % err, file=sys.stderr)
        return 1
    try:
        with open(argv[3], "w") as f:  # write the target file
            f.write(output)
    except OSError as err:
        print("err 3: %s" % err)
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))

复制代码

使用方法: font_gen.py HZK16V cstring.c cfont.c
其中HZK16V是16点阵/纵向取模的字库文件, 见附件:
把所有需要用到的汉字字符串常量放进cstring.c, 比如:

/* Sample input: the generator collects every non-ASCII character found in
   this file and emits one glyph for each distinct character. */
unsigned char STRING_1[] = "浔阳江头夜送客";
unsigned char STRING_2[] = "枫叶荻花秋瑟瑟";

复制代码

cfont.c是输出文件名, 上面例子的效果:

#include <avr/pgmspace.h>
const int cfont_num = 13;
const char cfont_source_encoding[] = "UTF-8";
const unsigned short cfont_icodes_gb2312[] PROGMEM = {
0xd2b6, 0xd2b9, 0xcdb7, 0xbfcd, 0xb7e3, 0xbdad, 0xe4b1, 0xc9aa, 0xc7ef,
0xbba8, 0xddb6, 0xcbcd, 0xd1f4
};
const unsigned long cfont_icodes_utf8[] PROGMEM = {
0xe58fb6, 0xe5a49c, 0xe5a4b4, 0xe5aea2, 0xe69eab, 0xe6b19f, 0xe6b594,
0xe7919f, 0xe7a78b, 0xe88ab1, 0xe88dbb, 0xe98081, 0xe998b3
};
const unsigned char cfont_mask[] PROGMEM = {
0x00, 0x00, 0xfc, 0x1f, 0x04, 0x08, 0x04, 0x08, 0xfe, 0x1f, 0x44, 0x00,
0x40, 0x00, 0x40, 0x00, 0x40, 0x00, 0xff, 0xff, 0x40, 0x00, 0x40, 0x00,
0x40, 0x00, 0x60, 0x00, 0x40, 0x00, 0x00, 0x00, 0x04, 0x02, 0x04, 0x01,
0x84, 0x00, 0xe4, 0xff, 0x1c, 0x82, 0x04, 0x81, 0x85, 0x41, 0x46, 0x22,
0xbc, 0x14, 0x24, 0x09, 0x24, 0x14, 0x24, 0x23, 0xe4, 0x40, 0x06, 0xc0,
0x04, 0x40, 0x00, 0x00, 0x00, 0x01, 0x00, 0x81, 0x10, 0x81, 0x20, 0x41,
0x64, 0x41, 0x08, 0x21, 0x18, 0x11, 0x00, 0x0d, 0xff, 0x03, 0x00, 0x09,
0x00, 0x09, 0x00, 0x11, 0x00, 0x61, 0x80, 0xc1, 0x00, 0x01, 0x00, 0x00,
0x10, 0x04, 0x0c, 0x04, 0x84, 0x02, 0x44, 0x02, 0x3c, 0xfd, 0x54, 0x45,
0x95, 0x44, 0x96, 0x44, 0x94, 0x44, 0x54, 0x45, 0x34, 0xfd, 0x14, 0x02,
0x04, 0x02, 0x14, 0x06, 0x0c, 0x02, 0x00, 0x00, 0x10, 0x04, 0x10, 0x03,
0xd0, 0x00, 0xff, 0xff, 0x50, 0x80, 0x90, 0x60, 0xfc, 0x1f, 0x44, 0x08,
0x84, 0x04, 0x04, 0x03, 0x84, 0x04, 0x44, 0x08, 0xfe, 0x7f, 0x04, 0x80,
0x00, 0xe0, 0x00, 0x00, 0x10, 0x04, 0x21, 0x04, 0x62, 0xfe, 0x06, 0x01,
0x80, 0x20, 0x04, 0x20, 0x04, 0x20, 0x04, 0x20, 0x04, 0x20, 0xfc, 0x3f,
0x04, 0x20, 0x04, 0x20, 0x06, 0x20, 0x04, 0x30, 0x00, 0x20, 0x00, 0x00,
0x10, 0x04, 0x62, 0x04, 0x04, 0xfe, 0x8c, 0x01, 0x60, 0x02, 0x02, 0x02,
0x92, 0x06, 0x92, 0x1a, 0x92, 0x02, 0x92, 0x42, 0x92, 0x82, 0x92, 0x7f,
0xff, 0x02, 0x02, 0x03, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x82, 0x98,
0x92, 0x80, 0x92, 0x7c, 0xfe, 0xc0, 0x93, 0xa0, 0x92, 0xa1, 0x00, 0x96,
0x92, 0x88, 0x92, 0x84, 0xfe, 0x82, 0x92, 0x80, 0xd3, 0xe0, 0x82, 0x08,
0x00, 0x30, 0x00, 0x00, 0x24, 0x08, 0x24, 0x06, 0xa4, 0x01, 0xfe, 0xff,
0x23, 0x81, 0x22, 0x42, 0x80, 0x20, 0x70, 0x18, 0x00, 0x06, 0xff, 0x01,
0x00, 0x06, 0x40, 0x08, 0x20, 0x30, 0x30, 0x60, 0x00, 0x20, 0x00, 0x00,
0x04, 0x04, 0x04, 0x02, 0x04, 0x01, 0xc4, 0xff, 0x3f, 0x00, 0x04, 0x20,
0x04, 0x10, 0x04, 0x08, 0xe4, 0x3f, 0x04, 0x42, 0x1f, 0x41, 0x84, 0x40,
0xc4, 0x40, 0x06, 0x40, 0x04, 0x70, 0x00, 0x00, 0x04, 0x10, 0x04, 0x11,
0x14, 0x49, 0xa4, 0x84, 0x44, 0x42, 0xaf, 0x3f, 0x04, 0x80, 0x04, 0x42,
0xc4, 0x31, 0x0f, 0x0c, 0xf4, 0x03, 0x04, 0x1c, 0x04, 0x61, 0xc6, 0xc0,
0x04, 0x40, 0x00, 0x00, 0x40, 0x00, 0x42, 0x40, 0x44, 0x20, 0xcc, 0x1f,
0x80, 0x20, 0x88, 0x40, 0x89, 0xa0, 0x8a, 0x90, 0x8c, 0x8c, 0xf8, 0x83,
0x8c, 0x88, 0x8a, 0x90, 0x89, 0xb0, 0xc8, 0xc0, 0x80, 0x40, 0x00, 0x00,
0x00, 0x00, 0xfe, 0xff, 0x02, 0x08, 0x22, 0x10, 0xda, 0x08, 0x06, 0x07,
0x00, 0x00, 0xfe, 0xff, 0x82, 0x40, 0x82, 0x40, 0x82, 0x40, 0x82, 0x40,
0x82, 0x40, 0xff, 0xff, 0x02, 0x00, 0x00, 0x00
};

复制代码

在程序中调用时先判断cfont_source_encoding是"GB2312"还是"UTF-8". 如果是前者, 则依次在cfont_icodes_gb2312[]查找需要显示的汉字的内码, 找到序号后从cfont_mask[]里读取相应的字模(偏移量为序号×32字节), 之后源字符串指针+=2. 如果是"UTF-8"则在cfont_icodes_utf8[]里查找, 之后源字符串指针要+=3.

把这个py文件加入到工程中, 编译前先执行, 这样就实现了修改cstring.c里的汉字字符串常量之后自动更新cfont.c里的字库.
每个汉字需要32+2+4=38字节的存储空间. 如果不需要自动判断源程序的编码, 还可以去掉cfont_icodes_gb2312和cfont_icodes_utf8其中之一, 可以再节约几个字节.

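这里查找时是逐个比较, 效率比较低. 如果需要显示的汉字稍微多一些, 可以把内码和字模按其中一种内码排序, 读取时可以用二分查找, 速度就快多了. 实际上程序里的sorted(set(cstr))已经按Unicode码点排过序, 而UTF-8的字节顺序与Unicode码点顺序一致, 所以cfont_icodes_utf8[]本身就是升序的, 可以直接二分查找. 但GB2312和UTF-8的汉字排列顺序并不一致(从上面的输出可以看到cfont_icodes_gb2312[]不是升序), 所以同一组表只能对其中一种编码做二分查找, 不容易实现自动兼容两者.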

如果需要显示的汉字很多... 还是外挂一片spi flash把整个字库存进去比较好. ps. GT23/GT30系列的字库IC, 其实就是一片GD25Q16.

6623A · 发表于 2018-2-16 01:08:43

新年第一个技术帖

ourdemo · 发表于 2018-2-16 01:25:35

向你致敬

新年快乐，祝大家新春快乐！财运旺旺!

我是一个大白菜 · 发表于 2018-2-16 09:18:45

新年快乐，祝大家万事如意，财运连连！

huangqi412 · 发表于 2018-2-16 10:38:21

。。。凌晨写帖

duxingkei · 发表于 2018-2-16 11:22:28

新年快乐，我也干过类似的事情

cu_ice · 发表于 2018-2-16 11:38:44

为新年都在做技术工作的点个赞，我今天闲时也在看PDF

wei669 · 发表于 2018-2-16 11:56:17

佩服楼主

Excellence · 发表于 2018-2-16 18:44:36

新年快乐！新帖子。

gzhua20088ssj · 发表于 2018-2-16 18:55:05

新年快乐！取模软件类的吧

亲爱的混蛋 · 发表于 2018-2-18 18:10:48

感谢楼主开源

写了个自动制作小字库的程序

本帖子中包含更多资源

阿莫论坛20周年了！感谢大家的支持与爱护！！