js解析emoji表情
Emoji
公司的产品之前只有网页端,并没有提供emoji表情,之后将某个模块整合到app中,里面有个评论功能,在手机端可以输入emoji,显示的时候是空白,说明数据库并没有存储成功。查阅资料后得知,emoji在UTF-8下占四个字节,而mysql的utf8编码最多只支持3个字节(mysql5.5.3起才提供支持4字节的utf8mb4编码)。
js解析emoji
先需要了解几个概念,js的编码方式、utf16、unicode
1.JavaScript语言采用Unicode字符集,但是只支持一种编码方法ucs-2
2.utf16编码
utf16是ucs-2的超集
3.Unicode只规定了每个字符的码点,到底用什么样的字节序表示这个码点,就涉及到编码方法
由于JavaScript只能处理UCS-2编码,造成所有字符在这门语言中都是2个字节,如果是4个字节的字符,会当作两个双字节的字符处理。JavaScript的字符函数都受到这一点的影响,无法返回正确结果
emoji表情是由utf16编码的,可能是2个字节,也可能是四个字节
这里的解析我用的是twemoji库,原理是将utf16编码转为unicode码点的十六进制,并以此十六进制作为emoji图片的命名
这里的关键是如何将utf16转为unicode十六进制
UTF-16的转码公式
将unicode转为utf16,官方给了公式
Unicode码点转成UTF-16的时候,首先区分这是基本平面字符(2字节),还是辅助平面字符(4字节)。如果是前者,直接将码点转为对应的十六进制形式,长度为两字节。
如果是辅助平面字符,Unicode 3.0版给出了转码公式。
H= Math.floor((c-0x10000)/0x400)+0xD800 //高位
L = (c-0x10000)%0x400+0xDC00 //低位
将utf16转为unicode则是已知H、L,求c,学过方程组的应该都会解答:c = (H - 0xD800) * 0x400 + (L - 0xDC00) + 0x10000
给出上述转化的函数
/*
 * Unicode code point ranges:
 *   - 2-byte (basic plane) characters: 0x0000-0xFFFF
 *   - 4-byte (supplementary plane) characters: 0x010000-0x10FFFF
 *   - U+D800..U+DFFF is reserved for surrogates and holds no characters
 * JavaScript strings are sequences of 16-bit units, so a 4-byte character is
 * seen as two separate units (a high surrogate followed by a low surrogate).
 */

/**
 * Converts a string into the hexadecimal code points of its characters.
 *
 * A high surrogate (0xD800-0xDBFF) followed by a low surrogate
 * (0xDC00-0xDFFF) is combined into a single supplementary-plane code point.
 * An unpaired high surrogate is emitted as its own hex value instead of being
 * dropped or merged with an unrelated following unit.
 *
 * @param {string} unicodeSurrogates - Text to convert.
 * @param {string} [sep] - Separator placed between code points; any falsy
 *   value (including the empty string) falls back to '-'.
 * @returns {string} Lower-case hex code points joined by the separator.
 */
function toCodePoint(unicodeSurrogates, sep) {
  var codePoints = [];
  var pendingHigh = 0;
  var unit = 0;
  var i = 0;

  while (i < unicodeSurrogates.length) {
    unit = unicodeSurrogates.charCodeAt(i++);

    if (pendingHigh) {
      if (0xDC00 <= unit && unit <= 0xDFFF) {
        // Valid pair: rebuild the 4-byte code point from both halves.
        codePoints.push(
          (0x10000 + ((pendingHigh - 0xD800) << 10) + (unit - 0xDC00)).toString(16)
        );
        pendingHigh = 0;
        continue;
      }
      // The previous high surrogate has no partner: emit it unchanged and
      // fall through so the current unit is still processed normally.
      codePoints.push(pendingHigh.toString(16));
      pendingHigh = 0;
    }

    if (0xD800 <= unit && unit <= 0xDBFF) {
      // High surrogate: hold it until the next unit is known.
      pendingHigh = unit;
    } else {
      // 2-byte character: the code unit is the code point.
      codePoints.push(unit.toString(16));
    }
  }

  if (pendingHigh) {
    // The string ended on a dangling high surrogate.
    codePoints.push(pendingHigh.toString(16));
  }

  return codePoints.join(sep || '-');
}
emojipicker
页面上选择emoji表情,插入input,发送给后端时需要转为utf16
这里我用的库是jquery-emoji-picker,这里遇到一个问题,它的css中背景图片是datauri,我又需要兼容ie6,我需要将它的样式文件重写,并将图片保存起来。如果数量少,我会手动改下,结果一看,855个,果断写脚本
<?php
/*
 * One-off conversion script: reads the emoji rules of emojipicker.css (one
 * rule per line, each carrying its image as a data URI), writes every image
 * to images/<name>.png and generates emojipicker.ff.css whose rules point at
 * those image files instead of the inline data.
 */

/**
 * Parses a single ".emoji-NAME {...}" rule line.
 *
 * @param string $str One line of the source stylesheet.
 * @return array|null Array with 'filename', 'base64' and 'name', or null when
 *                    the line is not an emoji rule in the expected format.
 */
function formatData($str){
    $reg = '/^\.emoji-([^\{]+)\s+\{background-size:100% !important; background-image: url\(\'(.+)\'\);\}/'; // matches one emoji rule
    if (!preg_match($reg, $str, $matches)) {
        return null;
    }
    return array(
        'filename' => $matches[1].'.png',
        // Drop the 22-character "data:image/png;base64," prefix.
        'base64'   => substr($matches[2], 22),
        'name'     => $matches[1],
    );
}

/**
 * Decodes a base64 payload and stores it as images/<filename>.
 *
 * @param string $base64   Base64-encoded image data.
 * @param string $filename Target file name inside the images/ directory.
 * @return bool True when the file was written, false on invalid data or I/O failure.
 */
function basetopng($base64, $filename){
    $str = base64_decode($base64, true);
    if ($str === false) {
        return false;
    }
    if (!is_dir('images') && !mkdir('images', 0777, true)) {
        return false;
    }
    return file_put_contents('images/'.$filename, $str) !== false;
}

$css_file = 'emojipicker.css';
$start = 0;   // first line to read
$num = 855;   // number of lines to read
$str = '';    // contents of the generated css file

$spl_object = new SplFileObject($css_file, 'rb');
$spl_object->seek($start);
// fgets() already advances to the next line, so no explicit next() is needed.
while ($num-- && !$spl_object->eof()) {
    $tmpData = formatData($spl_object->fgets());
    if ($tmpData === null) {
        continue; // not an emoji rule (blank line, other css)
    }
    basetopng($tmpData['base64'], $tmpData['filename']);
    $str .= ".emoji-{$tmpData['name']} { background-size:100% !important; background-image: url('/Public/plugin/emojipicker/images/{$tmpData['filename']}');}\n";
}
file_put_contents('emojipicker.ff.css', $str);

?>
点击icon获得emoji的name,将name转化为'<微笑>'字样插入input,提交给后台的时候再将'<微笑>'字样转化为utf16(先转化为unicode,再转化为utf16)
我又跑去微信界面盗了点资源过来,领导说做成微信类似就好了,原来的jquery.emojipicker.a.js中的数据结构是
{
"name": "sunny",
"unicode": "2600",
"shortcode": "sunny",
"description": "BLACK SUN WITH RAYS",
"category": "thing"
}
我需要给它加点东西,变成这样
{
"name": "sunny",
"unicode": "2600",
"shortcode": "sunny",
"desc": "<太阳>",
"title": "太阳",
"category": "thing"
}
而微信的数据结构是这样 {"<太阳>" : "2600"},应该怎么对应,unicode与wx的value相等,以这个为基准
// Builds the reduced emoji list: every entry of `emojis` whose unicode value
// also appears in the WeChat face map, extended with the WeChat label as
// `desc` (e.g. "<太阳>") and the label without angle brackets as `title`.
// Expects `window.gQQFaceMap` ({label: unicode}) and `emojis` (the picker's
// emoji definitions) to be loaded already; prints the result as JSON.
var wxemojis = window.gQQFaceMap;
var a = []; // WeChat emoji unicode values that were kept, in first-seen order
var b = []; // formatted data (unicode, desc, title), parallel to `a`
var wxIndexByUnicode = {}; // lower-cased unicode -> index into `a` / `b`
var hasOwn = Object.prototype.hasOwnProperty;
var label;
var wxCode;
var wxKey;

for (label in wxemojis) {
  if (!hasOwn.call(wxemojis, label)) {
    continue;
  }
  wxCode = wxemojis[label];
  // Only values longer than 3 characters are kept
  // (presumably the shorter ones are not unicode code points - TODO confirm).
  if (wxCode.length > 3) {
    wxKey = wxCode.toLowerCase();
    // Keep the first label seen for each unicode value.
    if (!hasOwn.call(wxIndexByUnicode, wxKey)) {
      wxIndexByUnicode[wxKey] = a.length;
      a.push(wxCode);
      b.push({
        'unicode': wxCode,
        'desc': label,
        'title': label.replace(/(<|>)/g, '')
      });
    }
  }
}

var myEmojis = []; // the emoji definitions actually needed
var emojiKey;
var emoji;
var wxEntry;

for (emojiKey in emojis) {
  if (!hasOwn.call(emojis, emojiKey)) {
    continue;
  }
  emoji = emojis[emojiKey];
  wxKey = emoji.unicode.toLowerCase();
  if (hasOwn.call(wxIndexByUnicode, wxKey)) {
    wxEntry = b[wxIndexByUnicode[wxKey]];
    myEmojis.push({
      "name": emoji.name,
      "unicode": emoji.unicode,
      "shortcode": emoji.shortcode,
      "desc": wxEntry.desc,
      "title": wxEntry.title,
      "category": emoji.category
    });
  }
}

console.log(JSON.stringify(myEmojis));
生成了自己的表情数组。
将中文字样转为utf16,传给后端
/**
 * Converts hexadecimal code points into the corresponding string.
 *
 * @param {string} code - One or more hex code points separated by '-',
 *   e.g. "1f604" or "1f1e8-1f1f3".
 * @returns {string} The characters for those code points.
 */
function toUnicode(code) {
  var parts = code.split('-');
  var codes = [];
  var i;
  // Plain loop instead of Array.prototype.map so that old browsers without
  // ES5 array methods can run this alongside the fromCodePoint shim below.
  for (i = 0; i < parts.length; i++) {
    codes.push(parseInt(parts[i], 16));
  }
  return String.fromCodePoint.apply(null, codes);
}

if (!String.fromCodePoint) {
  // ES6 Unicode Shims 0.1 , © 2012 Steven Levithan http://slevithan.com/ , MIT License
  String.fromCodePoint = function fromCodePoint() {
    var chars = [], point, offset, units, i;
    for (i = 0; i < arguments.length; ++i) {
      point = arguments[i];
      offset = point - 0x10000;
      // Code points above 0xFFFF are split into a surrogate pair.
      units = point > 0xFFFF ? [0xD800 + (offset >> 10), 0xDC00 + (offset & 0x3FF)] : [point];
      chars.push(String.fromCharCode.apply(null, units));
    }
    return chars.join("");
  };
}

/**
 * Escapes the HTML special characters & " < > ' in a string.
 *
 * @param {*} a - Value to escape.
 * @returns {*} The escaped string; falsy values and values without a
 *   `replace` method are returned unchanged.
 */
function htmlEncode(a) {
  if (!a || !a.replace) {
    return a;
  }
  // "&" must be replaced first so the entities added below are not re-escaped.
  return a
    .replace(/&/g, "&amp;")
    .replace(/"/g, "&quot;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/'/g, "&#39;");
}

// Label ("<太阳>") -> hex code point, built once instead of on every call.
var EMOJI_FACE_MAP = {
  "<笑脸>":"1f604","<开心>":"1f60a","<大笑>":"1f603","<热情>":"263a","<眨眼>":"1f609","<色>":"1f60d","<接吻>":"1f618","<亲吻>":"1f61a","<脸红>":"1f633","<露齿笑>":"1f63c",
  "<满意>":"1f60c","<戏弄>":"1f61c","<吐舌>":"1f445","<无语>":"1f612","<得意>":"1f60f","<汗>":"1f613","<失望>":"1f640","<低落>":"1f61e","<呸>":"1f616","<焦虑>":"1f625",
  "<担心>":"1f630","<震惊>":"1f628","<悔恨>":"1f62b","<眼泪>":"1f622","<哭>":"1f62d","<破涕为笑>":"1f602","<晕>":"1f632","<恐惧>":"1f631","<心烦>":"1f620","<生气>":"1f63e",
  "<睡觉>":"1f62a","<生病>":"1f637","<恶魔>":"1f47f","<外星人>":"1f47d","<心>":"2764","<心碎>":"1f494","<丘比特>":"1f498","<闪烁>":"2728","<星星>":"1f31f","<叹号>":"2755",
  "<问号>":"2754","<睡着>":"1f4a4","<水滴>":"1f4a6","<音乐>":"1f3b5","<火>":"1f525","<便便>":"1f4a9","<强>":"1f44d","<弱>":"1f44e","<拳头>":"1f44a","<胜利>":"270c",
  "<上>":"1f446","<下>":"1f447","<右>":"1f449","<左>":"1f448","<第一>":"261d","<强壮>":"1f4aa","<吻>":"1f48f","<热恋>":"1f491","<男孩>":"1f466","<女孩>":"1f467",
  "<女士>":"1f469","<男士>":"1f468","<天使>":"1f47c","<骷髅>":"1f480","<红唇>":"1f48b","<太阳>":"2600","<下雨>":"2614","<多云>":"2601","<雪人>":"26c4","<月亮>":"1f319",
  "<闪电>":"26a1","<海浪>":"1f30a","<猫>":"1f431","<小狗>":"1f429","<老鼠>":"1f42d",
  "<仓鼠>":"1f439","<兔子>":"1f430","<狗>":"1f43a","<青蛙>":"1f438","<老虎>":"1f42f","<考拉>":"1f428","<熊>":"1f43b","<猪>":"1f437","<牛>":"1f42e","<野猪>":"1f417",
  "<猴子>":"1f435","<马>":"1f434","<蛇>":"1f40d","<鸽子>":"1f426","<鸡>":"1f414","<企鹅>":"1f427","<毛虫>":"1f41b","<章鱼>":"1f419","<鱼>":"1f420","<鲸鱼>":"1f433",
  "<海豚>":"1f42c","<玫瑰>":"1f339","<花>":"1f33a","<棕榈树>":"1f334","<仙人掌>":"1f335","<礼盒>":"1f49d","<南瓜灯>":"1f383","<鬼魂>":"1f47b","<圣诞老人>":"1f385","<圣诞树>":"1f384",
  "<礼物>":"1f381","<铃>":"1f514","<庆祝>":"1f389","<气球>":"1f388","<CD>":"1f4bf","<相机>":"1f4f7","<录像机>":"1f3a5","<电脑>":"1f4bb","<电视>":"1f4fa","<电话>":"1f4de",
  "<解锁>":"1f513","<锁>":"1f512","<钥匙>":"1f511","<成交>":"1f528","<灯泡>":"1f4a1","<邮箱>":"1f4eb","<浴缸>":"1f6c0","<钱>":"1f4b2","<药丸>":"1f48a","<橄榄球>":"1f3c8",
  "<篮球>":"1f3c0","<足球>":"26bd","<棒球>":"26be","<高尔夫>":"26f3","<奖杯>":"1f3c6","<入侵者>":"1f47e","<唱歌>":"1f3a4","<吉他>":"1f3b8","<比基尼>":"1f459","<皇冠>":"1f451",
  "<雨伞>":"1f302","<手提包>":"1f45c","<口红>":"1f484","<戒指>":"1f48d","<钻石>":"1f48e","<咖啡>":"2615","<啤酒>":"1f37a","<干杯>":"1f37b","<鸡尾酒>":"1f377","<汉堡>":"1f354",
  "<薯条>":"1f35f","<意面>":"1f35d","<寿司>":"1f363","<面条>":"1f35c","<煎蛋>":"1f373","<冰激凌>":"1f366","<蛋糕>":"1f382","<苹果>":"1f34f","<飞机>":"2708","<火箭>":"1f680",
  "<自行车>":"1f6b2","<高铁>":"1f684","<警告>":"26a0","<旗>":"1f3c1","<男人>":"1f6b9","<女人>":"1f6ba","<O>":"2b55","<X>":"274e","<商标>":"2122"
};

/**
 * Replaces every known "<label>" placeholder in the text with its emoji
 * character, then HTML-escapes the result. Placeholders that are not in the
 * map are left as typed, so their angle brackets end up escaped.
 *
 * @param {string} str - Text containing placeholders such as "<太阳>".
 * @returns {string} HTML-escaped text with known placeholders turned into emoji.
 */
function afterEncodeEmoji(str) {
  var unicodeStr = str.replace(/<.*?>/g, function (token) {
    var code = EMOJI_FACE_MAP[token];
    return code ? toUnicode(code) : token;
  });
  return htmlEncode(unicodeStr);
}
已经基本完成我需要的功能了,效果如下图(兼容至ie6+)
结语
献上本人拙劣的demo(css不行)
http://pan.baidu.com/s/1A2Oxw