diff options
author | LuK1337 <priv.luk@gmail.com> | 2020-01-07 21:13:50 +0200 |
---|---|---|
committer | Michael Bestas <mkbestas@lineageos.org> | 2020-01-07 21:15:35 +0200 |
commit | 313906b9d1af0ec66492ba606979b01c8d224d45 (patch) | |
tree | ebb5f8c418b8d0dddca282952c5e29ba4957584d | |
parent | ac419fa548302bbed78fd09faacfcc8638767276 (diff) | |
download | scripts-313906b9d1af0ec66492ba606979b01c8d224d45.tar.gz scripts-313906b9d1af0ec66492ba606979b01c8d224d45.tar.bz2 scripts-313906b9d1af0ec66492ba606979b01c8d224d45.zip |
Add emoji-updater script
* Exports emoji list from unicode.org in LatinIME compatible xml format
Change-Id: I6b86ac3253ee775c0c1c206ac62bf34a8f317093
-rwxr-xr-x | emoji-updater/emoji-updater.py | 30 |
1 files changed, 30 insertions, 0 deletions
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Export the unicode.org emoji list in LatinIME-compatible XML format.

Usage:
    emoji-updater.py <url>

where <url> is an emoji chart page such as
https://unicode.org/emoji/charts-12.0/full-emoji-list.html.

Every table row of the downloaded page is inspected in document order:

* a row holding a ``th.mediumhead`` link is printed as an XML comment
  (``<!-- heading -->``);
* a row holding a ``td.code`` link is printed as an ``<item>`` whose
  content is the comma-separated list of code points from that link.

Rows matching neither pattern are ignored.  Output goes to stdout.
"""
import sys

import requests
from lxml import etree

# Seconds to wait on the server.  Without an explicit timeout
# requests.get() may block forever on a stalled connection.
REQUEST_TIMEOUT = 30


def _format_codes(text):
    """Return the code points in *text* joined by commas.

    *text* is split on whitespace and the first two characters of every
    token are dropped (this assumes tokens look like ``U+1F600``), so
    ``'U+1F468 U+200D U+1F469'`` becomes ``'1F468,200D,1F469'``.
    """
    return ','.join([token[2:] for token in text.split()])


def main():
    """Download the chart named on the command line and print the XML.

    Exits with a usage message when no URL argument is given.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if the server does not answer in time.
    """
    if len(sys.argv) < 2:
        sys.exit(f'usage: {sys.argv[0]} [url|https://unicode.org/emoji/charts-12.0/full-emoji-list.html]')

    url = sys.argv[1]
    req = requests.get(url=url, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx instead of parsing an error page and
    # silently emitting an empty or bogus list.
    req.raise_for_status()

    parser = etree.HTMLParser(recover=True, encoding='utf-8')
    doc = etree.fromstring(text=req.content, parser=parser)

    for tr in doc.xpath('.//tr'):
        mediumhead = tr.xpath('.//th[@class="mediumhead"]/a')

        if mediumhead:
            print(f' <!-- {mediumhead[0].text} -->')
            continue

        code = tr.xpath('.//td[@class="code"]/a')

        # .text is None when the link has no direct text node; skip such
        # rows rather than crash on None.split().
        if code and code[0].text:
            print(f' <item>{_format_codes(code[0].text)}</item>')


if __name__ == '__main__':
    main()