# libs/minikin/unicode_emoji_h_gen.py

#!/usr/bin/env python
#
# Copyright (C) 2016 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Generate header file for unicode data."""

import optparse
import sys


# Skeleton of the generated C++ header. The '@@@EMOJI_DATA@@@' placeholder line
# is substituted by _generate_header_contents() with the formatted hex code
# point literals that initialize EMOJI_LIST; the rest of the text is emitted
# as-is.
UNICODE_EMOJI_TEMPLATE="""
/* file generated by frameworks/minikin/lib/minikin/Android.mk */
#ifndef MINIKIN_UNICODE_EMOJI_H
#define MINIKIN_UNICODE_EMOJI_H

#include <stdint.h>

namespace android {
namespace generated {

int32_t EMOJI_LIST[] = {
@@@EMOJI_DATA@@@
};

}  // namespace generated
}  // namespace android

#endif  // MINIKIN_UNICODE_EMOJI_H
"""


def _create_opt_parser():
  """Build the command line parser used by this script.

  Returns:
    An optparse.OptionParser that accepts -i/--input and -o/--output, each
    storing a single string value.
  """
  # (short flag, long flag, help text) for every supported option.
  option_specs = (
      ('-i', '--input', 'path to input emoji-data.txt'),
      ('-o', '--output', 'path to output UnicodeEmoji.h'),
  )

  cli = optparse.OptionParser()
  for short_flag, long_flag, description in option_specs:
    cli.add_option(short_flag, long_flag, type='str', action='store',
                   help=description)
  return cli


def _read_emoji_data(emoji_data_file_path):
  """Collect every code point that is listed with the 'Emoji' property.

  Each data line is expected to have the form `<code points> ; <property>`,
  where `<code points>` is either one hexadecimal value or an inclusive
  `start..end` range. Anything after a '#' is treated as a comment and
  blank lines are ignored. Lines carrying any other property are skipped.

  Args:
    emoji_data_file_path: path to the emoji-data.txt file to parse.

  Returns:
    A list of integer code points in file order, with ranges expanded.

  Raises:
    ValueError: if a data line does not contain exactly one ';' or a code
      point is not valid hexadecimal.
  """
  # Local import: io.open accepts `encoding` on both Python 2 and Python 3.
  import io

  result = []
  # The data file carries UTF-8 text in its comments, so decode explicitly
  # instead of depending on the locale's default encoding.
  with io.open(emoji_data_file_path, encoding='utf-8') as emoji_data_file:
    for line in emoji_data_file:
      line = line.partition('#')[0].strip()  # Drop comments.
      if not line:
        continue  # Skip empty line.

      code_points, prop = line.split(';')
      if prop.strip() != 'Emoji':
        # Skip rather than stop, so the result does not depend on all 'Emoji'
        # lines appearing before the lines of other properties.
        continue

      code_points = code_points.strip()
      if '..' in code_points:  # code point range
        cp_start, cp_end = code_points.split('..')
        # range (not xrange) so this also runs on Python 3.
        result.extend(range(int(cp_start, 16), int(cp_end, 16) + 1))
      else:
        result.append(int(code_points, 16))
  return result


def _generate_header_contents(emoji_list):
  """Render the header text for the given code points.

  The code points are formatted as '0x%04X' literals, packed into indented,
  comma-separated rows that are kept under 100 columns, and substituted for
  the '@@@EMOJI_DATA@@@' placeholder of UNICODE_EMOJI_TEMPLATE.

  Args:
    emoji_list: non-empty sequence of integer code points.

  Returns:
    The complete header file contents as a string.

  Raises:
    IndexError: if emoji_list is empty.
  """
  indent = ' ' * 4
  separator = ', '
  max_width = 100

  literals = ['0x%04X' % code_point for code_point in emoji_list]

  rows = []
  current_row = indent + literals[0]
  for literal in literals[1:]:
    if len(current_row) + len(separator) + len(literal) < max_width:
      # Still fits: keep growing the current row.
      current_row = current_row + separator + literal
      continue
    # Row is full: close it with a trailing comma and start the next one.
    rows.append(current_row + ',')
    current_row = indent + literal
  rows.append(current_row)  # The final row has no trailing comma.

  return UNICODE_EMOJI_TEMPLATE.replace('@@@EMOJI_DATA@@@', '\n'.join(rows))


if __name__ == '__main__':
  # Script entry point: parse the command line, read the Emoji code points
  # from --input and write the generated header to --output.
  opt_parser = _create_opt_parser()
  opts, _ = opt_parser.parse_args()

  # Both paths are mandatory. Without this check a missing option reaches
  # open() as None and fails with an unhelpful TypeError; error() prints the
  # usage message and exits instead.
  if not opts.input or not opts.output:
    opt_parser.error('both --input and --output must be specified')

  emoji_list = _read_emoji_data(opts.input)
  header = _generate_header_contents(emoji_list)
  with open(opts.output, 'w') as header_file:
    header_file.write(header)