summary refs log tree commit diff stats
path: root/src/sfnt/ttcmap.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/sfnt/ttcmap.c')
-rw-r--r-- src/sfnt/ttcmap.c 160
1 files changed, 112 insertions, 48 deletions
diff --git a/src/sfnt/ttcmap.c b/src/sfnt/ttcmap.c
index b70b64c..6830391 100644
--- a/src/sfnt/ttcmap.c
+++ b/src/sfnt/ttcmap.c
@@ -4,7 +4,7 @@
/* */
/* TrueType character mapping table (cmap) support (body). */
/* */
-/* Copyright 2002, 2003, 2004, 2005, 2006, 2007 by */
+/* Copyright 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 by */
/* David Turner, Robert Wilhelm, and Werner Lemberg. */
/* */
/* This file is part of the FreeType project, and may only be used, */
@@ -134,7 +134,7 @@
FT_UInt gindex = 0;
- table += 6; /* go to glyph ids */
+ table += 6; /* go to glyph IDs */
while ( ++charcode < 256 )
{
gindex = table[charcode];
@@ -231,7 +231,7 @@
/* language 4 USHORT Mac language code */
/* keys 6 USHORT[256] sub-header keys */
/* subs 518 SUBHEAD[NSUBS] sub-headers array */
- /* glyph_ids 518+NSUB*8 USHORT[] glyph id array */
+ /* glyph_ids 518+NSUB*8 USHORT[] glyph ID array */
/* */
/* The `keys' table is used to map charcode high-bytes to sub-headers. */
/* The value of `NSUBS' is the number of sub-headers defined in the */
@@ -260,14 +260,14 @@
/* */
/* * The value of `offset' is read. This is a _byte_ distance from the */
/* location of the `offset' field itself into a slice of the */
- /* `glyph_ids' table. Let's call it `slice' (it's a USHORT[] too). */
+ /* `glyph_ids' table. Let's call it `slice' (it is a USHORT[] too). */
/* */
/* * The value `slice[char.lo - first]' is read. If it is 0, there is */
/* no glyph for the charcode. Otherwise, the value of `delta' is */
/* added to it (modulo 65536) to form a new glyph index. */
/* */
/* It is up to the validation routine to check that all offsets fall */
- /* within the glyph ids table (and not within the `subs' table itself or */
+ /* within the glyph IDs table (and not within the `subs' table itself or */
/* outside of the CMap). */
/* */
@@ -282,7 +282,7 @@
FT_UInt n, max_subs;
FT_Byte* keys; /* keys table */
FT_Byte* subs; /* sub-headers */
- FT_Byte* glyph_ids; /* glyph id array */
+ FT_Byte* glyph_ids; /* glyph ID array */
if ( table + length > valid->limit || length < 6 + 512 )
@@ -328,6 +328,10 @@
delta = TT_NEXT_SHORT( p );
offset = TT_NEXT_USHORT( p );
+ /* many Dynalab fonts have empty sub-headers */
+ if ( code_count == 0 )
+ continue;
+
/* check range within 0..255 */
if ( valid->level >= FT_VALIDATE_PARANOID )
{
@@ -342,7 +346,7 @@
if ( ids < glyph_ids || ids + code_count*2 > table + length )
FT_INVALID_OFFSET;
- /* check glyph ids */
+ /* check glyph IDs */
if ( valid->level >= FT_VALIDATE_TIGHT )
{
FT_Byte* limit = p + code_count * 2;
@@ -393,7 +397,7 @@
sub = subs; /* jump to first sub-header */
/* check that the sub-header for this byte is 0, which */
- /* indicates that it's really a valid one-byte value */
+ /* indicates that it is really a valid one-byte value */
/* Otherwise, return 0 */
/* */
p += char_lo * 2;
@@ -601,14 +605,14 @@
/* each segment; can be */
/* zero */
/* */
- /* glyphIds 16+NUM_SEGS*8 USHORT[] array of glyph id */
+ /* glyphIds 16+NUM_SEGS*8 USHORT[] array of glyph ID */
/* ranges */
/* */
/* Character codes are modelled by a series of ordered (increasing) */
/* intervals called segments. Each segment has start and end codes, */
/* provided by the `startCount' and `endCount' arrays. Segments must */
- /* not be overlapping and the last segment should always contain the */
- /* `0xFFFF' endCount. */
+ /* not overlap, and the last segment should always contain the value */
+ /* 0xFFFF for `endCount'. */
/* */
/* The fields `searchRange', `entrySelector' and `rangeShift' are better */
/* ignored (they are traces of over-engineering in the TrueType */
@@ -621,14 +625,14 @@
/* charcode within the segment is obtained by adding the value of */
/* `idDelta' directly to the charcode, modulo 65536. */
/* */
- /* Otherwise, a glyph index is taken from the glyph ids sub-array for */
+ /* Otherwise, a glyph index is taken from the glyph IDs sub-array for */
/* the segment, and the value of `idDelta' is added to it. */
/* */
/* */
- /* Finally, note that certain fonts contain invalid charmaps that */
- /* contain end=0xFFFF, start=0xFFFF, delta=0x0001, offset=0xFFFF at the */
- /* of their charmaps (e.g. opens___.ttf which comes with OpenOffice.org) */
- /* we need special code to deal with them correctly... */
+ /* Finally, note that a lot of fonts contain an invalid last segment, */
+ /* where `start' and `end' are correctly set to 0xFFFF but both `delta' */
+ /* and `offset' are incorrect (e.g., `opens___.ttf' which comes with */
+ /* OpenOffice.org). We need special code to deal with them correctly. */
/* */
#ifdef TT_CONFIG_CMAP_FORMAT_4
@@ -693,6 +697,23 @@
p += num_ranges * 2;
offset = FT_PEEK_USHORT( p );
+ /* some fonts have an incorrect last segment; */
+ /* we have to catch it */
+ if ( range_index >= num_ranges - 1 &&
+ cmap->cur_start == 0xFFFFU &&
+ cmap->cur_end == 0xFFFFU )
+ {
+ TT_Face face = (TT_Face)cmap->cmap.cmap.charmap.face;
+ FT_Byte* limit = face->cmap_table + face->cmap_size;
+
+
+ if ( offset && p + offset + 2 > limit )
+ {
+ cmap->cur_delta = 1;
+ offset = 0;
+ }
+ }
+
if ( offset != 0xFFFFU )
{
cmap->cur_values = offset ? p + offset : NULL;
@@ -831,7 +852,7 @@
/* */
if ( valid->level >= FT_VALIDATE_PARANOID )
{
- /* check the values of 'searchRange', 'entrySelector', 'rangeShift' */
+ /* check the values of `searchRange', `entrySelector', `rangeShift' */
FT_UInt search_range = TT_NEXT_USHORT( p );
FT_UInt entry_selector = TT_NEXT_USHORT( p );
FT_UInt range_shift = TT_NEXT_USHORT( p );
@@ -858,7 +879,7 @@
offsets = deltas + num_segs * 2;
glyph_ids = offsets + num_segs * 2;
- /* check last segment, its end count must be FFFF */
+ /* check last segment; its end count value must be 0xFFFF */
if ( valid->level >= FT_VALIDATE_PARANOID )
{
p = ends + ( num_segs - 1 ) * 2;
@@ -867,9 +888,9 @@
}
{
- FT_UInt start, end, offset, n;
- FT_UInt last_start = 0, last_end = 0;
- FT_Int delta;
+ FT_UInt start, end, offset, n;
+ FT_UInt last_start = 0, last_end = 0;
+ FT_Int delta;
FT_Byte* p_start = starts;
FT_Byte* p_end = ends;
FT_Byte* p_delta = deltas;
@@ -887,10 +908,10 @@
if ( start > end )
FT_INVALID_DATA;
- /* this test should be performed at default validation level; */
- /* unfortunately, some popular Asian fonts present overlapping */
- /* ranges in their charmaps */
- /* */
+ /* this test should be performed at default validation level; */
+ /* unfortunately, some popular Asian fonts have overlapping */
+ /* ranges in their charmaps */
+ /* */
if ( start <= last_end && n > 0 )
{
if ( valid->level >= FT_VALIDATE_TIGHT )
@@ -898,7 +919,7 @@
else
{
/* allow overlapping segments, provided their start points */
- /* and end points, respectively, are in ascending order. */
+ /* and end points, respectively, are in ascending order */
/* */
if ( last_start > start || last_end > end )
error |= TT_CMAP_FLAG_UNSORTED;
@@ -909,16 +930,27 @@
if ( offset && offset != 0xFFFFU )
{
- p += offset; /* start of glyph id array */
+ p += offset; /* start of glyph ID array */
- /* check that we point within the glyph ids table only */
+ /* check that we point within the glyph IDs table only */
if ( valid->level >= FT_VALIDATE_TIGHT )
{
if ( p < glyph_ids ||
p + ( end - start + 1 ) * 2 > table + length )
FT_INVALID_DATA;
}
- else
+ /* Some fonts handle the last segment incorrectly. In */
+ /* theory, 0xFFFF might point to an ordinary glyph -- */
+ /* a cmap 4 is versatile and could be used for any */
+ /* encoding, not only Unicode. However, reality shows */
+ /* that far too many fonts are sloppy and incorrectly */
+ /* set all fields but `start' and `end' for the last */
+ /* segment if it contains only a single character. */
+ /* */
+ /* We thus omit the test here, delaying it to the */
+ /* routines which actually access the cmap. */
+ else if ( n != num_segs - 1 ||
+ !( start == 0xFFFFU && end == 0xFFFFU ) )
{
if ( p < glyph_ids ||
p + ( end - start + 1 ) * 2 > valid->limit )
@@ -946,12 +978,12 @@
}
else if ( offset == 0xFFFFU )
{
- /* Some fonts (erroneously?) use a range offset of 0xFFFF */
+ /* some fonts (erroneously?) use a range offset of 0xFFFF */
/* to mean missing glyph in cmap table */
/* */
- if ( valid->level >= FT_VALIDATE_PARANOID ||
- n != num_segs - 1 ||
- !( start == 0xFFFFU && end == 0xFFFFU && delta == 0x1U ) )
+ if ( valid->level >= FT_VALIDATE_PARANOID ||
+ n != num_segs - 1 ||
+ !( start == 0xFFFFU && end == 0xFFFFU ) )
FT_INVALID_DATA;
}
@@ -965,9 +997,9 @@
static FT_UInt
- tt_cmap4_char_map_linear( TT_CMap cmap,
- FT_UInt* pcharcode,
- FT_Bool next )
+ tt_cmap4_char_map_linear( TT_CMap cmap,
+ FT_UInt32* pcharcode,
+ FT_Bool next )
{
FT_UInt num_segs2, start, end, offset;
FT_Int delta;
@@ -1009,6 +1041,22 @@
p += num_segs2;
offset = TT_PEEK_USHORT( p );
+ /* some fonts have an incorrect last segment; */
+ /* we have to catch it */
+ if ( i >= num_segs - 1 &&
+ start == 0xFFFFU && end == 0xFFFFU )
+ {
+ TT_Face face = (TT_Face)cmap->cmap.charmap.face;
+ FT_Byte* limit = face->cmap_table + face->cmap_size;
+
+
+ if ( offset && p + offset + 2 > limit )
+ {
+ delta = 1;
+ offset = 0;
+ }
+ }
+
if ( offset == 0xFFFFU )
continue;
@@ -1038,9 +1086,9 @@
static FT_UInt
- tt_cmap4_char_map_binary( TT_CMap cmap,
- FT_UInt* pcharcode,
- FT_Bool next )
+ tt_cmap4_char_map_binary( TT_CMap cmap,
+ FT_UInt32* pcharcode,
+ FT_Bool next )
{
FT_UInt num_segs2, start, end, offset;
FT_Int delta;
@@ -1088,6 +1136,22 @@
p += num_segs2;
offset = TT_PEEK_USHORT( p );
+ /* some fonts have an incorrect last segment; */
+ /* we have to catch it */
+ if ( mid >= num_segs - 1 &&
+ start == 0xFFFFU && end == 0xFFFFU )
+ {
+ TT_Face face = (TT_Face)cmap->cmap.charmap.face;
+ FT_Byte* limit = face->cmap_table + face->cmap_size;
+
+
+ if ( offset && p + offset + 2 > limit )
+ {
+ delta = 1;
+ offset = 0;
+ }
+ }
+
/* search the first segment containing `charcode' */
if ( cmap->flags & TT_CMAP_FLAG_OVERLAPPING )
{
@@ -1359,7 +1423,7 @@
/* */
/* first 6 USHORT first segment code */
/* count 8 USHORT segment size in chars */
- /* glyphIds 10 USHORT[count] glyph ids */
+ /* glyphIds 10 USHORT[count] glyph IDs */
/* */
/* A very simplified segment mapping. */
/* */
@@ -1506,7 +1570,7 @@
/***** *****/
/***** FORMAT 8 *****/
/***** *****/
- /***** It's hard to completely understand what the OpenType spec *****/
+ /***** It is hard to completely understand what the OpenType spec *****/
/***** says about this format, but here is my conclusion. *****/
/***** *****/
/***** The purpose of this format is to easily map UTF-16 text to *****/
@@ -1521,7 +1585,7 @@
/***** `char_hi' and `char_lo' must be in the Surrogates Area. *****/
/***** Area. *****/
/***** *****/
- /***** The 'is32' table embedded in the charmap indicates whether a *****/
+ /***** The `is32' table embedded in the charmap indicates whether a *****/
/***** given 16-bit value is in the surrogates area or not. *****/
/***** *****/
/***** So, for any given `char_code', we can assert the following: *****/
@@ -1548,11 +1612,11 @@
/* is32 12 BYTE[8192] 32-bitness bitmap */
/* count 8204 ULONG number of groups */
/* */
- /* This header is followed by 'count' groups of the following format: */
+ /* This header is followed by `count' groups of the following format: */
/* */
/* start 0 ULONG first charcode */
/* end 4 ULONG last charcode */
- /* startId 8 ULONG start glyph id for the group */
+ /* startId 8 ULONG start glyph ID for the group */
/* */
#ifdef TT_CONFIG_CMAP_FORMAT_8
@@ -1934,7 +1998,7 @@
/* */
/* start 0 ULONG first charcode */
/* end 4 ULONG last charcode */
- /* startId 8 ULONG start glyph id for the group */
+ /* startId 8 ULONG start glyph ID for the group */
/* */
#ifdef TT_CONFIG_CMAP_FORMAT_12
@@ -2727,7 +2791,7 @@
FT_UInt tot = 0;
- p += 3; /* point to the first 'cnt' field */
+ p += 3; /* point to the first `cnt' field */
for ( ; numRanges > 0; numRanges-- )
{
tot += 1 + p[0];
@@ -2774,7 +2838,7 @@
}
- static FT_UInt*
+ static FT_UInt32*
tt_cmap14_get_nondef_chars( TT_CMap cmap,
FT_Byte *p,
FT_Memory memory )
@@ -2962,7 +3026,7 @@
(TT_CMap_Info_GetFunc)tt_cmap14_get_info
};
-#endif /* TT_CONFIG_CMAP_FORMAT_0 */
+#endif /* TT_CONFIG_CMAP_FORMAT_14 */
static const TT_CMap_Class tt_cmap_classes[] =