aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog10
-rw-r--r--Makefile.am4
-rw-r--r--catalog.c8
-rwxr-xr-xchvalid.c186
-rwxr-xr-xchvalid.def342
-rw-r--r--chvalid.h105
-rwxr-xr-xgenChRanges.py465
-rw-r--r--include/libxml/Makefile.am3
-rw-r--r--include/libxml/parserInternals.h23
-rw-r--r--parserInternals.c490
10 files changed, 1122 insertions, 514 deletions
diff --git a/ChangeLog b/ChangeLog
index c4f9a84b..50dac47d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+Sat Oct 11 23:11:22 HKT 2003 William Brack <wbrack@mmm.com.hk>
+
+ * genChRanges.py, chvalid.def, chvalid.c, include/libxml/chvalid.h:
+ new files for a different method for doing range validation
+ of character data.
+ * Makefile.am, parserInternals.c, include/libxml/Makefile.am,
+ include/libxml/parserInternals.h: modified for new range method.
+ * catalog.c: small enhancement for warning message (using one
+ of the new range routines)
+
Sat Oct 11 13:24:57 CEST 2003 Daniel Veillard <daniel@veillard.com>
* valid.c include/libxml/valid.h: adding an serror field to
diff --git a/Makefile.am b/Makefile.am
index 8fc01b5b..88723415 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -27,7 +27,7 @@ libxml2_la_SOURCES = SAX.c entities.c encoding.c error.c parserInternals.c \
catalog.c globals.c threads.c c14n.c \
xmlregexp.c xmlschemas.c xmlschemastypes.c xmlunicode.c \
triostr.c trio.c xmlreader.c relaxng.c dict.c SAX2.c \
- legacy.c walker.c
+ legacy.c walker.c chvalid.c
else
libxml2_la_SOURCES = SAX.c entities.c encoding.c error.c parserInternals.c \
parser.c tree.c hash.c list.c xmlIO.c xmlmemory.c uri.c \
@@ -36,7 +36,7 @@ libxml2_la_SOURCES = SAX.c entities.c encoding.c error.c parserInternals.c \
catalog.c globals.c threads.c c14n.c \
xmlregexp.c xmlschemas.c xmlschemastypes.c xmlunicode.c \
xmlreader.c relaxng.c dict.c SAX2.c \
- legacy.c xmldwalk.c
+ legacy.c xmldwalk.c chvalid.c
endif
DEPS = $(top_builddir)/libxml2.la
diff --git a/catalog.c b/catalog.c
index 06dbb52e..4375ebb0 100644
--- a/catalog.c
+++ b/catalog.c
@@ -2932,11 +2932,11 @@ xmlInitializeCatalog(void) {
cur = catalogs;
nextent = &catal->xml;
while (*cur != '\0') {
- while (IS_BLANK(*cur))
+ while (xmlIsBlank_ch(*cur))
cur++;
if (*cur != 0) {
paths = cur;
- while ((*cur != 0) && (!IS_BLANK(*cur)))
+ while ((*cur != 0) && (!xmlIsBlank_ch(*cur)))
cur++;
path = (char *) xmlStrndup((const xmlChar *)paths, cur - paths);
if (path != NULL) {
@@ -3015,10 +3015,10 @@ xmlLoadCatalogs(const char *pathss) {
cur = pathss;
while ((cur != NULL) && (*cur != 0)) {
- while (IS_BLANK(*cur)) cur++;
+ while (xmlIsBlank_ch(*cur)) cur++;
if (*cur != 0) {
paths = cur;
- while ((*cur != 0) && (*cur != ':') && (!IS_BLANK(*cur)))
+ while ((*cur != 0) && (*cur != ':') && (!xmlIsBlank_ch(*cur)))
cur++;
path = xmlStrndup((const xmlChar *)paths, cur - paths);
if (path != NULL) {
diff --git a/chvalid.c b/chvalid.c
new file mode 100755
index 00000000..fbfd32b4
--- /dev/null
+++ b/chvalid.c
@@ -0,0 +1,186 @@
+/*
+ * chvalid.c: this module implements the character range
+ * validation APIs
+ *
+ * This file is automatically generated from the cvs source
+ * definition files using the genChRanges.py Python script
+ *
+ * Generation date: Sat Oct 11 20:57:37 2003
+ * Sources: chvalid.def
+ * William Brack <wbrack@mmm.com.hk>
+ */
+
+#include "chvalid.h"
+
+/*
+ * The initial tables ({func_name}_tab) are used to validate whether a
+ * single-byte character is within the specified group. Each table
+ * contains 256 bytes, with each byte representing one of the 256
+ * possible characters. If the table byte is set, the character is
+ * allowed.
+ *
+ */
+/*
+ * xmlIsPubidChar_tab: one byte per character value 0x00..0xff, non-zero
+ * when the character is a PubidChar (XML 1.0 production [13]):
+ *   #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
+ * The apostrophe (0x27) is a PubidChar; the backslash (0x5c) is not.
+ */
+unsigned char xmlIsPubidChar_tab[256] = {
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00,
+    0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x01,
+    0x01, 0x01, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+    0x00, 0x01, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x01,
+    0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+    0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00 };
+
+static xmlChSRange xmlIsBaseChar_srng[] = { {0x100, 0x131}, {0x134, 0x13e},
+ {0x141, 0x148}, {0x14a, 0x17e}, {0x180, 0x1c3}, {0x1cd, 0x1f0},
+ {0x1f4, 0x1f5}, {0x1fa, 0x217}, {0x250, 0x2a8}, {0x2bb, 0x2c1},
+ {0x386, 0x386}, {0x388, 0x38a}, {0x38c, 0x38c}, {0x38e, 0x3a1},
+ {0x3a3, 0x3ce}, {0x3d0, 0x3d6}, {0x3da, 0x3da}, {0x3dc, 0x3dc},
+ {0x3de, 0x3de}, {0x3e0, 0x3e0}, {0x3e2, 0x3f3}, {0x401, 0x40c},
+ {0x40e, 0x44f}, {0x451, 0x45c}, {0x45e, 0x481}, {0x490, 0x4c4},
+ {0x4c7, 0x4c8}, {0x4cb, 0x4cc}, {0x4d0, 0x4eb}, {0x4ee, 0x4f5},
+ {0x4f8, 0x4f9}, {0x531, 0x556}, {0x559, 0x559}, {0x561, 0x586},
+ {0x5d0, 0x5ea}, {0x5f0, 0x5f2}, {0x621, 0x63a}, {0x641, 0x64a},
+ {0x671, 0x6b7}, {0x6ba, 0x6be}, {0x6c0, 0x6ce}, {0x6d0, 0x6d3},
+ {0x6d5, 0x6d5}, {0x6e5, 0x6e6}, {0x905, 0x939}, {0x93d, 0x93d},
+ {0x958, 0x961}, {0x985, 0x98c}, {0x98f, 0x990}, {0x993, 0x9a8},
+ {0x9aa, 0x9b0}, {0x9b2, 0x9b2}, {0x9b6, 0x9b9}, {0x9dc, 0x9dd},
+ {0x9df, 0x9e1}, {0x9f0, 0x9f1}, {0xa05, 0xa0a}, {0xa0f, 0xa10},
+ {0xa13, 0xa28}, {0xa2a, 0xa30}, {0xa32, 0xa33}, {0xa35, 0xa36},
+ {0xa38, 0xa39}, {0xa59, 0xa5c}, {0xa5e, 0xa5e}, {0xa72, 0xa74},
+ {0xa85, 0xa8b}, {0xa8d, 0xa8d}, {0xa8f, 0xa91}, {0xa93, 0xaa8},
+ {0xaaa, 0xab0}, {0xab2, 0xab3}, {0xab5, 0xab9}, {0xabd, 0xabd},
+ {0xae0, 0xae0}, {0xb05, 0xb0c}, {0xb0f, 0xb10}, {0xb13, 0xb28},
+ {0xb2a, 0xb30}, {0xb32, 0xb33}, {0xb36, 0xb39}, {0xb3d, 0xb3d},
+ {0xb5c, 0xb5d}, {0xb5f, 0xb61}, {0xb85, 0xb8a}, {0xb8e, 0xb90},
+ {0xb92, 0xb95}, {0xb99, 0xb9a}, {0xb9c, 0xb9c}, {0xb9e, 0xb9f},
+ {0xba3, 0xba4}, {0xba8, 0xbaa}, {0xbae, 0xbb5}, {0xbb7, 0xbb9},
+ {0xc05, 0xc0c}, {0xc0e, 0xc10}, {0xc12, 0xc28}, {0xc2a, 0xc33},
+ {0xc35, 0xc39}, {0xc60, 0xc61}, {0xc85, 0xc8c}, {0xc8e, 0xc90},
+ {0xc92, 0xca8}, {0xcaa, 0xcb3}, {0xcb5, 0xcb9}, {0xcde, 0xcde},
+ {0xce0, 0xce1}, {0xd05, 0xd0c}, {0xd0e, 0xd10}, {0xd12, 0xd28},
+ {0xd2a, 0xd39}, {0xd60, 0xd61}, {0xe01, 0xe2e}, {0xe30, 0xe30},
+ {0xe32, 0xe33}, {0xe40, 0xe45}, {0xe81, 0xe82}, {0xe84, 0xe84},
+ {0xe87, 0xe88}, {0xe8a, 0xe8a}, {0xe8d, 0xe8d}, {0xe94, 0xe97},
+ {0xe99, 0xe9f}, {0xea1, 0xea3}, {0xea5, 0xea5}, {0xea7, 0xea7},
+ {0xeaa, 0xeab}, {0xead, 0xeae}, {0xeb0, 0xeb0}, {0xeb2, 0xeb3},
+ {0xebd, 0xebd}, {0xec0, 0xec4}, {0xf40, 0xf47}, {0xf49, 0xf69},
+ {0x10a0, 0x10c5}, {0x10d0, 0x10f6}, {0x1100, 0x1100}, {0x1102, 0x1103},
+ {0x1105, 0x1107}, {0x1109, 0x1109}, {0x110b, 0x110c}, {0x110e, 0x1112},
+ {0x113c, 0x113c}, {0x113e, 0x113e}, {0x1140, 0x1140}, {0x114c, 0x114c},
+ {0x114e, 0x114e}, {0x1150, 0x1150}, {0x1154, 0x1155}, {0x1159, 0x1159},
+ {0x115f, 0x1161}, {0x1163, 0x1163}, {0x1165, 0x1165}, {0x1167, 0x1167},
+ {0x1169, 0x1169}, {0x116d, 0x116e}, {0x1172, 0x1173}, {0x1175, 0x1175},
+ {0x119e, 0x119e}, {0x11a8, 0x11a8}, {0x11ab, 0x11ab}, {0x11ae, 0x11af},
+ {0x11b7, 0x11b8}, {0x11ba, 0x11ba}, {0x11bc, 0x11c2}, {0x11eb, 0x11eb},
+ {0x11f0, 0x11f0}, {0x11f9, 0x11f9}, {0x1e00, 0x1e9b}, {0x1ea0, 0x1ef9},
+ {0x1f00, 0x1f15}, {0x1f18, 0x1f1d}, {0x1f20, 0x1f45}, {0x1f48, 0x1f4d},
+ {0x1f50, 0x1f57}, {0x1f59, 0x1f59}, {0x1f5b, 0x1f5b}, {0x1f5d, 0x1f5d},
+ {0x1f5f, 0x1f7d}, {0x1f80, 0x1fb4}, {0x1fb6, 0x1fbc}, {0x1fbe, 0x1fbe},
+ {0x1fc2, 0x1fc4}, {0x1fc6, 0x1fcc}, {0x1fd0, 0x1fd3}, {0x1fd6, 0x1fdb},
+ {0x1fe0, 0x1fec}, {0x1ff2, 0x1ff4}, {0x1ff6, 0x1ffc}, {0x2126, 0x2126},
+ {0x212a, 0x212b}, {0x212e, 0x212e}, {0x2180, 0x2182}, {0x3041, 0x3094},
+ {0x30a1, 0x30fa}, {0x3105, 0x312c}, {0xac00, 0xd7a3}};
+xmlChRangeGroup xmlIsBaseCharGroup = {197, 0, xmlIsBaseChar_srng};
+
+/*
+ * xmlIsChar: accepted values from 0x100 upwards. The first array holds
+ * the ranges below 0x10000, the second those at or above it; the group
+ * initializer is {short count, long count, short array, long array}.
+ */
+static xmlChSRange xmlIsChar_srng[] = { {0x100, 0xd7ff}, {0xe000, 0xfffd}};
+static xmlChLRange xmlIsChar_lrng[] = { {0x10000, 0x10ffff}};
+xmlChRangeGroup xmlIsCharGroup = {2, 1, xmlIsChar_srng, xmlIsChar_lrng};
+
+static xmlChSRange xmlIsCombining_srng[] = { {0x300, 0x345},
+ {0x360, 0x361}, {0x483, 0x486}, {0x591, 0x5a1}, {0x5a3, 0x5b9},
+ {0x5bb, 0x5bd}, {0x5bf, 0x5bf}, {0x5c1, 0x5c2}, {0x5c4, 0x5c4},
+ {0x64b, 0x652}, {0x670, 0x670}, {0x6d6, 0x6dc}, {0x6dd, 0x6df},
+ {0x6e0, 0x6e4}, {0x6e7, 0x6e8}, {0x6ea, 0x6ed}, {0x901, 0x903},
+ {0x93c, 0x93c}, {0x93e, 0x94c}, {0x94d, 0x94d}, {0x951, 0x954},
+ {0x962, 0x963}, {0x981, 0x983}, {0x9bc, 0x9bc}, {0x9be, 0x9be},
+ {0x9bf, 0x9bf}, {0x9c0, 0x9c4}, {0x9c7, 0x9c8}, {0x9cb, 0x9cd},
+ {0x9d7, 0x9d7}, {0x9e2, 0x9e3}, {0xa02, 0xa02}, {0xa3c, 0xa3c},
+ {0xa3e, 0xa3e}, {0xa3f, 0xa3f}, {0xa40, 0xa42}, {0xa47, 0xa48},
+ {0xa4b, 0xa4d}, {0xa70, 0xa71}, {0xa81, 0xa83}, {0xabc, 0xabc},
+ {0xabe, 0xac5}, {0xac7, 0xac9}, {0xacb, 0xacd}, {0xb01, 0xb03},
+ {0xb3c, 0xb3c}, {0xb3e, 0xb43}, {0xb47, 0xb48}, {0xb4b, 0xb4d},
+ {0xb56, 0xb57}, {0xb82, 0xb83}, {0xbbe, 0xbc2}, {0xbc6, 0xbc8},
+ {0xbca, 0xbcd}, {0xbd7, 0xbd7}, {0xc01, 0xc03}, {0xc3e, 0xc44},
+ {0xc46, 0xc48}, {0xc4a, 0xc4d}, {0xc55, 0xc56}, {0xc82, 0xc83},
+ {0xcbe, 0xcc4}, {0xcc6, 0xcc8}, {0xcca, 0xccd}, {0xcd5, 0xcd6},
+ {0xd02, 0xd03}, {0xd3e, 0xd43}, {0xd46, 0xd48}, {0xd4a, 0xd4d},
+ {0xd57, 0xd57}, {0xe31, 0xe31}, {0xe34, 0xe3a}, {0xe47, 0xe4e},
+ {0xeb1, 0xeb1}, {0xeb4, 0xeb9}, {0xebb, 0xebc}, {0xec8, 0xecd},
+ {0xf18, 0xf19}, {0xf35, 0xf35}, {0xf37, 0xf37}, {0xf39, 0xf39},
+ {0xf3e, 0xf3e}, {0xf3f, 0xf3f}, {0xf71, 0xf84}, {0xf86, 0xf8b},
+ {0xf90, 0xf95}, {0xf97, 0xf97}, {0xf99, 0xfad}, {0xfb1, 0xfb7},
+ {0xfb9, 0xfb9}, {0x20d0, 0x20dc}, {0x20e1, 0x20e1}, {0x302a, 0x302f},
+ {0x3099, 0x3099}, {0x309a, 0x309a}};
+xmlChRangeGroup xmlIsCombiningGroup = {95, 0, xmlIsCombining_srng};
+
+/*
+ * xmlIsDigit: digit ranges from 0x100 upwards, listed in ascending order
+ * (14 entries, all below 0x10000, so there is no long-range array).
+ */
+static xmlChSRange xmlIsDigit_srng[] = { {0x660, 0x669}, {0x6f0, 0x6f9},
+ {0x966, 0x96f}, {0x9e6, 0x9ef}, {0xa66, 0xa6f}, {0xae6, 0xaef},
+ {0xb66, 0xb6f}, {0xbe7, 0xbef}, {0xc66, 0xc6f}, {0xce6, 0xcef},
+ {0xd66, 0xd6f}, {0xe50, 0xe59}, {0xed0, 0xed9}, {0xf20, 0xf29}};
+xmlChRangeGroup xmlIsDigitGroup = {14, 0, xmlIsDigit_srng};
+
+/*
+ * xmlIsExtender: extender code points from 0x100 upwards, each stored as
+ * a one-element range {v, v}, in ascending order (17 entries, no long
+ * ranges).
+ */
+static xmlChSRange xmlIsExtender_srng[] = { {0x2d0, 0x2d0}, {0x2d1, 0x2d1},
+ {0x387, 0x387}, {0x640, 0x640}, {0xe46, 0xe46}, {0xec6, 0xec6},
+ {0x3005, 0x3005}, {0x3031, 0x3031}, {0x3032, 0x3032}, {0x3033, 0x3033},
+ {0x3034, 0x3034}, {0x3035, 0x3035}, {0x309d, 0x309d}, {0x309e, 0x309e},
+ {0x30fc, 0x30fc}, {0x30fd, 0x30fd}, {0x30fe, 0x30fe}};
+xmlChRangeGroup xmlIsExtenderGroup = {17, 0, xmlIsExtender_srng};
+
+/*
+ * xmlIsIdeographic: XML 1.0 production [86]
+ *   Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
+ * The main block starts at 0x4e00 (it was mistyped as 0x4300).
+ * NOTE(review): {0xf900, 0xfa2d} is not part of production [86]; it is
+ * kept here unchanged - confirm whether it should remain.
+ */
+static xmlChSRange xmlIsIdeographic_srng[] = { {0x3007, 0x3007},
+    {0x3021, 0x3029}, {0x4e00, 0x9fa5}, {0xf900, 0xfa2d}};
+xmlChRangeGroup xmlIsIdeographicGroup = {4, 0, xmlIsIdeographic_srng};
+
+
+/**
+ * xmlCharInRange:
+ * @val: the character value to look up
+ * @rptr: the range group to search
+ *
+ * Binary-searches the group's range table for @val. Values below
+ * 0x10000 are looked up in the 'short' table, all others in the 'long'
+ * table. The search assumes each table is sorted in ascending order.
+ *
+ * Returns 1 if @val lies inside one of the ranges (bounds inclusive),
+ * 0 otherwise.
+ */
+int
+xmlCharInRange (unsigned int val, xmlChRangeGroupPtr rptr) {
+    int low, high, mid;
+    xmlChSRangePtr sptr;
+    xmlChLRangePtr lptr;
+    if (val < 0x10000) {	/* is val in 'short' or 'long' array? */
+        if (rptr->nbShortRange == 0)
+            return 0;
+        low = 0;
+        /* last valid index - starting at the count lets mid run past the end */
+        high = rptr->nbShortRange - 1;
+        sptr = rptr->shortRange;
+        while (low <= high) {
+            mid = (low + high) / 2;
+            if ((unsigned short) val < sptr[mid].low)
+                high = mid - 1;
+            else if ((unsigned short) val > sptr[mid].high)
+                low = mid + 1;
+            else
+                return 1;
+        }
+    } else {
+        if (rptr->nbLongRange == 0)
+            return 0;
+        low = 0;
+        /* last valid index, as above */
+        high = rptr->nbLongRange - 1;
+        lptr = rptr->longRange;
+        while (low <= high) {
+            mid = (low + high) / 2;
+            if (val < lptr[mid].low)
+                high = mid - 1;
+            else if (val > lptr[mid].high)
+                low = mid + 1;
+            else
+                return 1;
+        }
+    }
+    return 0;
+}
+
diff --git a/chvalid.def b/chvalid.def
new file mode 100755
index 00000000..eed7ab5f
--- /dev/null
+++ b/chvalid.def
@@ -0,0 +1,342 @@
+name xmlIsChar
+ur 0x09 0x0a 0x0d 0x20..0xff
+ur 0x000100..0x00d7ff
+ur 0x00e000..0x00fffd
+ur 0x010000..0x10ffff
+end xmlIsChar
+
+# PubidChar, XML 1.0 production [13]:
+#   #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
+# The apostrophe is given as 0x27; the set does not contain a backslash.
+name xmlIsPubidChar
+ur 0x20 0x0d 0x0a 'a'..'z' 'A'..'Z' '0'..'9'
+ur '-' 0x27 '(' ')' '+' ',' '.' '/'
+ur ':' '=' '?' ';' '!' '*' '#' '@'
+ur '$' '_' '%'
+end
+
+name xmlIsBlank
+ur 0x09 0x0a 0x0d 0x20
+end xmlIsBlank
+
+name xmlIsBaseChar
+ur 0x41..0x5a 0x61..0x7a 0xc0..0xd6 0xd8..0xf6 0xf8..0xff
+ur 0x000100..0x000131
+ur 0x000134..0x00013e
+ur 0x000141..0x000148
+ur 0x00014a..0x00017e
+ur 0x000180..0x0001c3
+ur 0x0001cd..0x0001f0
+ur 0x0001f4..0x0001f5
+ur 0x0001fa..0x000217
+ur 0x000250..0x0002a8
+ur 0x0002bb..0x0002c1
+ur 0x000386..0x000386
+ur 0x000388..0x00038a
+ur 0x00038c
+ur 0x00038e..0x0003a1
+ur 0x0003a3..0x0003ce
+ur 0x0003d0..0x0003d6
+ur 0x0003da
+ur 0x0003dc
+ur 0x0003de
+ur 0x0003e0
+ur 0x0003e2..0x0003f3
+ur 0x000401..0x00040c
+ur 0x00040e..0x00044f
+ur 0x000451..0x00045c
+ur 0x00045e..0x000481
+ur 0x000490..0x0004c4
+ur 0x0004c7..0x0004c8
+ur 0x0004cb..0x0004cc
+ur 0x0004d0..0x0004eb
+ur 0x0004ee..0x0004f5
+ur 0x0004f8..0x0004f9
+ur 0x000531..0x000556
+ur 0x000559
+ur 0x000561..0x000586
+ur 0x0005d0..0x0005ea
+ur 0x0005f0..0x0005f2
+ur 0x000621..0x00063a
+ur 0x000641..0x00064a
+ur 0x000671..0x0006b7
+ur 0x0006ba..0x0006be
+ur 0x0006c0..0x0006ce
+ur 0x0006d0..0x0006d3
+ur 0x0006d5
+ur 0x0006e5..0x0006e6
+ur 0x000905..0x000939
+ur 0x00093d
+ur 0x000958..0x000961
+ur 0x000985..0x00098c
+ur 0x00098f..0x000990
+ur 0x000993..0x0009a8
+ur 0x0009aa..0x0009b0
+ur 0x0009b2
+ur 0x0009b6..0x0009b9
+ur 0x0009dc..0x0009dd
+ur 0x0009df..0x0009e1
+ur 0x0009f0..0x0009f1
+ur 0x000a05..0x000a0a
+ur 0x000a0f..0x000a10
+ur 0x000a13..0x000a28
+ur 0x000a2a..0x000a30
+ur 0x000a32..0x000a33
+ur 0x000a35..0x000a36
+ur 0x000a38..0x000a39
+ur 0x000a59..0x000a5c
+ur 0x000a5e
+ur 0x000a72..0x000a74
+ur 0x000a85..0x000a8b
+ur 0x000a8d
+ur 0x000a8f..0x000a91
+ur 0x000a93..0x000aa8
+ur 0x000aaa..0x000ab0
+ur 0x000ab2..0x000ab3
+ur 0x000ab5..0x000ab9
+ur 0x000abd
+ur 0x000ae0
+ur 0x000b05..0x000b0c
+ur 0x000b0f..0x000b10
+ur 0x000b13..0x000b28
+ur 0x000b2a..0x000b30
+ur 0x000b32..0x000b33
+ur 0x000b36..0x000b39
+ur 0x000b3d
+ur 0x000b5c..0x000b5d
+ur 0x000b5f..0x000b61
+ur 0x000b85..0x000b8a
+ur 0x000b8e..0x000b90
+ur 0x000b92..0x000b95
+ur 0x000b99..0x000b9a
+ur 0x000b9c
+ur 0x000b9e..0x000b9f
+ur 0x000ba3..0x000ba4
+ur 0x000ba8..0x000baa
+ur 0x000bae..0x000bb5
+ur 0x000bb7..0x000bb9
+ur 0x000c05..0x000c0c
+ur 0x000c0e..0x000c10
+ur 0x000c12..0x000c28
+ur 0x000c2a..0x000c33
+ur 0x000c35..0x000c39
+ur 0x000c60..0x000c61
+ur 0x000c85..0x000c8c
+ur 0x000c8e..0x000c90
+ur 0x000c92..0x000ca8
+ur 0x000caa..0x000cb3
+ur 0x000cb5..0x000cb9
+ur 0x000cde
+ur 0x000ce0..0x000ce1
+ur 0x000d05..0x000d0c
+ur 0x000d0e..0x000d10
+ur 0x000d12..0x000d28
+ur 0x000d2a..0x000d39
+ur 0x000d60..0x000d61
+ur 0x000e01..0x000e2e
+ur 0x000e30
+ur 0x000e32..0x000e33
+ur 0x000e40..0x000e45
+ur 0x000e81..0x000e82
+ur 0x000e84..0x000e84
+ur 0x000e87..0x000e88
+ur 0x000e8a
+ur 0x000e8d
+ur 0x000e94..0x000e97
+ur 0x000e99..0x000e9f
+ur 0x000ea1..0x000ea3
+ur 0x000ea5
+ur 0x000ea7
+ur 0x000eaa..0x000eab
+ur 0x000ead..0x000eae
+ur 0x000eb0
+ur 0x000eb2..0x000eb3
+ur 0x000ebd
+ur 0x000ec0..0x000ec4
+ur 0x000f40..0x000f47
+ur 0x000f49..0x000f69
+ur 0x0010a0..0x0010c5
+ur 0x0010d0..0x0010f6
+ur 0x001100
+ur 0x001102..0x001103
+ur 0x001105..0x001107
+ur 0x001109
+ur 0x00110b..0x00110c
+ur 0x00110e..0x001112
+ur 0x00113c
+ur 0x00113e
+ur 0x001140
+ur 0x00114c
+ur 0x00114e
+ur 0x001150
+ur 0x001154..0x001155
+ur 0x001159
+ur 0x00115f..0x001161
+ur 0x001163
+ur 0x001165
+ur 0x001167
+ur 0x001169
+ur 0x00116d..0x00116e
+ur 0x001172..0x001173
+ur 0x001175
+ur 0x00119e
+ur 0x0011a8
+ur 0x0011ab
+ur 0x0011ae..0x0011af
+ur 0x0011b7..0x0011b8
+ur 0x0011ba
+ur 0x0011bc..0x0011c2
+ur 0x0011eb
+ur 0x0011f0
+ur 0x0011f9
+ur 0x001e00..0x001e9b
+ur 0x001ea0..0x001ef9
+ur 0x001f00..0x001f15
+ur 0x001f18..0x001f1d
+ur 0x001f20..0x001f45
+ur 0x001f48..0x001f4d
+ur 0x001f50..0x001f57
+ur 0x001f59
+ur 0x001f5b
+ur 0x001f5d
+ur 0x001f5f..0x001f7d
+ur 0x001f80..0x001fb4
+ur 0x001fb6..0x001fbc
+ur 0x001fbe
+ur 0x001fc2..0x001fc4
+ur 0x001fc6..0x001fcc
+ur 0x001fd0..0x001fd3
+ur 0x001fd6..0x001fdb
+ur 0x001fe0..0x001fec
+ur 0x001ff2..0x001ff4
+ur 0x001ff6..0x001ffc
+ur 0x002126
+ur 0x00212a..0x00212b
+ur 0x00212e
+ur 0x002180..0x002182
+ur 0x003041..0x003094
+ur 0x0030a1..0x0030fa
+ur 0x003105..0x00312c
+ur 0x00ac00..0x00d7a3
+end
+name xmlIsDigit
+ur 0x30..0x39
+ur 0x660..0x669
+ur 0x6f0..0x6f9
+ur 0x966..0x96f
+ur 0x9e6..0x9ef
+ur 0xa66..0xa6f
+ur 0xae6..0xaef
+ur 0xb66..0xb6f
+ur 0xbe7..0xbef
+ur 0xc66..0xc6f
+ur 0xce6..0xcef
+ur 0xd66..0xd6f
+ur 0xe50..0xe59
+ur 0xed0..0xed9
+ur 0xf20..0xf29
+end
+name xmlIsCombining
+ur 0x0300..0x0345
+ur 0x0360..0x0361
+ur 0x0483..0x0486
+ur 0x0591..0x05A1
+ur 0x05A3..0x05B9
+ur 0x05BB..0x05BD
+ur 0x05BF
+ur 0x05C1..0x05C2
+ur 0x05C4
+ur 0x064B..0x0652
+ur 0x0670
+ur 0x06D6..0x06DC
+ur 0x06DD..0x06DF
+ur 0x06E0..0x06E4
+ur 0x06E7..0x06E8
+ur 0x06EA..0x06ED
+ur 0x0901..0x0903
+ur 0x093C
+ur 0x093E..0x094C
+ur 0x094D
+ur 0x0951..0x0954
+ur 0x0962..0x0963
+ur 0x0981..0x0983
+ur 0x09BC
+ur 0x09BE
+ur 0x09BF
+ur 0x09C0..0x09C4
+ur 0x09C7..0x09C8
+ur 0x09CB..0x09CD
+ur 0x09D7
+ur 0x09E2..0x09E3
+ur 0x0A02
+ur 0x0A3C
+ur 0x0A3E
+ur 0x0A3F
+ur 0x0A40..0x0A42
+ur 0x0A47..0x0A48
+ur 0x0A4B..0x0A4D
+ur 0x0A70..0x0A71
+ur 0x0A81..0x0A83
+ur 0x0ABC
+ur 0x0ABE..0x0AC5
+ur 0x0AC7..0x0AC9
+ur 0x0ACB..0x0ACD
+ur 0x0B01..0x0B03
+ur 0x0B3C
+ur 0x0B3E..0x0B43
+ur 0x0B47..0x0B48
+ur 0x0B4B..0x0B4D
+ur 0x0B56..0x0B57
+ur 0x0B82..0x0B83
+ur 0x0BBE..0x0BC2
+ur 0x0BC6..0x0BC8
+ur 0x0BCA..0x0BCD
+ur 0x0BD7
+ur 0x0C01..0x0C03
+ur 0x0C3E..0x0C44
+ur 0x0C46..0x0C48
+ur 0x0C4A..0x0C4D
+ur 0x0C55..0x0C56
+ur 0x0C82..0x0C83
+ur 0x0CBE..0x0CC4
+ur 0x0CC6..0x0CC8
+ur 0x0CCA..0x0CCD
+ur 0x0CD5..0x0CD6
+ur 0x0D02..0x0D03
+ur 0x0D3E..0x0D43
+ur 0x0D46..0x0D48
+ur 0x0D4A..0x0D4D
+ur 0x0D57
+ur 0x0E31
+ur 0x0E34..0x0E3A
+ur 0x0E47..0x0E4E
+ur 0x0EB1
+ur 0x0EB4..0x0EB9
+ur 0x0EBB..0x0EBC
+ur 0x0EC8..0x0ECD
+ur 0x0F18..0x0F19
+ur 0x0F35
+ur 0x0F37
+ur 0x0F39
+ur 0x0F3E
+ur 0x0F3F
+ur 0x0F71..0x0F84
+ur 0x0F86..0x0F8B
+ur 0x0F90..0x0F95
+ur 0x0F97
+ur 0x0F99..0x0FAD
+ur 0x0FB1..0x0FB7
+ur 0x0FB9
+ur 0x20D0..0x20DC
+ur 0x20E1
+ur 0x302A..0x302F
+ur 0x3099
+ur 0x309A
+end
+name xmlIsExtender
+ur 0xb7 0x2d0 0x2d1 0x387 0x640 0xe46 0xec6 0x3005 0x3031 0x3032
+ur 0x3033 0x3034 0x3035 0x309d 0x309e 0x30fc 0x30fd 0x30fe
+end
+# Ideographic, XML 1.0 production [86]:
+#   [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
+# NOTE(review): 0xf900..0xfa2d is not in production [86]; kept unchanged,
+# confirm whether it should remain.
+name xmlIsIdeographic
+ur 0x4e00..0x9fa5
+ur 0xf900..0xfa2d
+ur 0x3021..0x3029
+ur 0x3007
+end
diff --git a/chvalid.h b/chvalid.h
new file mode 100644
index 00000000..0b538ebd
--- /dev/null
+++ b/chvalid.h
@@ -0,0 +1,105 @@
+/*
+ * chvalid.h: this header exports interfaces for the character
+ * range validation APIs
+ *
+ * This file is automatically generated from the cvs source
+ * definition files using the genChRanges.py Python script
+ *
+ * Generation date: Sat Oct 11 20:57:37 2003
+ * Sources: chvalid.def
+ * William Brack <wbrack@mmm.com.hk>
+ */
+
+#ifndef __XML_CHVALID_H__
+#define __XML_CHVALID_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Define our typedefs and structures
+ *
+ */
+/* A 'short' range: both bounds fit in 16 bits and are inclusive. */
+typedef struct _xmlChSRange xmlChSRange;
+typedef xmlChSRange *xmlChSRangePtr;
+struct _xmlChSRange {
+ unsigned short low; /* first value of the range */
+ unsigned short high; /* last value of the range */
+};
+
+/* A 'long' range: same layout with full-width unsigned bounds. */
+typedef struct _xmlChLRange xmlChLRange;
+typedef xmlChLRange *xmlChLRangePtr;
+struct _xmlChLRange {
+ unsigned low; /* first value of the range */
+ unsigned high; /* last value of the range */
+};
+
+/*
+ * A group of ranges as searched by xmlCharInRange(): values below
+ * 0x10000 are looked up in shortRange, all others in longRange.
+ */
+typedef struct _xmlChRangeGroup xmlChRangeGroup;
+typedef xmlChRangeGroup *xmlChRangeGroupPtr;
+struct _xmlChRangeGroup {
+ int nbShortRange; /* number of entries in shortRange */
+ int nbLongRange; /* number of entries in longRange */
+ xmlChSRangePtr shortRange; /* points to an array of ranges */
+ xmlChLRangePtr longRange; /* array of long ranges; not read when nbLongRange is 0 */
+};
+
+/* Range checking routine */
+int xmlCharInRange(unsigned int val, const xmlChRangeGroupPtr group);
+
+#define xmlIsBaseChar_ch(c) ( ((0x41<= (c)) && ((c) <= 0x5a)) || \
+ ((0x61<= (c)) && ((c) <= 0x7a)) || \
+ ((0xc0<= (c)) && ((c) <= 0xd6)) || \
+ ((0xd8<= (c)) && ((c) <= 0xf6)) || \
+ ((0xf8<= (c)) && ((c) <= 0xff)))
+#define xmlIsBaseChar(c) (((c) < 0x100) ? \
+ xmlIsBaseChar_ch((c)) : \
+ xmlCharInRange((c), &xmlIsBaseCharGroup))
+
+extern xmlChRangeGroup xmlIsBaseCharGroup;
+#define xmlIsBlank_ch(c) ( ((c) == 0x20) || \
+ ((0x9<= (c)) && ((c) <= 0xa)) || \
+ ((c) == 0xd))
+#define xmlIsBlank(c) (((c) < 0x100) ? \
+ xmlIsBlank_ch((c)) : 0)
+
+#define xmlIsChar_ch(c) ( ((0x9<= (c)) && ((c) <= 0xa)) || \
+ ((c) == 0xd) || \
+ ((0x20<= (c)) && ((c) <= 0xff)))
+#define xmlIsChar(c) (((c) < 0x100) ? \
+ xmlIsChar_ch((c)) : \
+ xmlCharInRange((c), &xmlIsCharGroup))
+
+extern xmlChRangeGroup xmlIsCharGroup;
+#define xmlIsCombining(c) (((c) < 0x100) ? \
+ 0 : \
+ xmlCharInRange((c), &xmlIsCombiningGroup))
+
+extern xmlChRangeGroup xmlIsCombiningGroup;
+#define xmlIsDigit_ch(c) ( ((0x30<= (c)) && ((c) <= 0x39)))
+#define xmlIsDigit(c) (((c) < 0x100) ? \
+ xmlIsDigit_ch((c)) : \
+ xmlCharInRange((c), &xmlIsDigitGroup))
+
+extern xmlChRangeGroup xmlIsDigitGroup;
+#define xmlIsExtender_ch(c) ( ((c) == 0xb7))
+#define xmlIsExtender(c) (((c) < 0x100) ? \
+ xmlIsExtender_ch((c)) : \
+ xmlCharInRange((c), &xmlIsExtenderGroup))
+
+extern xmlChRangeGroup xmlIsExtenderGroup;
+#define xmlIsIdeographic(c) (((c) < 0x100) ? \
+ 0 : \
+ xmlCharInRange((c), &xmlIsIdeographicGroup))
+
+extern xmlChRangeGroup xmlIsIdeographicGroup;
+extern unsigned char xmlIsPubidChar_tab[256];
+#define xmlIsPubidChar_ch(c) (xmlIsPubidChar_tab[(c)])
+#define xmlIsPubidChar(c) (((c) < 0x100) ? \
+ xmlIsPubidChar_ch((c)) : 0)
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* __XML_CHVALID_H__ */
diff --git a/genChRanges.py b/genChRanges.py
new file mode 100755
index 00000000..f35cc5cb
--- /dev/null
+++ b/genChRanges.py
@@ -0,0 +1,465 @@
+#!/usr/bin/python -u
+#
+# Portions of this script have been (shamelessly) stolen from the
+# prior work of Daniel Veillard (genUnicode.py)
+#
+# I, however, take full credit for any bugs, errors or difficulties :-)
+#
+# William Brack
+# October 2003
+#
+
+import sys
+import string
+import time
+
+#
+# A little routine to assign a 'meaningful' name to a range
+#
+def rangename( intvl ):
+    # Build "r<start>x<end>" from the hex digits of both bounds
+    # (the leading "0x" of each hex() result is dropped).
+    lo, hi = intvl
+    return "r%sx%s" % (hex(lo)[2:], hex(hi)[2:])
+
+#
+# A routine to take a list of yes/no (1, 0) values and turn it
+# into a list of ranges. This will later be used to determine whether
+# to generate single-byte lookup tables, or inline comparisons
+#
+def makeRange(lst):
+    # Walk the flag list once, collecting (first, last) index pairs for
+    # every run that starts at a 1 and extends up to the next 0 (or to
+    # the end of the list).
+    ranges = []
+    total = len(lst)
+    cursor = 0
+    while cursor < total:
+        if lst[cursor] != 1:            # not the start of a run yet
+            cursor += 1
+            continue
+        begin = cursor
+        while cursor < total and lst[cursor] != 0:
+            cursor += 1                 # extend the run
+        ranges.append((begin, cursor - 1))
+        cursor += 1                     # step over the terminating 0
+    return ranges
+
+sources = "chvalid.def"                 # input filename
+
+# minTableSize gives the minimum number of ranges which must be present
+# before a 256-byte lookup table is produced. If there are less than this
+# number, a macro with inline comparisons is generated
+minTableSize = 6
+
+# Raised while processing the definition file when a 'ur' line is
+# misplaced, out of bounds or duplicates an earlier value. The script
+# raises it in several places, so it has to be defined before the
+# parsing loop runs.
+class ValidationError(Exception):
+    pass
+
+# dictionary of ranges, key=range, element contains list of funcs using it
+Ranges = {}
+
+# dictionary of functions, key=name, element contains char-map and range-list
+Functs = {}
+
+# parser state: 0 = outside any function block, 1 = inside a 'name' block
+state = 0
+
+try:
+ defines = open("chvalid.def", "r")
+except:
+ print "Missing chvalid.def, aborting ..."
+ sys.exit(1)
+
+#
+# The lines in the .def file have three types:-
+# name: Defines a new function block
+# ur: Defines individual or ranges of unicode values
+# end: Indicates the end of the function block
+#
+# These lines are processed below.
+#
+for line in defines.readlines():
+ # ignore blank lines, or lines beginning with '#'
+ if line[0] == '#':
+ continue
+ line = string.strip(line)
+ if line == '':
+ continue
+ # split line into space-separated fields, then split on type
+ try:
+ fields = string.split(line, ' ')
+ #
+ # name line:
+ # validate any previous function block already ended
+ # validate this function not already defined
+ # initialize an entry in the function dictionary
+ # including a mask table with no values yet defined
+ #
+ if fields[0] == 'name':
+ name = fields[1]
+ if state != 0:
+ print "'name' %s found before previous name" \
+ "completed" % (fields[1])
+ continue
+ state = 1
+ if Functs.has_key(name):
+ print "name '%s' already present - may give" \
+ " wrong results" % (name)
+ else:
+ # dict entry with two list elements (chdata, rangedata)
+ Functs[name] = [ [], [] ]
+ for v in range(256):
+ Functs[name][0].append(0)
+ #
+ # end line:
+ # validate there was a preceding function name line
+ # set state to show no current function active
+ #
+ elif fields[0] == 'end':
+ if state == 0:
+ print "'end' found outside of function block"
+ continue
+ state = 0
+
+ #
+ # ur line:
+ # validate function has been defined
+ # process remaining fields on the line, which may be either
+ # individual unicode values or ranges of values
+ #
+ elif fields[0] == 'ur':
+ if state != 1:
+ raise ValidationError, "'ur' found outside of 'name' block"
+ for el in fields[1:]:
+ pos = string.find(el, '..')
+ # pos <=0 means not a range, so must be individual value
+ if pos <= 0:
+ # cheap handling of hex or decimal values
+ if el[0:2] == '0x':
+ value = int(el[2:],16)
+ elif el[0] == "'":
+ value = ord(el[1])
+ else:
+ value = int(el)
+ if ((value < 0) | (value > 0x1fffff)):
+ raise ValidationError, 'Illegal value (%s) in ch for'\
+ ' name %s' % (el,name)
+ # for ur we have only ranges (makes things simpler),
+ # so convert val to range
+ currange = (value, value)
+ # pos > 0 means this is a range, so isolate/validate
+ # the interval
+ else:
+ # split the range into its first-val, last-val
+ (first, last) = string.split(el, "..")
+ # convert values from text into binary
+ if first[0:2] == '0x':
+ start = int(first[2:],16)
+ elif first[0] == "'":
+ start = ord(first[1])
+ else:
+ start = int(first)
+ if last[0:2] == '0x':
+ end = int(last[2:],16)
+ elif last[0] == "'":
+ end = ord(last[1])
+ else:
+ end = int(last)
+ if (start < 0) | (end > 0x1fffff) | (start > end):
+ raise ValidationError, "Invalid range '%s'" % el
+ currange = (start, end)
+ # common path - 'currange' has the range, now take care of it
+ # We split on single-byte values vs. multibyte
+ if currange[1] < 0x100: # single-byte
+ for ch in range(currange[0],currange[1]+1):
+ # validate that value not previously defined
+ if Functs[name][0][ch]:
+ msg = "Duplicate ch value '%s' for name '%s'" % (el, name)
+ raise ValidationError, msg
+ Functs[name][0][ch] = 1
+ else: # multi-byte
+ if Ranges.has_key(currange):
+ Ranges[currange].append(name)
+ else:
+ Ranges[currange] = [ name ]
+ if currange in Functs[name][1]:
+ raise ValidationError, "range already defined in" \
+ " function"
+ else:
+ Functs[name][1].append(currange)
+
+ except:
+ print "Failed to process line: %s" % (line)
+ raise
+#
+# At this point, the entire definition file has been processed. Now we
+# enter the output phase, where we generate the two files chvalid.c and
+# chvalid.h
+#
+# To do this, we first output the 'static' data (heading, fixed
+# definitions, etc.), then output the 'dynamic' data (the results
+# of the above processing), and finally output closing 'static' data
+# (e.g. the subroutine to process the ranges)
+#
+
+#
+# Generate the headings:
+#
+try:
+ header = open("chvalid.h", "w")
+except:
+ print "Failed to open chvalid.h"
+ sys.exit(1)
+
+try:
+ output = open("chvalid.c", "w")
+except:
+ print "Failed to open chvalid.c"
+ sys.exit(1)
+
+date = time.asctime(time.localtime(time.time()))
+
+header.write(
+"""/*
+ * chvalid.h: this header exports interfaces for the character
+ * range validation APIs
+ *
+ * This file is automatically generated from the cvs source
+ * definition files using the genChRanges.py Python script
+ *
+ * Generation date: %s
+ * Sources: %s
+ * William Brack <wbrack@mmm.com.hk>
+ */
+
+#ifndef __XML_CHVALID_H__
+#define __XML_CHVALID_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Define our typedefs and structures
+ *
+ */
+typedef struct _xmlChSRange xmlChSRange;
+typedef xmlChSRange *xmlChSRangePtr;
+struct _xmlChSRange {
+ unsigned short low;
+ unsigned short high;
+};
+
+typedef struct _xmlChLRange xmlChLRange;
+typedef xmlChLRange *xmlChLRangePtr;
+struct _xmlChLRange {
+ unsigned low;
+ unsigned high;
+};
+
+typedef struct _xmlChRangeGroup xmlChRangeGroup;
+typedef xmlChRangeGroup *xmlChRangeGroupPtr;
+struct _xmlChRangeGroup {
+ int nbShortRange;
+ int nbLongRange;
+ xmlChSRangePtr shortRange; /* points to an array of ranges */
+ xmlChLRangePtr longRange;
+};
+
+/* Range checking routine */
+int xmlCharInRange(unsigned int val, const xmlChRangeGroupPtr group);
+
+""" % (date, sources));
+output.write(
+"""/*
+ * chvalid.c: this module implements the character range
+ * validation APIs
+ *
+ * This file is automatically generated from the cvs source
+ * definition files using the genChRanges.py Python script
+ *
+ * Generation date: %s
+ * Sources: %s
+ * William Brack <wbrack@mmm.com.hk>
+ */
+
+#include "chvalid.h"
+
+/*
+ * The initial tables ({func_name}_tab) are used to validate whether a
+ * single-byte character is within the specified group. Each table
+ * contains 256 bytes, with each byte representing one of the 256
+ * possible characters. If the table byte is set, the character is
+ * allowed.
+ *
+ */
+""" % (date, sources));
+
+#
+# Now output the generated data.
+# We try to produce the best execution times. Tests have shown that validation
+# with direct table lookup is, when there are a "small" number of valid items,
+# still not as fast as a sequence of inline compares. So, if the single-byte
+# portion of a range has a "small" number of ranges, we output a macro for inline
+# compares, otherwise we output a 256-byte table and a macro to use it.
+#
+
+fkeys = Functs.keys() # Dictionary of all defined functions
+fkeys.sort() # Put some order to our output
+
+for f in fkeys:
+
+# First we convert the specified single-byte values into a group of ranges.
+# If the total number of such ranges is less than minTableSize, we generate
+# an inline macro for direct comparisons; if greater, we generate a lookup
+# table.
+ if max(Functs[f][0]) > 0: # only check if at least one entry
+ rangeTable = makeRange(Functs[f][0])
+ numRanges = len(rangeTable)
+ if numRanges >= minTableSize: # table is worthwhile
+ header.write("extern unsigned char %s_tab[256];\n" % f)
+ header.write("#define %s_ch(c)\t(%s_tab[(c)])\n" % (f, f))
+
+ # write the constant data to the code file
+ output.write("unsigned char %s_tab[256] = {\n" % f)
+ pline = " "
+ for n in range(255):
+ pline += " 0x%02x," % Functs[f][0][n]
+ if len(pline) > 72:
+ output.write(pline + "\n")
+ pline = " "
+ output.write(pline + " 0x%02x };\n\n" % Functs[f][0][255])
+
+ else: # inline check is used
+ # first another little optimisation - if space is present,
+ # put it at the front of the list so it is checked first
+ try:
+ ix = rangeTable.remove((0x20, 0x20))
+ rangeTable.insert(0, (0x20, 0x20))
+ except:
+ pass
+ pline = "#define %s_ch(c)\t( " % f
+ firstFlag = 1
+ for rg in rangeTable:
+ if not firstFlag:
+ pline += " || \\\n\t\t\t"
+ else:
+ firstFlag = 0
+ if rg[0] == rg[1]: # single value - check equal
+ pline += "((c) == " + hex(rg[0]) + ")"
+ else: # value range
+ pline += "((" + hex(rg[0]) + "<= (c)) &&"
+ pline += " ((c) <= " + hex(rg[1]) + "))"
+ pline += ")\n"
+ header.write(pline)
+
+ header.write("#define %s(c)\t(((c) < 0x100) ? \\\n\t\t\t\t" % f)
+ if max(Functs[f][0]) > 0:
+ header.write("%s_ch((c)) :" % f)
+ else:
+ header.write("0 :")
+
+ # if no ranges defined, value invalid if >= 0x100
+ if len(Functs[f][1]) == 0:
+ header.write(" 0)\n\n")
+ else:
+ header.write(" \\\n\t\t\t\txmlCharInRange((c), &%sGroup))\n\n" % f)
+
+ if len(Functs[f][1]) > 0:
+ header.write("extern xmlChRangeGroup %sGroup;\n" % f)
+
+
+#
+# Next we do the unicode ranges
+#
+
+# For every function with ranges at or above 0x100, write its sorted
+# range arrays (short ranges first, then long ones) followed by the
+# xmlChRangeGroup initializer that ties them together.
+for f in fkeys:
+    if len(Functs[f][1]) > 0:   # only generate if unicode ranges present
+        rangeTable = Functs[f][1]
+        rangeTable.sort()       # ascending tuple sequence
+        numShort = 0
+        numLong = 0
+        for rg in rangeTable:
+            if rg[1] < 0x10000:         # if short value
+                if numShort == 0:       # first occurrence
+                    pline = "static xmlChSRange %s_srng[] = { " % f
+                else:
+                    pline += ", "
+                numShort += 1
+                if len(pline) > 60:
+                    output.write(pline + "\n")
+                    pline = " "
+                pline += "{0x%x, 0x%x}" % (rg[0], rg[1])
+            else:                       # if long value
+                if numLong == 0:        # first occurrence
+                    if numShort > 0:    # if there were shorts, finish them off
+                        output.write(pline + "};\n")
+                    pline = "static xmlChLRange %s_lrng[] = { " % f
+                else:
+                    pline += ", "
+                numLong += 1
+                if len(pline) > 60:
+                    output.write(pline + "\n")
+                    pline = " "
+                pline += "{0x%x, 0x%x}" % (rg[0], rg[1])
+        output.write(pline + "};\n")    # finish off last group
+
+        pline = "xmlChRangeGroup %sGroup = {%d, %d, " % (f, numShort, numLong)
+        if numShort > 0:
+            pline += "%s_srng" % f
+        else:
+            # only long ranges exist: the short-array slot still needs a
+            # value, otherwise the initializer reads "{0, n, , x_lrng}"
+            pline += "(xmlChSRangePtr) 0"
+        if numLong > 0:
+            pline += ", %s_lrng" % f
+
+        output.write(pline + "};\n\n")
+#
+# Run complete - write trailers and close the output files
+#
+
+header.write("""
+#ifdef __cplusplus
+}
+#endif
+#endif /* __XML_CHVALID_H__ */
+""");
+
+header.close()
+
+# Trailer of chvalid.c: the binary-search routine shared by all range
+# groups. 'high' must start at the last valid index (count - 1);
+# starting at the count lets 'mid' index one entry past the table.
+output.write(
+"""
+int
+xmlCharInRange (unsigned int val, xmlChRangeGroupPtr rptr) {
+ int low, high, mid;
+ xmlChSRangePtr sptr;
+ xmlChLRangePtr lptr;
+ if (val < 0x10000) { /* is val in 'short' or 'long' array? */
+ if (rptr->nbShortRange == 0)
+ return 0;
+ low = 0;
+ high = rptr->nbShortRange - 1;
+ sptr = rptr->shortRange;
+ while (low <= high) {
+ mid = (low + high) / 2;
+ if ((unsigned short) val < sptr[mid].low)
+ high = mid - 1;
+ else if ((unsigned short) val > sptr[mid].high)
+ low = mid + 1;
+ else
+ return 1;
+ }
+ } else {
+ if (rptr->nbLongRange == 0)
+ return 0;
+ low = 0;
+ high = rptr->nbLongRange - 1;
+ lptr = rptr->longRange;
+ while (low <= high) {
+ mid = (low + high) / 2;
+ if (val < lptr[mid].low)
+ high = mid - 1;
+ else if (val > lptr[mid].high)
+ low = mid + 1;
+ else
+ return 1;
+ }
+ }
+ return 0;
+}
+
+""");
+
+output.close()
diff --git a/include/libxml/Makefile.am b/include/libxml/Makefile.am
index b9b7014c..1bf338ce 100644
--- a/include/libxml/Makefile.am
+++ b/include/libxml/Makefile.am
@@ -43,7 +43,8 @@ xmlinc_HEADERS = \
dict.h \
SAX2.h \
xmlexports.h \
- xmldwalk.h
+ xmldwalk.h \
+ chvalid.h
install-exec-hook:
$(mkinstalldirs) $(DESTDIR)$(xmlincdir)
diff --git a/include/libxml/parserInternals.h b/include/libxml/parserInternals.h
index 800cdef5..29560645 100644
--- a/include/libxml/parserInternals.h
+++ b/include/libxml/parserInternals.h
@@ -13,6 +13,7 @@
#include <libxml/xmlversion.h>
#include <libxml/parser.h>
#include <libxml/HTMLparser.h>
+#include <libxml/chvalid.h>
#ifdef __cplusplus
extern "C" {
@@ -48,8 +49,7 @@ extern "C" {
* [2] Char ::= #x9 | #xA | #xD | [#x20...]
* any byte character in the accepted range
*/
-#define IS_BYTE_CHAR(c) \
- (((c) >= 0x20) || ((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D))
+#define IS_BYTE_CHAR(c) xmlIsChar_ch(c)
/**
* IS_CHAR:
@@ -61,11 +61,7 @@ extern "C" {
* | [#x10000-#x10FFFF]
* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
*/
-#define IS_CHAR(c) \
- ((((c) >= 0x20) && ((c) <= 0xD7FF)) || \
- ((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D) || \
- (((c) >= 0xE000) && ((c) <= 0xFFFD)) || \
- (((c) >= 0x10000) && ((c) <= 0x10FFFF)))
+#define IS_CHAR(c) xmlIsChar(c)
/**
* IS_BLANK:
@@ -75,8 +71,7 @@ extern "C" {
*
* [3] S ::= (#x20 | #x9 | #xD | #xA)+
*/
-#define IS_BLANK(c) (((c) == 0x20) || ((c) == 0x09) || ((c) == 0xA) || \
- ((c) == 0x0D))
+#define IS_BLANK(c) xmlIsBlank(c)
/**
* IS_BASECHAR:
@@ -193,15 +188,7 @@ XMLPUBVAR const xmlChar xmlStringComment[];
/*
* Function to finish the work of the macros where needed.
*/
-XMLPUBFUN int XMLCALL xmlIsBaseChar (int c);
-XMLPUBFUN int XMLCALL xmlIsBlank (int c);
-XMLPUBFUN int XMLCALL xmlIsPubidChar (int c);
-XMLPUBFUN int XMLCALL xmlIsLetter (int c);
-XMLPUBFUN int XMLCALL xmlIsDigit (int c);
-XMLPUBFUN int XMLCALL xmlIsIdeographic(int c);
-XMLPUBFUN int XMLCALL xmlIsExtender (int c);
-XMLPUBFUN int XMLCALL xmlIsCombining (int c);
-XMLPUBFUN int XMLCALL xmlIsChar (int c);
+XMLPUBFUN int XMLCALL xmlIsLetter (int c);
/**
* Parser context.
diff --git a/parserInternals.c b/parserInternals.c
index 562c5780..2d1bd697 100644
--- a/parserInternals.c
+++ b/parserInternals.c
@@ -53,6 +53,7 @@
#include <libxml/catalog.h>
#endif
#include <libxml/globals.h>
+#include <libxml/chvalid.h>
/*
* Various global defaults for parsing
@@ -196,472 +197,6 @@ xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
}
}
-/************************************************************************
- * *
- * Some functions to avoid too large macros *
- * *
- ************************************************************************/
-
-/**
- * xmlIsChar:
- * @c: an unicode character (int)
- *
- * Check whether the character is allowed by the production
- * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
- * | [#x10000-#x10FFFF]
- * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
- * Also available as a macro IS_CHAR()
- *
- * Returns 0 if not, non-zero otherwise
- */
-int
-xmlIsChar(int c) {
- return(
- ((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D) ||
- (((c) >= 0x20) && ((c) <= 0xD7FF)) ||
- (((c) >= 0xE000) && ((c) <= 0xFFFD)) ||
- (((c) >= 0x10000) && ((c) <= 0x10FFFF)));
-}
-
-/**
- * xmlIsBlank:
- * @c: an unicode character (int)
- *
- * Check whether the character is allowed by the production
- * [3] S ::= (#x20 | #x9 | #xD | #xA)+
- * Also available as a macro IS_BLANK()
- *
- * Returns 0 if not, non-zero otherwise
- */
-int
-xmlIsBlank(int c) {
- return(((c) == 0x20) || ((c) == 0x09) || ((c) == 0xA) || ((c) == 0x0D));
-}
-
-static int xmlBaseArray[] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */
- 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */
- 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */
-};
-
-/**
- * xmlIsBaseChar:
- * @c: an unicode character (int)
- *
- * Check whether the character is allowed by the production
- * [85] BaseChar ::= ... long list see REC ...
- *
- * VI is your friend !
- * :1,$ s/\[#x\([0-9A-Z]*\)-#x\([0-9A-Z]*\)\]/ (((c) >= 0x\1) \&\& ((c) <= 0x\2)) ||/
- * and
- * :1,$ s/#x\([0-9A-Z]*\)/ ((c) == 0x\1) ||/
- *
- * Returns 0 if not, non-zero otherwise
- */
-int
-xmlIsBaseChar(int c) {
- if (c < 0x0100) return(xmlBaseArray[c]);
- return((((c) >= 0x0100) && ((c) <= 0x0131)) ||
- (((c) >= 0x0134) && ((c) <= 0x013E)) ||
- (((c) >= 0x0141) && ((c) <= 0x0148)) ||
- (((c) >= 0x014A) && ((c) <= 0x017E)) ||
- (((c) >= 0x0180) && ((c) <= 0x01C3)) ||
- (((c) >= 0x01CD) && ((c) <= 0x01F0)) ||
- (((c) >= 0x01F4) && ((c) <= 0x01F5)) ||
- (((c) >= 0x01FA) && ((c) <= 0x0217)) ||
- (((c) >= 0x0250) && ((c) <= 0x02A8)) ||
- (((c) >= 0x02BB) && ((c) <= 0x02C1)) ||
- ((c) == 0x0386) ||
- (((c) >= 0x0388) && ((c) <= 0x038A)) ||
- ((c) == 0x038C) ||
- (((c) >= 0x038E) && ((c) <= 0x03A1)) ||
- (((c) >= 0x03A3) && ((c) <= 0x03CE)) ||
- (((c) >= 0x03D0) && ((c) <= 0x03D6)) ||
- ((c) == 0x03DA) ||
- ((c) == 0x03DC) ||
- ((c) == 0x03DE) ||
- ((c) == 0x03E0) ||
- (((c) >= 0x03E2) && ((c) <= 0x03F3)) ||
- (((c) >= 0x0401) && ((c) <= 0x040C)) ||
- (((c) >= 0x040E) && ((c) <= 0x044F)) ||
- (((c) >= 0x0451) && ((c) <= 0x045C)) ||
- (((c) >= 0x045E) && ((c) <= 0x0481)) ||
- (((c) >= 0x0490) && ((c) <= 0x04C4)) ||
- (((c) >= 0x04C7) && ((c) <= 0x04C8)) ||
- (((c) >= 0x04CB) && ((c) <= 0x04CC)) ||
- (((c) >= 0x04D0) && ((c) <= 0x04EB)) ||
- (((c) >= 0x04EE) && ((c) <= 0x04F5)) ||
- (((c) >= 0x04F8) && ((c) <= 0x04F9)) ||
- (((c) >= 0x0531) && ((c) <= 0x0556)) ||
- ((c) == 0x0559) ||
- (((c) >= 0x0561) && ((c) <= 0x0586)) ||
- (((c) >= 0x05D0) && ((c) <= 0x05EA)) ||
- (((c) >= 0x05F0) && ((c) <= 0x05F2)) ||
- (((c) >= 0x0621) && ((c) <= 0x063A)) ||
- (((c) >= 0x0641) && ((c) <= 0x064A)) ||
- (((c) >= 0x0671) && ((c) <= 0x06B7)) ||
- (((c) >= 0x06BA) && ((c) <= 0x06BE)) ||
- (((c) >= 0x06C0) && ((c) <= 0x06CE)) ||
- (((c) >= 0x06D0) && ((c) <= 0x06D3)) ||
- ((c) == 0x06D5) ||
- (((c) >= 0x06E5) && ((c) <= 0x06E6)) ||
- (((c) >= 0x905) && ( /* accelerator */
- (((c) >= 0x0905) && ((c) <= 0x0939)) ||
- ((c) == 0x093D) ||
- (((c) >= 0x0958) && ((c) <= 0x0961)) ||
- (((c) >= 0x0985) && ((c) <= 0x098C)) ||
- (((c) >= 0x098F) && ((c) <= 0x0990)) ||
- (((c) >= 0x0993) && ((c) <= 0x09A8)) ||
- (((c) >= 0x09AA) && ((c) <= 0x09B0)) ||
- ((c) == 0x09B2) ||
- (((c) >= 0x09B6) && ((c) <= 0x09B9)) ||
- (((c) >= 0x09DC) && ((c) <= 0x09DD)) ||
- (((c) >= 0x09DF) && ((c) <= 0x09E1)) ||
- (((c) >= 0x09F0) && ((c) <= 0x09F1)) ||
- (((c) >= 0x0A05) && ((c) <= 0x0A0A)) ||
- (((c) >= 0x0A0F) && ((c) <= 0x0A10)) ||
- (((c) >= 0x0A13) && ((c) <= 0x0A28)) ||
- (((c) >= 0x0A2A) && ((c) <= 0x0A30)) ||
- (((c) >= 0x0A32) && ((c) <= 0x0A33)) ||
- (((c) >= 0x0A35) && ((c) <= 0x0A36)) ||
- (((c) >= 0x0A38) && ((c) <= 0x0A39)) ||
- (((c) >= 0x0A59) && ((c) <= 0x0A5C)) ||
- ((c) == 0x0A5E) ||
- (((c) >= 0x0A72) && ((c) <= 0x0A74)) ||
- (((c) >= 0x0A85) && ((c) <= 0x0A8B)) ||
- ((c) == 0x0A8D) ||
- (((c) >= 0x0A8F) && ((c) <= 0x0A91)) ||
- (((c) >= 0x0A93) && ((c) <= 0x0AA8)) ||
- (((c) >= 0x0AAA) && ((c) <= 0x0AB0)) ||
- (((c) >= 0x0AB2) && ((c) <= 0x0AB3)) ||
- (((c) >= 0x0AB5) && ((c) <= 0x0AB9)) ||
- ((c) == 0x0ABD) ||
- ((c) == 0x0AE0) ||
- (((c) >= 0x0B05) && ((c) <= 0x0B0C)) ||
- (((c) >= 0x0B0F) && ((c) <= 0x0B10)) ||
- (((c) >= 0x0B13) && ((c) <= 0x0B28)) ||
- (((c) >= 0x0B2A) && ((c) <= 0x0B30)) ||
- (((c) >= 0x0B32) && ((c) <= 0x0B33)) ||
- (((c) >= 0x0B36) && ((c) <= 0x0B39)) ||
- ((c) == 0x0B3D) ||
- (((c) >= 0x0B5C) && ((c) <= 0x0B5D)) ||
- (((c) >= 0x0B5F) && ((c) <= 0x0B61)) ||
- (((c) >= 0x0B85) && ((c) <= 0x0B8A)) ||
- (((c) >= 0x0B8E) && ((c) <= 0x0B90)) ||
- (((c) >= 0x0B92) && ((c) <= 0x0B95)) ||
- (((c) >= 0x0B99) && ((c) <= 0x0B9A)) ||
- ((c) == 0x0B9C) ||
- (((c) >= 0x0B9E) && ((c) <= 0x0B9F)) ||
- (((c) >= 0x0BA3) && ((c) <= 0x0BA4)) ||
- (((c) >= 0x0BA8) && ((c) <= 0x0BAA)) ||
- (((c) >= 0x0BAE) && ((c) <= 0x0BB5)) ||
- (((c) >= 0x0BB7) && ((c) <= 0x0BB9)) ||
- (((c) >= 0x0C05) && ((c) <= 0x0C0C)) ||
- (((c) >= 0x0C0E) && ((c) <= 0x0C10)) ||
- (((c) >= 0x0C12) && ((c) <= 0x0C28)) ||
- (((c) >= 0x0C2A) && ((c) <= 0x0C33)) ||
- (((c) >= 0x0C35) && ((c) <= 0x0C39)) ||
- (((c) >= 0x0C60) && ((c) <= 0x0C61)) ||
- (((c) >= 0x0C85) && ((c) <= 0x0C8C)) ||
- (((c) >= 0x0C8E) && ((c) <= 0x0C90)) ||
- (((c) >= 0x0C92) && ((c) <= 0x0CA8)) ||
- (((c) >= 0x0CAA) && ((c) <= 0x0CB3)) ||
- (((c) >= 0x0CB5) && ((c) <= 0x0CB9)) ||
- ((c) == 0x0CDE) ||
- (((c) >= 0x0CE0) && ((c) <= 0x0CE1)) ||
- (((c) >= 0x0D05) && ((c) <= 0x0D0C)) ||
- (((c) >= 0x0D0E) && ((c) <= 0x0D10)) ||
- (((c) >= 0x0D12) && ((c) <= 0x0D28)) ||
- (((c) >= 0x0D2A) && ((c) <= 0x0D39)) ||
- (((c) >= 0x0D60) && ((c) <= 0x0D61)) ||
- (((c) >= 0x0E01) && ((c) <= 0x0E2E)) ||
- ((c) == 0x0E30) ||
- (((c) >= 0x0E32) && ((c) <= 0x0E33)) ||
- (((c) >= 0x0E40) && ((c) <= 0x0E45)) ||
- (((c) >= 0x0E81) && ((c) <= 0x0E82)) ||
- ((c) == 0x0E84) ||
- (((c) >= 0x0E87) && ((c) <= 0x0E88)) ||
- ((c) == 0x0E8A) ||
- ((c) == 0x0E8D) ||
- (((c) >= 0x0E94) && ((c) <= 0x0E97)) ||
- (((c) >= 0x0E99) && ((c) <= 0x0E9F)) ||
- (((c) >= 0x0EA1) && ((c) <= 0x0EA3)) ||
- ((c) == 0x0EA5) ||
- ((c) == 0x0EA7) ||
- (((c) >= 0x0EAA) && ((c) <= 0x0EAB)) ||
- (((c) >= 0x0EAD) && ((c) <= 0x0EAE)) ||
- ((c) == 0x0EB0) ||
- (((c) >= 0x0EB2) && ((c) <= 0x0EB3)) ||
- ((c) == 0x0EBD) ||
- (((c) >= 0x0EC0) && ((c) <= 0x0EC4)) ||
- (((c) >= 0x0F40) && ((c) <= 0x0F47)) ||
- (((c) >= 0x0F49) && ((c) <= 0x0F69)) ||
- (((c) >= 0x10A0) && ( /* accelerator */
- (((c) >= 0x10A0) && ((c) <= 0x10C5)) ||
- (((c) >= 0x10D0) && ((c) <= 0x10F6)) ||
- ((c) == 0x1100) ||
- (((c) >= 0x1102) && ((c) <= 0x1103)) ||
- (((c) >= 0x1105) && ((c) <= 0x1107)) ||
- ((c) == 0x1109) ||
- (((c) >= 0x110B) && ((c) <= 0x110C)) ||
- (((c) >= 0x110E) && ((c) <= 0x1112)) ||
- ((c) == 0x113C) ||
- ((c) == 0x113E) ||
- ((c) == 0x1140) ||
- ((c) == 0x114C) ||
- ((c) == 0x114E) ||
- ((c) == 0x1150) ||
- (((c) >= 0x1154) && ((c) <= 0x1155)) ||
- ((c) == 0x1159) ||
- (((c) >= 0x115F) && ((c) <= 0x1161)) ||
- ((c) == 0x1163) ||
- ((c) == 0x1165) ||
- ((c) == 0x1167) ||
- ((c) == 0x1169) ||
- (((c) >= 0x116D) && ((c) <= 0x116E)) ||
- (((c) >= 0x1172) && ((c) <= 0x1173)) ||
- ((c) == 0x1175) ||
- ((c) == 0x119E) ||
- ((c) == 0x11A8) ||
- ((c) == 0x11AB) ||
- (((c) >= 0x11AE) && ((c) <= 0x11AF)) ||
- (((c) >= 0x11B7) && ((c) <= 0x11B8)) ||
- ((c) == 0x11BA) ||
- (((c) >= 0x11BC) && ((c) <= 0x11C2)) ||
- ((c) == 0x11EB) ||
- ((c) == 0x11F0) ||
- ((c) == 0x11F9) ||
- (((c) >= 0x1E00) && ((c) <= 0x1E9B)) ||
- (((c) >= 0x1EA0) && ((c) <= 0x1EF9)) ||
- (((c) >= 0x1F00) && ((c) <= 0x1F15)) ||
- (((c) >= 0x1F18) && ((c) <= 0x1F1D)) ||
- (((c) >= 0x1F20) && ((c) <= 0x1F45)) ||
- (((c) >= 0x1F48) && ((c) <= 0x1F4D)) ||
- (((c) >= 0x1F50) && ((c) <= 0x1F57)) ||
- ((c) == 0x1F59) ||
- ((c) == 0x1F5B) ||
- ((c) == 0x1F5D) ||
- (((c) >= 0x1F5F) && ((c) <= 0x1F7D)) ||
- (((c) >= 0x1F80) && ((c) <= 0x1FB4)) ||
- (((c) >= 0x1FB6) && ((c) <= 0x1FBC)) ||
- ((c) == 0x1FBE) ||
- (((c) >= 0x1FC2) && ((c) <= 0x1FC4)) ||
- (((c) >= 0x1FC6) && ((c) <= 0x1FCC)) ||
- (((c) >= 0x1FD0) && ((c) <= 0x1FD3)) ||
- (((c) >= 0x1FD6) && ((c) <= 0x1FDB)) ||
- (((c) >= 0x1FE0) && ((c) <= 0x1FEC)) ||
- (((c) >= 0x1FF2) && ((c) <= 0x1FF4)) ||
- (((c) >= 0x1FF6) && ((c) <= 0x1FFC)) ||
- ((c) == 0x2126) ||
- (((c) >= 0x212A) && ((c) <= 0x212B)) ||
- ((c) == 0x212E) ||
- (((c) >= 0x2180) && ((c) <= 0x2182)) ||
- (((c) >= 0x3041) && ((c) <= 0x3094)) ||
- (((c) >= 0x30A1) && ((c) <= 0x30FA)) ||
- (((c) >= 0x3105) && ((c) <= 0x312C)) ||
- (((c) >= 0xAC00) && ((c) <= 0xD7A3))) /* accelerators */
- ))));
-}
-
-/**
- * xmlIsDigit:
- * @c: an unicode character (int)
- *
- * Check whether the character is allowed by the production
- * [88] Digit ::= ... long list see REC ...
- *
- * Returns 0 if not, non-zero otherwise
- */
-int
-xmlIsDigit(int c) {
- return(
- (((c) >= 0x0030) && ((c) <= 0x0039)) ||
- (((c) >= 0x660) && ( /* accelerator */
- (((c) >= 0x0660) && ((c) <= 0x0669)) ||
- (((c) >= 0x06F0) && ((c) <= 0x06F9)) ||
- (((c) >= 0x0966) && ((c) <= 0x096F)) ||
- (((c) >= 0x09E6) && ((c) <= 0x09EF)) ||
- (((c) >= 0x0A66) && ((c) <= 0x0A6F)) ||
- (((c) >= 0x0AE6) && ((c) <= 0x0AEF)) ||
- (((c) >= 0x0B66) && ((c) <= 0x0B6F)) ||
- (((c) >= 0x0BE7) && ((c) <= 0x0BEF)) ||
- (((c) >= 0x0C66) && ((c) <= 0x0C6F)) ||
- (((c) >= 0x0CE6) && ((c) <= 0x0CEF)) ||
- (((c) >= 0x0D66) && ((c) <= 0x0D6F)) ||
- (((c) >= 0x0E50) && ((c) <= 0x0E59)) ||
- (((c) >= 0x0ED0) && ((c) <= 0x0ED9)) ||
- (((c) >= 0x0F20) && ((c) <= 0x0F29))) /* accelerator */ ));
-}
-
-/**
- * xmlIsCombining:
- * @c: an unicode character (int)
- *
- * Check whether the character is allowed by the production
- * [87] CombiningChar ::= ... long list see REC ...
- *
- * Returns 0 if not, non-zero otherwise
- */
-int
-xmlIsCombining(int c) {
- return(
- (((c) >= 0x300) && ( /* accelerator */
- (((c) >= 0x0300) && ((c) <= 0x0345)) ||
- (((c) >= 0x0360) && ((c) <= 0x0361)) ||
- (((c) >= 0x0483) && ((c) <= 0x0486)) ||
- (((c) >= 0x0591) && ((c) <= 0x05A1)) ||
- (((c) >= 0x05A3) && ((c) <= 0x05B9)) ||
- (((c) >= 0x05BB) && ((c) <= 0x05BD)) ||
- ((c) == 0x05BF) ||
- (((c) >= 0x05C1) && ((c) <= 0x05C2)) ||
- ((c) == 0x05C4) ||
- (((c) >= 0x064B) && ((c) <= 0x0652)) ||
- ((c) == 0x0670) ||
- (((c) >= 0x06D6) && ((c) <= 0x06DC)) ||
- (((c) >= 0x06DD) && ((c) <= 0x06DF)) ||
- (((c) >= 0x06E0) && ((c) <= 0x06E4)) ||
- (((c) >= 0x06E7) && ((c) <= 0x06E8)) ||
- (((c) >= 0x06EA) && ((c) <= 0x06ED)) ||
- (((c) >= 0x0901) && ( /* accelerator */
- (((c) >= 0x0901) && ((c) <= 0x0903)) ||
- ((c) == 0x093C) ||
- (((c) >= 0x093E) && ((c) <= 0x094C)) ||
- ((c) == 0x094D) ||
- (((c) >= 0x0951) && ((c) <= 0x0954)) ||
- (((c) >= 0x0962) && ((c) <= 0x0963)) ||
- (((c) >= 0x0981) && ((c) <= 0x0983)) ||
- ((c) == 0x09BC) ||
- ((c) == 0x09BE) ||
- ((c) == 0x09BF) ||
- (((c) >= 0x09C0) && ((c) <= 0x09C4)) ||
- (((c) >= 0x09C7) && ((c) <= 0x09C8)) ||
- (((c) >= 0x09CB) && ((c) <= 0x09CD)) ||
- ((c) == 0x09D7) ||
- (((c) >= 0x09E2) && ((c) <= 0x09E3)) ||
- (((c) >= 0x0A02) && ( /* accelerator */
- ((c) == 0x0A02) ||
- ((c) == 0x0A3C) ||
- ((c) == 0x0A3E) ||
- ((c) == 0x0A3F) ||
- (((c) >= 0x0A40) && ((c) <= 0x0A42)) ||
- (((c) >= 0x0A47) && ((c) <= 0x0A48)) ||
- (((c) >= 0x0A4B) && ((c) <= 0x0A4D)) ||
- (((c) >= 0x0A70) && ((c) <= 0x0A71)) ||
- (((c) >= 0x0A81) && ((c) <= 0x0A83)) ||
- ((c) == 0x0ABC) ||
- (((c) >= 0x0ABE) && ((c) <= 0x0AC5)) ||
- (((c) >= 0x0AC7) && ((c) <= 0x0AC9)) ||
- (((c) >= 0x0ACB) && ((c) <= 0x0ACD)) ||
- (((c) >= 0x0B01) && ((c) <= 0x0B03)) ||
- ((c) == 0x0B3C) ||
- (((c) >= 0x0B3E) && ((c) <= 0x0B43)) ||
- (((c) >= 0x0B47) && ((c) <= 0x0B48)) ||
- (((c) >= 0x0B4B) && ((c) <= 0x0B4D)) ||
- (((c) >= 0x0B56) && ((c) <= 0x0B57)) ||
- (((c) >= 0x0B82) && ((c) <= 0x0B83)) ||
- (((c) >= 0x0BBE) && ((c) <= 0x0BC2)) ||
- (((c) >= 0x0BC6) && ((c) <= 0x0BC8)) ||
- (((c) >= 0x0BCA) && ((c) <= 0x0BCD)) ||
- ((c) == 0x0BD7) ||
- (((c) >= 0x0C01) && ((c) <= 0x0C03)) ||
- (((c) >= 0x0C3E) && ((c) <= 0x0C44)) ||
- (((c) >= 0x0C46) && ((c) <= 0x0C48)) ||
- (((c) >= 0x0C4A) && ((c) <= 0x0C4D)) ||
- (((c) >= 0x0C55) && ((c) <= 0x0C56)) ||
- (((c) >= 0x0C82) && ((c) <= 0x0C83)) ||
- (((c) >= 0x0CBE) && ((c) <= 0x0CC4)) ||
- (((c) >= 0x0CC6) && ((c) <= 0x0CC8)) ||
- (((c) >= 0x0CCA) && ((c) <= 0x0CCD)) ||
- (((c) >= 0x0CD5) && ((c) <= 0x0CD6)) ||
- (((c) >= 0x0D02) && ((c) <= 0x0D03)) ||
- (((c) >= 0x0D3E) && ((c) <= 0x0D43)) ||
- (((c) >= 0x0D46) && ((c) <= 0x0D48)) ||
- (((c) >= 0x0D4A) && ((c) <= 0x0D4D)) ||
- ((c) == 0x0D57) ||
- (((c) >= 0x0E31) && ( /* accelerator */
- ((c) == 0x0E31) ||
- (((c) >= 0x0E34) && ((c) <= 0x0E3A)) ||
- (((c) >= 0x0E47) && ((c) <= 0x0E4E)) ||
- ((c) == 0x0EB1) ||
- (((c) >= 0x0EB4) && ((c) <= 0x0EB9)) ||
- (((c) >= 0x0EBB) && ((c) <= 0x0EBC)) ||
- (((c) >= 0x0EC8) && ((c) <= 0x0ECD)) ||
- (((c) >= 0x0F18) && ((c) <= 0x0F19)) ||
- ((c) == 0x0F35) ||
- ((c) == 0x0F37) ||
- ((c) == 0x0F39) ||
- ((c) == 0x0F3E) ||
- ((c) == 0x0F3F) ||
- (((c) >= 0x0F71) && ((c) <= 0x0F84)) ||
- (((c) >= 0x0F86) && ((c) <= 0x0F8B)) ||
- (((c) >= 0x0F90) && ((c) <= 0x0F95)) ||
- ((c) == 0x0F97) ||
- (((c) >= 0x0F99) && ((c) <= 0x0FAD)) ||
- (((c) >= 0x0FB1) && ((c) <= 0x0FB7)) ||
- ((c) == 0x0FB9) ||
- (((c) >= 0x20D0) && ((c) <= 0x20DC)) ||
- ((c) == 0x20E1) ||
- (((c) >= 0x302A) && ((c) <= 0x302F)) ||
- ((c) == 0x3099) ||
- ((c) == 0x309A))))))))));
-}
-
-/**
- * xmlIsExtender:
- * @c: an unicode character (int)
- *
- * Check whether the character is allowed by the production
- * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
- * #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
- * [#x309D-#x309E] | [#x30FC-#x30FE]
- *
- * Returns 0 if not, non-zero otherwise
- */
-int
-xmlIsExtender(int c) {
- switch (c) {
- case 0x00B7: case 0x02D0: case 0x02D1: case 0x0387:
- case 0x0640: case 0x0E46: case 0x0EC6: case 0x3005:
- case 0x3031: case 0x3032: case 0x3033: case 0x3034:
- case 0x3035: case 0x309D: case 0x309E: case 0x30FC:
- case 0x30FD: case 0x30FE:
- return 1;
- default:
- return 0;
- }
-}
-
-/**
- * xmlIsIdeographic:
- * @c: an unicode character (int)
- *
- * Check whether the character is allowed by the production
- * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
- *
- * Returns 0 if not, non-zero otherwise
- */
-int
-xmlIsIdeographic(int c) {
- return(((c) < 0x0100) ? 0 :
- (((c) >= 0x4e00) && ((c) <= 0x9fa5)) ||
- (((c) >= 0xf900) && ((c) <= 0xfa2d)) ||
- (((c) >= 0x3021) && ((c) <= 0x3029)) ||
- ((c) == 0x3007));
-}
-
/**
* xmlIsLetter:
* @c: an unicode character (int)
@@ -676,29 +211,6 @@ xmlIsLetter(int c) {
return(IS_BASECHAR(c) || IS_IDEOGRAPHIC(c));
}
-/**
- * xmlIsPubidChar:
- * @c: an unicode character (int)
- *
- * Check whether the character is allowed by the production
- * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
- *
- * Returns 0 if not, non-zero otherwise
- */
-int
-xmlIsPubidChar(int c) {
- return(
- ((c) == 0x20) || ((c) == 0x0D) || ((c) == 0x0A) ||
- (((c) >= 'a') && ((c) <= 'z')) ||
- (((c) >= 'A') && ((c) <= 'Z')) ||
- (((c) >= '0') && ((c) <= '9')) ||
- ((c) == '-') || ((c) == '\'') || ((c) == '(') || ((c) == ')') ||
- ((c) == '+') || ((c) == ',') || ((c) == '.') || ((c) == '/') ||
- ((c) == ':') || ((c) == '=') || ((c) == '?') || ((c) == ';') ||
- ((c) == '!') || ((c) == '*') || ((c) == '#') || ((c) == '@') ||
- ((c) == '$') || ((c) == '_') || ((c) == '%'));
-}
-
/************************************************************************
* *
* Input handling functions for progressive parsing *