#include "XMLCharacterClasses.h"
#include "UnicodeCharacters.h"

// Character classification predicates for Arabica::XML.
// NOTE(review): the sets below appear to follow the character class
// productions of the XML 1.0 recommendation - confirm against the spec
// before changing any range.

// Inclusive { first, last } ranges accepted by is_base_char.
// Rows are listed in ascending order - is_base_char depends on that to
// give up early - and the list is closed by a { 0, 0 } sentinel row.
const wchar_t base_char_ranges[][2] =
{
  { 0x0041, 0x005A }, { 0x0061, 0x007A }, { 0x00C0, 0x00D6 }, { 0x00D8, 0x00F6 },
  { 0x00F8, 0x00FF }, { 0x0100, 0x0131 }, { 0x0134, 0x013E }, { 0x0141, 0x0148 },
  { 0x014A, 0x017E }, { 0x0180, 0x01C3 }, { 0x01CD, 0x01F0 }, { 0x01F4, 0x01F5 },
  { 0x01FA, 0x0217 }, { 0x0250, 0x02A8 }, { 0x02BB, 0x02C1 }, { 0x0386, 0x0386 },
  { 0x0388, 0x038A }, { 0x038C, 0x038C }, { 0x038E, 0x03A1 }, { 0x03A3, 0x03CE },
  { 0x03D0, 0x03D6 }, { 0x03DA, 0x03DA }, { 0x03DC, 0x03DC }, { 0x03DE, 0x03DE },
  { 0x03E0, 0x03E0 }, { 0x03E2, 0x03F3 }, { 0x0401, 0x040C }, { 0x040E, 0x044F },
  { 0x0451, 0x045C }, { 0x045E, 0x0481 }, { 0x0490, 0x04C4 }, { 0x04C7, 0x04C8 },
  { 0x04CB, 0x04CC }, { 0x04D0, 0x04EB }, { 0x04EE, 0x04F5 }, { 0x04F8, 0x04F9 },
  { 0x0531, 0x0556 }, { 0x0559, 0x0559 }, { 0x0561, 0x0586 }, { 0x05D0, 0x05EA },
  { 0x05F0, 0x05F2 }, { 0x0621, 0x063A }, { 0x0641, 0x064A }, { 0x0671, 0x06B7 },
  { 0x06BA, 0x06BE }, { 0x06C0, 0x06CE }, { 0x06D0, 0x06D3 }, { 0x06D5, 0x06D5 },
  { 0x06E5, 0x06E6 }, { 0x0905, 0x0939 }, { 0x093D, 0x093D }, { 0x0958, 0x0961 },
  { 0x0985, 0x098C }, { 0x098F, 0x0990 }, { 0x0993, 0x09A8 }, { 0x09AA, 0x09B0 },
  { 0x09B2, 0x09B2 }, { 0x09B6, 0x09B9 }, { 0x09DC, 0x09DD }, { 0x09DF, 0x09E1 },
  { 0x09F0, 0x09F1 }, { 0x0A05, 0x0A0A }, { 0x0A0F, 0x0A10 }, { 0x0A13, 0x0A28 },
  { 0x0A2A, 0x0A30 }, { 0x0A32, 0x0A33 }, { 0x0A35, 0x0A36 }, { 0x0A38, 0x0A39 },
  { 0x0A59, 0x0A5C }, { 0x0A5E, 0x0A5E }, { 0x0A72, 0x0A74 }, { 0x0A85, 0x0A8B },
  { 0x0A8D, 0x0A8D }, { 0x0A8F, 0x0A91 }, { 0x0A93, 0x0AA8 }, { 0x0AAA, 0x0AB0 },
  { 0x0AB2, 0x0AB3 }, { 0x0AB5, 0x0AB9 }, { 0x0ABD, 0x0ABD }, { 0x0AE0, 0x0AE0 },
  { 0x0B05, 0x0B0C }, { 0x0B0F, 0x0B10 }, { 0x0B13, 0x0B28 }, { 0x0B2A, 0x0B30 },
  { 0x0B32, 0x0B33 }, { 0x0B36, 0x0B39 }, { 0x0B3D, 0x0B3D }, { 0x0B5C, 0x0B5D },
  { 0x0B5F, 0x0B61 }, { 0x0B85, 0x0B8A }, { 0x0B8E, 0x0B90 }, { 0x0B92, 0x0B95 },
  { 0x0B99, 0x0B9A }, { 0x0B9C, 0x0B9C }, { 0x0B9E, 0x0B9F }, { 0x0BA3, 0x0BA4 },
  { 0x0BA8, 0x0BAA }, { 0x0BAE, 0x0BB5 }, { 0x0BB7, 0x0BB9 }, { 0x0C05, 0x0C0C },
  { 0x0C0E, 0x0C10 }, { 0x0C12, 0x0C28 }, { 0x0C2A, 0x0C33 }, { 0x0C35, 0x0C39 },
  { 0x0C60, 0x0C61 }, { 0x0C85, 0x0C8C }, { 0x0C8E, 0x0C90 }, { 0x0C92, 0x0CA8 },
  { 0x0CAA, 0x0CB3 }, { 0x0CB5, 0x0CB9 }, { 0x0CDE, 0x0CDE }, { 0x0CE0, 0x0CE1 },
  { 0x0D05, 0x0D0C }, { 0x0D0E, 0x0D10 }, { 0x0D12, 0x0D28 }, { 0x0D2A, 0x0D39 },
  { 0x0D60, 0x0D61 }, { 0x0E01, 0x0E2E }, { 0x0E30, 0x0E30 }, { 0x0E32, 0x0E33 },
  { 0x0E40, 0x0E45 }, { 0x0E81, 0x0E82 }, { 0x0E84, 0x0E84 }, { 0x0E87, 0x0E88 },
  { 0x0E8A, 0x0E8A }, { 0x0E8D, 0x0E8D }, { 0x0E94, 0x0E97 }, { 0x0E99, 0x0E9F },
  { 0x0EA1, 0x0EA3 }, { 0x0EA5, 0x0EA5 }, { 0x0EA7, 0x0EA7 }, { 0x0EAA, 0x0EAB },
  { 0x0EAD, 0x0EAE }, { 0x0EB0, 0x0EB0 }, { 0x0EB2, 0x0EB3 }, { 0x0EBD, 0x0EBD },
  { 0x0EC0, 0x0EC4 }, { 0x0F40, 0x0F47 }, { 0x0F49, 0x0F69 }, { 0x10A0, 0x10C5 },
  { 0x10D0, 0x10F6 }, { 0x1100, 0x1100 }, { 0x1102, 0x1103 }, { 0x1105, 0x1107 },
  { 0x1109, 0x1109 }, { 0x110B, 0x110C }, { 0x110E, 0x1112 }, { 0x113C, 0x113C },
  { 0x113E, 0x113E }, { 0x1140, 0x1140 }, { 0x114C, 0x114C }, { 0x114E, 0x114E },
  { 0x1150, 0x1150 }, { 0x1154, 0x1155 }, { 0x1159, 0x1159 }, { 0x115F, 0x1161 },
  { 0x1163, 0x1163 }, { 0x1165, 0x1165 }, { 0x1167, 0x1167 }, { 0x1169, 0x1169 },
  { 0x116D, 0x116E }, { 0x1172, 0x1173 }, { 0x1175, 0x1175 }, { 0x119E, 0x119E },
  { 0x11A8, 0x11A8 }, { 0x11AB, 0x11AB }, { 0x11AE, 0x11AF }, { 0x11B7, 0x11B8 },
  { 0x11BA, 0x11BA }, { 0x11BC, 0x11C2 }, { 0x11EB, 0x11EB }, { 0x11F0, 0x11F0 },
  { 0x11F9, 0x11F9 }, { 0x1E00, 0x1E9B }, { 0x1EA0, 0x1EF9 }, { 0x1F00, 0x1F15 },
  { 0x1F18, 0x1F1D }, { 0x1F20, 0x1F45 }, { 0x1F48, 0x1F4D }, { 0x1F50, 0x1F57 },
  { 0x1F59, 0x1F59 }, { 0x1F5B, 0x1F5B }, { 0x1F5D, 0x1F5D }, { 0x1F5F, 0x1F7D },
  { 0x1F80, 0x1FB4 }, { 0x1FB6, 0x1FBC }, { 0x1FBE, 0x1FBE }, { 0x1FC2, 0x1FC4 },
  { 0x1FC6, 0x1FCC }, { 0x1FD0, 0x1FD3 }, { 0x1FD6, 0x1FDB }, { 0x1FE0, 0x1FEC },
  { 0x1FF2, 0x1FF4 }, { 0x1FF6, 0x1FFC }, { 0x2126, 0x2126 }, { 0x212A, 0x212B },
  { 0x212E, 0x212E }, { 0x2180, 0x2182 }, { 0x3041, 0x3094 }, { 0x30A1, 0x30FA },
  { 0x3105, 0x312C }, { 0xAC00, 0xD7A3 },
  { 0, 0 }
}; // base_char_ranges

namespace
{
  // True when ch lies in the inclusive interval [lo, hi].
  // The bounds are int so that each comparison has the same operand
  // types as comparing ch directly against a hexadecimal literal.
  inline bool within(wchar_t ch, int lo, int hi)
  {
    return (ch >= lo) && (ch <= hi);
  } // within
} // namespace

// True for tab, line feed, carriage return, and any value in
// 0x0020-0xD7FF, 0xE000-0xFFFD or 0x10000-0x10FFFF.
// Where wchar_t is only 16 bits wide the last interval can never match.
bool Arabica::XML::is_char(wchar_t c)
{
  if((c == Unicode::HORIZONTAL_TABULATION) ||
     (c == Unicode::LINE_FEED) ||
     (c == Unicode::CARRIAGE_RETURN))
    return true;

  return within(c, 0x0020, 0xD7FF) ||
         within(c, 0xE000, 0xFFFD) ||
         within(c, 0x10000, 0x10FFFF);
} // is_char

// True for space, tab, carriage return and line feed.
bool Arabica::XML::is_space(wchar_t c)
{
  if(c == Unicode::SPACE)
    return true;
  if(c == Unicode::HORIZONTAL_TABULATION)
    return true;
  if(c == Unicode::CARRIAGE_RETURN)
    return true;
  return (c == Unicode::LINE_FEED);
} // is_space

// True when c is a letter, a digit, one of . - _ : , a combining
// character or an extender.
bool Arabica::XML::is_name_char(wchar_t c)
{
  if(is_letter(c) || is_digit(c))
    return true;

  if((c == Unicode::FULL_STOP) ||     // .
     (c == Unicode::HYPHEN_MINUS) ||  // -
     (c == Unicode::LOW_LINE) ||      // _
     (c == Unicode::COLON))           // :
    return true;

  return is_combining_char(c) || is_extender(c);
} // is_name_char

// A letter is either a base character or an ideographic character.
bool Arabica::XML::is_letter(wchar_t c)
{
  if(is_base_char(c))
    return true;
  return is_ideographic(c);
} // is_letter

// Walks base_char_ranges from the start until the sentinel row and
// reports whether c falls inside one of the listed ranges.
bool Arabica::XML::is_base_char(wchar_t c)
{
  int row = 0;
  while(base_char_ranges[row][0])
  {
    const wchar_t first = base_char_ranges[row][0];
    const wchar_t last = base_char_ranges[row][1];

    // rows ascend, so once c is below a row's start nothing later can match
    if(c < first)
      return false;
    // c >= first is already known here, only the upper bound is left to test
    if(c <= last)
      return true;

    ++row;
  } // while ...

  return false;
} // is_base_char

// True for 0x4E00-0x9FA5, 0x3007 and 0x3021-0x3029.
bool Arabica::XML::is_ideographic(wchar_t c)
{
  return within(c, 0x4E00, 0x9FA5) ||
         c == 0x3007 ||
         within(c, 0x3021, 0x3029);
} // is_ideographic

// True when c falls in one of the fifteen digit ranges listed below.
bool Arabica::XML::is_digit(wchar_t c)
{
  return within(c, 0x0030, 0x0039) ||
         within(c, 0x0660, 0x0669) ||
         within(c, 0x06F0, 0x06F9) ||
         within(c, 0x0966, 0x096F) ||
         within(c, 0x09E6, 0x09EF) ||
         within(c, 0x0A66, 0x0A6F) ||
         within(c, 0x0AE6, 0x0AEF) ||
         within(c, 0x0B66, 0x0B6F) ||
         within(c, 0x0BE7, 0x0BEF) ||
         within(c, 0x0C66, 0x0C6F) ||
         within(c, 0x0CE6, 0x0CEF) ||
         within(c, 0x0D66, 0x0D6F) ||
         within(c, 0x0E50, 0x0E59) ||
         within(c, 0x0ED0, 0x0ED9) ||
         within(c, 0x0F20, 0x0F29);
} // is_digit

// True when c is one of the combining-character values or ranges
// listed below.
bool Arabica::XML::is_combining_char(wchar_t c)
{
  return within(c, 0x0300, 0x0345) ||
         within(c, 0x0360, 0x0361) ||
         within(c, 0x0483, 0x0486) ||
         within(c, 0x0591, 0x05A1) ||
         within(c, 0x05A3, 0x05B9) ||
         within(c, 0x05BB, 0x05BD) ||
         (c == 0x05BF) ||
         within(c, 0x05C1, 0x05C2) ||
         (c == 0x05C4) ||
         within(c, 0x064B, 0x0652) ||
         (c == 0x0670) ||
         within(c, 0x06D6, 0x06DC) ||
         within(c, 0x06DD, 0x06DF) ||
         within(c, 0x06E0, 0x06E4) ||
         within(c, 0x06E7, 0x06E8) ||
         within(c, 0x06EA, 0x06ED) ||
         within(c, 0x0901, 0x0903) ||
         (c == 0x093C) ||
         within(c, 0x093E, 0x094C) ||
         (c == 0x094D) ||
         within(c, 0x0951, 0x0954) ||
         within(c, 0x0962, 0x0963) ||
         within(c, 0x0981, 0x0983) ||
         (c == 0x09BC) ||
         (c == 0x09BE) ||
         (c == 0x09BF) ||
         within(c, 0x09C0, 0x09C4) ||
         within(c, 0x09C7, 0x09C8) ||
         within(c, 0x09CB, 0x09CD) ||
         (c == 0x09D7) ||
         within(c, 0x09E2, 0x09E3) ||
         (c == 0x0A02) ||
         (c == 0x0A3C) ||
         (c == 0x0A3E) ||
         (c == 0x0A3F) ||
         within(c, 0x0A40, 0x0A42) ||
         within(c, 0x0A47, 0x0A48) ||
         within(c, 0x0A4B, 0x0A4D) ||
         within(c, 0x0A70, 0x0A71) ||
         within(c, 0x0A81, 0x0A83) ||
         (c == 0x0ABC) ||
         within(c, 0x0ABE, 0x0AC5) ||
         within(c, 0x0AC7, 0x0AC9) ||
         within(c, 0x0ACB, 0x0ACD) ||
         within(c, 0x0B01, 0x0B03) ||
         (c == 0x0B3C) ||
         within(c, 0x0B3E, 0x0B43) ||
         within(c, 0x0B47, 0x0B48) ||
         within(c, 0x0B4B, 0x0B4D) ||
         within(c, 0x0B56, 0x0B57) ||
         within(c, 0x0B82, 0x0B83) ||
         within(c, 0x0BBE, 0x0BC2) ||
         within(c, 0x0BC6, 0x0BC8) ||
         within(c, 0x0BCA, 0x0BCD) ||
         (c == 0x0BD7) ||
         within(c, 0x0C01, 0x0C03) ||
         within(c, 0x0C3E, 0x0C44) ||
         within(c, 0x0C46, 0x0C48) ||
         within(c, 0x0C4A, 0x0C4D) ||
         within(c, 0x0C55, 0x0C56) ||
         within(c, 0x0C82, 0x0C83) ||
         within(c, 0x0CBE, 0x0CC4) ||
         within(c, 0x0CC6, 0x0CC8) ||
         within(c, 0x0CCA, 0x0CCD) ||
         within(c, 0x0CD5, 0x0CD6) ||
         within(c, 0x0D02, 0x0D03) ||
         within(c, 0x0D3E, 0x0D43) ||
         within(c, 0x0D46, 0x0D48) ||
         within(c, 0x0D4A, 0x0D4D) ||
         (c == 0x0D57) ||
         (c == 0x0E31) ||
         within(c, 0x0E34, 0x0E3A) ||
         within(c, 0x0E47, 0x0E4E) ||
         (c == 0x0EB1) ||
         within(c, 0x0EB4, 0x0EB9) ||
         within(c, 0x0EBB, 0x0EBC) ||
         within(c, 0x0EC8, 0x0ECD) ||
         within(c, 0x0F18, 0x0F19) ||
         (c == 0x0F35) ||
         (c == 0x0F37) ||
         (c == 0x0F39) ||
         (c == 0x0F3E) ||
         (c == 0x0F3F) ||
         within(c, 0x0F71, 0x0F84) ||
         within(c, 0x0F86, 0x0F8B) ||
         within(c, 0x0F90, 0x0F95) ||
         (c == 0x0F97) ||
         within(c, 0x0F99, 0x0FAD) ||
         within(c, 0x0FB1, 0x0FB7) ||
         (c == 0x0FB9) ||
         within(c, 0x20D0, 0x20DC) ||
         (c == 0x20E1) ||
         within(c, 0x302A, 0x302F) ||
         (c == 0x3099) ||
         (c == 0x309A);
} // is_combining_char

// True when c is one of the extender values or ranges listed below.
bool Arabica::XML::is_extender(wchar_t c)
{
  return (c == 0x00B7) ||
         (c == 0x02D0) ||
         (c == 0x02D1) ||
         (c == 0x0387) ||
         (c == 0x0640) ||
         (c == 0x0E46) ||
         (c == 0x0EC6) ||
         (c == 0x3005) ||
         within(c, 0x3031, 0x3035) ||
         within(c, 0x309D, 0x309E) ||
         within(c, 0x30FC, 0x30FE);
} // is_extender

// end of file