40 { PREFIX( prologTok ), PREFIX( contentTok ), PREFIX( cdataSectionTok ) }, \
41 { PREFIX( attributeValueTok ), PREFIX( entityValueTok ) }, PREFIX( sameName ), \
42 PREFIX( nameMatchesAscii ), PREFIX( nameLength ), PREFIX( skipS ), PREFIX( getAtts ), \
43 PREFIX( charRefNumber ), PREFIX( predefinedEntityName ), PREFIX( updatePosition ), \
/* VTABLE is VTABLE1 followed by the PREFIX-expanded toUtf8 and toUtf16 entries. */
46#define VTABLE VTABLE1, PREFIX( toUtf8 ), PREFIX( toUtf16 )
/*
 * Test the naming bit for a character given as a high byte and a low byte.
 *
 * pages[hi] is scaled by 8 to select a group of words in namingBitmap, the
 * bits of lo above the low five select the word inside that group, and the
 * low five bits of lo select the bit inside the word.  The expression is
 * non-zero when that bit is set.
 *
 * The mask is built from 1u so that selecting bit 31 never left-shifts a
 * signed int into its sign bit.  Every argument is parenthesized so that
 * expressions such as `table + 1` can be passed safely.  lo is evaluated
 * twice, so it must not have side effects.
 */
#define UCS2_GET_NAMING( pages, hi, lo ) \
  ( namingBitmap[( ( pages )[( hi )] << 3 ) + ( ( lo ) >> 5 )] & ( 1u << ( ( lo ) & 0x1F ) ) )
/*
 * Test the naming bit for a 2-byte sequence addressed by byte.
 *
 * The page index is bits 2..4 of byte[0]; the looked-up page value is scaled
 * by 8, then the low two bits of byte[0] (shifted left once) and bit 5 of
 * byte[1] pick the word in namingBitmap.  The low five bits of byte[1] pick
 * the bit inside that word.  The expression is non-zero when the bit is set.
 *
 * byte should point at unsigned char data so the shifts operate on
 * non-negative values.  The mask uses 1u so that bit 31 can be selected
 * without shifting a signed int into its sign bit.
 */
#define UTF8_GET_NAMING2( pages, byte ) \
  ( namingBitmap[( ( pages )[( ( ( byte )[0] ) >> 2 ) & 7] << 3 ) + \
                 ( ( ( ( byte )[0] ) & 3 ) << 1 ) + ( ( ( ( byte )[1] ) >> 5 ) & 1 )] & \
    ( 1u << ( ( ( byte )[1] ) & 0x1F ) ) )
64#define UTF8_GET_NAMING3( pages, byte ) \
65 ( namingBitmap[( ( pages )[( ( ( ( byte )[0] ) & 0xF ) << 4 ) + \
66 ( ( ( ( byte )[1] ) >> 2 ) & 0xF )] \
68 ( ( ( ( byte )[1] ) & 3 ) << 1 ) + ( ( ( ( byte )[2] ) >> 5 ) & 1 )] & \
69 ( 1 << ( ( ( byte )[2] ) & 0x1F ) ) )
71#define UTF8_GET_NAMING( pages, p, n ) \
73 ? UTF8_GET_NAMING2( pages, (const unsigned char*)( p ) ) \
74 : ( ( n ) == 3 ? UTF8_GET_NAMING3( pages, (const unsigned char*)( p ) ) : 0 ) )
76#define UTF8_INVALID3( p ) \
78 ? ( ( ( p )[1] & 0x20 ) != 0 ) \
79 : ( ( *p ) == 0xEF ? ( ( p )[1] == 0xBF && ( ( p )[2] == 0xBF || ( p )[2] == 0xBE ) ) \
/*
 * Non-zero when the byte at p is 0xF4 and the following byte has either of
 * the 0x30 bits set (for a continuation byte in 0x80..0xBF that means it is
 * above 0x8F).
 *
 * p must point at unsigned char data; with a signed char the comparison
 * against 0xF4 could never succeed.  The argument is fully parenthesized so
 * that pointer expressions such as `ptr + 1` are dereferenced as a whole.
 */
#define UTF8_INVALID4( p ) ( ( *( p ) ) == 0xF4 && ( ( p )[1] & 0x30 ) != 0 )
84static int isNever(
const ENCODING* enc,
const char* p ) {
return 0; }
86static int utf8_isName2(
const ENCODING* enc,
const char* p ) {
90static int utf8_isName3(
const ENCODING* enc,
const char* p ) {
/* utf8_isName4 is an alias for isNever, so it yields 0 for every input. */
94#define utf8_isName4 isNever
96static int utf8_isNmstrt2(
const ENCODING* enc,
const char* p ) {
100static int utf8_isNmstrt3(
const ENCODING* enc,
const char* p ) {
/* Both names below are aliases for isNever, so they yield 0 for every input. */
104#define utf8_isNmstrt4 isNever
106#define utf8_isInvalid2 isNever
108static int utf8_isInvalid3(
const ENCODING* enc,
const char* p ) {
112static int utf8_isInvalid4(
const ENCODING* enc,
const char* p ) {
120 int ( *byteType )(
const ENCODING*,
const char* );
121 int ( *isNameMin )(
const ENCODING*,
const char* );
122 int ( *isNmstrtMin )(
const ENCODING*,
const char* );
123 int ( *byteToAscii )(
const ENCODING*,
const char* );
124 int ( *charMatches )(
const ENCODING*,
const char*, int );
126 int ( *
isName2 )(
const ENCODING*,
const char* );
127 int ( *
isName3 )(
const ENCODING*,
const char* );
128 int ( *
isName4 )(
const ENCODING*,
const char* );
/*
 * STANDARD_VTABLE(E) expands to five E-prefixed names, in this order:
 * byteType, isNameMin, isNmstrtMin, byteToAscii, charMatches.  The expansion
 * ends with a trailing comma so further initializers can follow directly.
 */
139# define STANDARD_VTABLE( E ) \
140 E##byteType, E##isNameMin, E##isNmstrtMin, E##byteToAscii, E##charMatches,
/*
 * Alternative definition that expands to nothing.
 * NOTE(review): the preprocessor conditional that selects between the two
 * definitions is not visible in this excerpt.
 */
144# define STANDARD_VTABLE( E )
/*
 * NORMAL_VTABLE(E) expands to nine E-prefixed names, in this order:
 * isName2, isName3, isName4, isNmstrt2, isNmstrt3, isNmstrt4,
 * isInvalid2, isInvalid3, isInvalid4 (no trailing comma).
 */
148#define NORMAL_VTABLE( E ) \
149 E##isName2, E##isName3, E##isName4, E##isNmstrt2, E##isNmstrt3, E##isNmstrt4, \
150 E##isInvalid2, E##isInvalid3, E##isInvalid4
/* Forward declaration of checkCharRefNumber; its definition appears further down in this file. */
152static int checkCharRefNumber(
int );
154#include "xmltok_impl.h"
/* The sb_ minimum-width name predicates are aliases for isNever, so they yield 0 for every input. */
157# define sb_isNameMin isNever
158# define sb_isNmstrtMin isNever
/* MINBPC(enc) read from the encoding object: its minBytesPerChar member. */
162# define MINBPC( enc ) ( ( enc )->minBytesPerChar )
/*
 * Alternative definition: the constant 1, ignoring enc.
 * NOTE(review): the conditional that selects between the two definitions is
 * not visible in this excerpt.
 */
165# define MINBPC( enc ) 1
/*
 * Look up the type of the single byte at p in the encoding's type table.
 *
 * enc is viewed as a struct normal_encoding and the byte at p, converted to
 * unsigned char, is used as the index into its type array.  The cast keeps
 * the const qualifier, matching the other normal_encoding dispatch macros in
 * this file, because the table is only read.
 */
#define SB_BYTE_TYPE( enc, p ) \
  ( ( (const struct normal_encoding*)( enc ) )->type[(unsigned char)*( p )] )
172static int sb_byteType(
const ENCODING* enc,
const char* p ) {
return SB_BYTE_TYPE( enc, p ); }
/* BYTE_TYPE via indirection: calls the byteType function pointer stored in the encoding, passing enc and p. */
173# define BYTE_TYPE( enc, p ) \
174 ( ( (const struct normal_encoding*)( enc ) )->byteType( enc, p ) )
/*
 * Alternative definition: expands directly to the SB_BYTE_TYPE table lookup.
 * NOTE(review): the conditional that selects between the two definitions is
 * not visible in this excerpt.
 */
176# define BYTE_TYPE( enc, p ) SB_BYTE_TYPE( enc, p )
/* BYTE_TO_ASCII via indirection: calls the byteToAscii function pointer stored in the encoding, passing enc and p. */
180# define BYTE_TO_ASCII( enc, p ) \
181 ( ( (const struct normal_encoding*)( enc ) )->byteToAscii( enc, p ) )
182static int sb_byteToAscii(
const ENCODING* enc,
const char* p ) {
return *p; }
/*
 * Direct form of BYTE_TO_ASCII: the byte at p; enc is ignored.
 * p is parenthesized so that a pointer expression such as `s + 1` is
 * dereferenced as a whole rather than as `*s + 1`.
 */
# define BYTE_TO_ASCII( enc, p ) ( *( p ) )
/*
 * Dispatch helpers.  Each one views enc as a const struct normal_encoding and
 * calls the member function pointer whose name is formed by pasting n onto
 * isName, isNmstrt or isInvalid (for example n == 2 selects isName2),
 * passing enc and p through unchanged.
 */
187#define IS_NAME_CHAR( enc, p, n ) \
188 ( ( (const struct normal_encoding*)( enc ) )->isName##n( enc, p ) )
189#define IS_NMSTRT_CHAR( enc, p, n ) \
190 ( ( (const struct normal_encoding*)( enc ) )->isNmstrt##n( enc, p ) )
191#define IS_INVALID_CHAR( enc, p, n ) \
192 ( ( (const struct normal_encoding*)( enc ) )->isInvalid##n( enc, p ) )
/* Indirect forms: call the encoding's isNameMin / isNmstrtMin function pointers with enc and p. */
195# define IS_NAME_CHAR_MINBPC( enc, p ) \
196 ( ( (const struct normal_encoding*)( enc ) )->isNameMin( enc, p ) )
197# define IS_NMSTRT_CHAR_MINBPC( enc, p ) \
198 ( ( (const struct normal_encoding*)( enc ) )->isNmstrtMin( enc, p ) )
/*
 * Alternative definitions: the constant 0, ignoring both arguments.
 * NOTE(review): the conditional that selects between the two sets is not
 * visible in this excerpt.
 */
200# define IS_NAME_CHAR_MINBPC( enc, p ) ( 0 )
201# define IS_NMSTRT_CHAR_MINBPC( enc, p ) ( 0 )
/* CHAR_MATCHES via indirection: calls the charMatches function pointer stored in the encoding with enc, p and c. */
205# define CHAR_MATCHES( enc, p, c ) \
206 ( ( (const struct normal_encoding*)( enc ) )->charMatches( enc, p, c ) )
207static int sb_charMatches(
const ENCODING* enc,
const char* p,
int c ) {
return *p == c; }
/*
 * Direct form of CHAR_MATCHES: 1 when the byte at p equals c, otherwise 0;
 * enc is ignored.  c is parenthesized so that an argument containing a
 * lower-precedence operator (for example a conditional expression) is
 * compared as a whole.
 */
# define CHAR_MATCHES( enc, p, c ) ( *( p ) == ( c ) )
/*
 * PREFIX(ident) expands to normal_ident.
 * NOTE(review): presumably consumed by the file included on the next line to
 * name the functions it defines - confirm against xmltok_impl.c.
 */
213#define PREFIX( ident ) normal_##ident
214#include "xmltok_impl.c"
221#undef IS_NAME_CHAR_MINBPC
223#undef IS_NMSTRT_CHAR_MINBPC
224#undef IS_INVALID_CHAR
233static void utf8_toUtf8(
const ENCODING* enc,
const char** fromP,
const char* fromLim,
234 char** toP,
const char* toLim ) {
237 if ( fromLim - *fromP > toLim - *toP )
240 for ( fromLim = *fromP + ( toLim - *toP ); fromLim > *fromP; fromLim-- )
241 if ( ( (
unsigned char)fromLim[-1] & 0xc0 ) != 0x80 )
break;
243 for ( to = *toP, from = *fromP; from != fromLim; from++, to++ ) *to = *from;
248static void utf8_toUtf16(
const ENCODING* enc,
const char** fromP,
const char* fromLim,
249 unsigned short** toP,
const unsigned short* toLim ) {
250 unsigned short* to = *toP;
251 const char* from = *fromP;
252 while ( from != fromLim && to != toLim )
257 *to++ = ( ( from[0] & 0x1f ) << 6 ) | ( from[1] & 0x3f );
261 *to++ = ( ( from[0] & 0xf ) << 12 ) | ( ( from[1] & 0x3f ) << 6 ) | ( from[2] & 0x3f );
266 if ( to + 1 == toLim )
break;
267 n = ( ( from[0] & 0x7 ) << 18 ) | ( ( from[1] & 0x3f ) << 12 ) |
268 ( ( from[2] & 0x3f ) << 6 ) | ( from[3] & 0x3f );
270 to[0] = (
unsigned short)( (
n >> 10 ) | 0xD800 );
271 to[1] = (
unsigned short)( (
n & 0x3FF ) | 0xDC00 );
276 default: *to++ = *from++;
break;
285 {
VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
287# include "asciitab.h"
294 {
VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
296#define BT_COLON BT_NMSTRT
306 {
VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
308# include "iasciitab.h"
316 {
VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
318#define BT_COLON BT_NMSTRT
319#include "iasciitab.h"
325static void latin1_toUtf8(
const ENCODING*
enc,
const char** fromP,
const char* fromLim,
326 char** toP,
const char* toLim ) {
330 if ( *fromP == fromLim )
break;
331 c = (
unsigned char)**fromP;
334 if ( toLim - *toP < 2 )
break;
336 *( *toP )++ = ( ( c & 0x3f ) | 0x80 );
341 if ( *toP == toLim )
break;
342 *( *toP )++ = *( *fromP )++;
347static void latin1_toUtf16(
const ENCODING*
enc,
const char** fromP,
const char* fromLim,
348 unsigned short** toP,
const unsigned short* toLim ) {
349 while ( *fromP != fromLim && *toP != toLim ) *( *toP )++ = (
unsigned char)*( *fromP )++;
355 {
VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },
357# include "asciitab.h"
358# include "latin1tab.h"
365 {
VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },
367#define BT_COLON BT_NMSTRT
370#include "latin1tab.h"
374static void ascii_toUtf8(
const ENCODING*
enc,
const char** fromP,
const char* fromLim,
375 char** toP,
const char* toLim ) {
376 while ( *fromP != fromLim && *toP != toLim ) *( *toP )++ = *( *fromP )++;
382 {
VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },
384# include "asciitab.h"
392 {
VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },
394#define BT_COLON BT_NMSTRT
401static int unicode_byte_type(
char hi,
char lo ) {
402 switch ( (
unsigned char)hi )
407 case 0xDB:
return BT_LEAD4;
411 case 0xDF:
return BT_TRAIL;
413 switch ( (
unsigned char)lo )
416 case 0xFE:
return BT_NONXML;
423#define DEFINE_UTF16_TO_UTF8( E ) \
424 static void E##toUtf8( const ENCODING* enc, const char** fromP, const char* fromLim, \
425 char** toP, const char* toLim ) { \
427 for ( from = *fromP; from != fromLim; from += 2 ) \
431 unsigned char lo = GET_LO( from ); \
432 unsigned char hi = GET_HI( from ); \
438 if ( *toP == toLim ) \
454 if ( toLim - *toP < 2 ) \
459 *( *toP )++ = ( ( lo >> 6 ) | ( hi << 2 ) | UTF8_cval2 ); \
460 *( *toP )++ = ( ( lo & 0x3f ) | 0x80 ); \
463 if ( toLim - *toP < 3 ) \
469 *( *toP )++ = ( ( hi >> 4 ) | UTF8_cval3 ); \
470 *( *toP )++ = ( ( ( hi & 0xf ) << 2 ) | ( lo >> 6 ) | 0x80 ); \
471 *( *toP )++ = ( ( lo & 0x3f ) | 0x80 ); \
477 if ( toLim - *toP < 4 ) \
482 plane = ( ( ( hi & 0x3 ) << 2 ) | ( ( lo >> 6 ) & 0x3 ) ) + 1; \
483 *( *toP )++ = ( ( plane >> 2 ) | UTF8_cval4 ); \
484 *( *toP )++ = ( ( ( lo >> 2 ) & 0xF ) | ( ( plane & 0x3 ) << 4 ) | 0x80 ); \
486 lo2 = GET_LO( from ); \
487 *( *toP )++ = ( ( ( lo & 0x3 ) << 4 ) | ( ( GET_HI( from ) & 0x3 ) << 2 ) | \
488 ( lo2 >> 6 ) | 0x80 ); \
489 *( *toP )++ = ( ( lo2 & 0x3f ) | 0x80 ); \
496#define DEFINE_UTF16_TO_UTF16( E ) \
497 static void E##toUtf16( const ENCODING* enc, const char** fromP, const char* fromLim, \
498 unsigned short** toP, const unsigned short* toLim ) { \
500 if ( fromLim - *fromP > ( ( toLim - *toP ) << 1 ) && \
501 ( GET_HI( fromLim - 2 ) & 0xF8 ) == 0xD8 ) \
503 for ( ; *fromP != fromLim && *toP != toLim; *fromP += 2 ) \
504 *( *toP )++ = ( GET_HI( *fromP ) << 8 ) | GET_LO( *fromP ); \
/*
 * Two-byte accessors, low byte first.
 * SET2 stores the low eight bits of ch at ptr[0] and ch >> 8 at ptr[1].
 * GET_LO reads ptr[0] and GET_HI reads ptr[1], each as unsigned char.
 */
507#define SET2( ptr, ch ) \
508 ( ( ( ptr )[0] = ( ( ch ) & 0xff ) ), ( ( ptr )[1] = ( ( ch ) >> 8 ) ) )
509#define GET_LO( ptr ) ( (unsigned char)( ptr )[0] )
510#define GET_HI( ptr ) ( (unsigned char)( ptr )[1] )
/*
 * Two-byte accessors, high byte first.
 * SET2 stores ch >> 8 at ptr[0] and the low eight bits of ch at ptr[1].
 * GET_LO reads ptr[1] and GET_HI reads ptr[0], each as unsigned char.
 */
519#define SET2( ptr, ch ) \
520 ( ( ( ptr )[0] = ( ( ch ) >> 8 ) ), ( ( ptr )[1] = ( ( ch ) & 0xFF ) ) )
521#define GET_LO( ptr ) ( (unsigned char)( ptr )[1] )
522#define GET_HI( ptr ) ( (unsigned char)( ptr )[0] )
/*
 * Accessors for 2-byte units stored low byte first: p[0] is the low byte and
 * p[1] is the high byte.  Every macro argument is parenthesized so pointer
 * expressions (`ptr + 2`) and compound values for c expand correctly.
 */

/*
 * Byte type of the unit at p.  When the high byte is zero, the low byte is
 * looked up in the encoding's type table (read through a const-qualified
 * view of enc); otherwise the unit is classified by
 * unicode_byte_type( high, low ).
 */
#define LITTLE2_BYTE_TYPE( enc, p ) \
  ( ( p )[1] == 0 ? ( (const struct normal_encoding*)( enc ) )->type[(unsigned char)*( p )] \
                  : unicode_byte_type( ( p )[1], ( p )[0] ) )

/* The low byte when the high byte is zero, otherwise -1. */
#define LITTLE2_BYTE_TO_ASCII( enc, p ) ( ( p )[1] == 0 ? ( p )[0] : -1 )

/* 1 when the high byte is zero and the low byte equals c, otherwise 0. */
#define LITTLE2_CHAR_MATCHES( enc, p, c ) ( ( p )[1] == 0 && ( p )[0] == ( c ) )

/* Naming-bitmap test against namePages, passing (high, low) as unsigned bytes. */
#define LITTLE2_IS_NAME_CHAR_MINBPC( enc, p ) \
  UCS2_GET_NAMING( namePages, (unsigned char)( p )[1], (unsigned char)( p )[0] )

/* Naming-bitmap test against nmstrtPages, passing (high, low) as unsigned bytes. */
#define LITTLE2_IS_NMSTRT_CHAR_MINBPC( enc, p ) \
  UCS2_GET_NAMING( nmstrtPages, (unsigned char)( p )[1], (unsigned char)( p )[0] )
543static int little2_byteType(
const ENCODING*
enc,
const char* p ) {
547static int little2_byteToAscii(
const ENCODING*
enc,
const char* p ) {
551static int little2_charMatches(
const ENCODING*
enc,
const char* p,
int c ) {
555static int little2_isNameMin(
const ENCODING*
enc,
const char* p ) {
559static int little2_isNmstrtMin(
const ENCODING*
enc,
const char* p ) {
/*
 * Macro configuration in effect when xmltok_impl.c is included below:
 *  - VTABLE is VTABLE1 followed by little2_toUtf8 and little2_toUtf16;
 *  - PREFIX(ident) expands to little2_ident;
 *  - MINBPC is the constant 2;
 *  - BYTE_TYPE, BYTE_TO_ASCII, CHAR_MATCHES and the *_MINBPC name tests
 *    forward to the corresponding LITTLE2_* macros;
 *  - IS_NAME_CHAR and IS_NMSTRT_CHAR are the constant 0.
 */
564# define VTABLE VTABLE1, little2_toUtf8, little2_toUtf16
569# define PREFIX( ident ) little2_##ident
570# define MINBPC( enc ) 2
572# define BYTE_TYPE( enc, p ) LITTLE2_BYTE_TYPE( enc, p )
573# define BYTE_TO_ASCII( enc, p ) LITTLE2_BYTE_TO_ASCII( enc, p )
574# define CHAR_MATCHES( enc, p, c ) LITTLE2_CHAR_MATCHES( enc, p, c )
575# define IS_NAME_CHAR( enc, p, n ) 0
576# define IS_NAME_CHAR_MINBPC( enc, p ) LITTLE2_IS_NAME_CHAR_MINBPC( enc, p )
577# define IS_NMSTRT_CHAR( enc, p, n ) ( 0 )
578# define IS_NMSTRT_CHAR_MINBPC( enc, p ) LITTLE2_IS_NMSTRT_CHAR_MINBPC( enc, p )
580# include "xmltok_impl.c"
587# undef IS_NAME_CHAR_MINBPC
588# undef IS_NMSTRT_CHAR
589# undef IS_NMSTRT_CHAR_MINBPC
590# undef IS_INVALID_CHAR
597# if XML_BYTE_ORDER == 12
604# include "asciitab.h"
605# include "latin1tab.h"
612#if XML_BYTE_ORDER == 12
619#define BT_COLON BT_NMSTRT
622#include "latin1tab.h"
626#if XML_BYTE_ORDER != 21
633# include "iasciitab.h"
634# include "latin1tab.h"
643# define BT_COLON BT_NMSTRT
644# include "iasciitab.h"
646# include "latin1tab.h"
/*
 * Accessors for 2-byte units stored high byte first: p[0] is the high byte
 * and p[1] is the low byte.  Every macro argument is parenthesized so pointer
 * expressions (`ptr + 2`) and compound values for c expand correctly.
 */

/*
 * Byte type of the unit at p.  When the high byte is zero, the low byte is
 * looked up in the encoding's type table (read through a const-qualified
 * view of enc); otherwise the unit is classified by
 * unicode_byte_type( high, low ).
 */
#define BIG2_BYTE_TYPE( enc, p ) \
  ( ( p )[0] == 0 ? ( (const struct normal_encoding*)( enc ) )->type[(unsigned char)( p )[1]] \
                  : unicode_byte_type( ( p )[0], ( p )[1] ) )

/* The low byte when the high byte is zero, otherwise -1. */
#define BIG2_BYTE_TO_ASCII( enc, p ) ( ( p )[0] == 0 ? ( p )[1] : -1 )

/* 1 when the high byte is zero and the low byte equals c, otherwise 0. */
#define BIG2_CHAR_MATCHES( enc, p, c ) ( ( p )[0] == 0 && ( p )[1] == ( c ) )

/* Naming-bitmap test against namePages, passing (high, low) as unsigned bytes. */
#define BIG2_IS_NAME_CHAR_MINBPC( enc, p ) \
  UCS2_GET_NAMING( namePages, (unsigned char)( p )[0], (unsigned char)( p )[1] )

/* Naming-bitmap test against nmstrtPages, passing (high, low) as unsigned bytes. */
#define BIG2_IS_NMSTRT_CHAR_MINBPC( enc, p ) \
  UCS2_GET_NAMING( nmstrtPages, (unsigned char)( p )[0], (unsigned char)( p )[1] )
664static int big2_byteType(
const ENCODING*
enc,
const char* p ) {
668static int big2_byteToAscii(
const ENCODING*
enc,
const char* p ) {
672static int big2_charMatches(
const ENCODING*
enc,
const char* p,
int c ) {
676static int big2_isNameMin(
const ENCODING*
enc,
const char* p ) {
680static int big2_isNmstrtMin(
const ENCODING*
enc,
const char* p ) {
/*
 * Macro configuration in effect when xmltok_impl.c is included below:
 *  - VTABLE is VTABLE1 followed by big2_toUtf8 and big2_toUtf16;
 *  - PREFIX(ident) expands to big2_ident;
 *  - MINBPC is the constant 2;
 *  - BYTE_TYPE, BYTE_TO_ASCII, CHAR_MATCHES and the *_MINBPC name tests
 *    forward to the corresponding BIG2_* macros;
 *  - IS_NAME_CHAR and IS_NMSTRT_CHAR are the constant 0.
 */
685# define VTABLE VTABLE1, big2_toUtf8, big2_toUtf16
690# define PREFIX( ident ) big2_##ident
691# define MINBPC( enc ) 2
693# define BYTE_TYPE( enc, p ) BIG2_BYTE_TYPE( enc, p )
694# define BYTE_TO_ASCII( enc, p ) BIG2_BYTE_TO_ASCII( enc, p )
695# define CHAR_MATCHES( enc, p, c ) BIG2_CHAR_MATCHES( enc, p, c )
696# define IS_NAME_CHAR( enc, p, n ) 0
697# define IS_NAME_CHAR_MINBPC( enc, p ) BIG2_IS_NAME_CHAR_MINBPC( enc, p )
698# define IS_NMSTRT_CHAR( enc, p, n ) ( 0 )
699# define IS_NMSTRT_CHAR_MINBPC( enc, p ) BIG2_IS_NMSTRT_CHAR_MINBPC( enc, p )
701# include "xmltok_impl.c"
708# undef IS_NAME_CHAR_MINBPC
709# undef IS_NMSTRT_CHAR
710# undef IS_NMSTRT_CHAR_MINBPC
711# undef IS_INVALID_CHAR
718# if XML_BYTE_ORDER == 21
725# include "asciitab.h"
726# include "latin1tab.h"
733#if XML_BYTE_ORDER == 21
740#define BT_COLON BT_NMSTRT
743#include "latin1tab.h"
747#if XML_BYTE_ORDER != 12
753# include "iasciitab.h"
754# include "latin1tab.h"
762# define BT_COLON BT_NMSTRT
763# include "iasciitab.h"
765# include "latin1tab.h"
773static int streqci(
const char* s1,
const char* s2 ) {
778 if (
'a' <= c1 && c1 <=
'z' ) c1 +=
'A' -
'a';
779 if (
'a' <= c2 && c2 <=
'z' ) c2 +=
'A' -
'a';
780 if ( c1 != c2 )
return 0;
786static void initUpdatePosition(
const ENCODING*
enc,
const char* ptr,
const char* end,
788 normal_updatePosition( &utf8_encoding.enc, ptr, end, pos );
791static int toAscii(
const ENCODING*
enc,
const char* ptr,
const char* end ) {
794 XmlUtf8Convert(
enc, &ptr, end, &p, p + 1 );
795 if ( p == buf )
return -1;
799static int isSpace(
int c ) {
812static int parsePseudoAttribute(
const ENCODING*
enc,
const char* ptr,
const char* end,
813 const char** namePtr,
const char** valPtr,
814 const char** nextTokPtr ) {
822 if ( !isSpace( toAscii(
enc, ptr, end ) ) )
828 ptr +=
enc->minBytesPerChar;
829 }
while ( isSpace( toAscii(
enc, ptr, end ) ) );
838 c = toAscii(
enc, ptr, end );
844 if ( c ==
'=' )
break;
848 ptr +=
enc->minBytesPerChar;
849 }
while ( isSpace( c = toAscii(
enc, ptr, end ) ) );
857 ptr +=
enc->minBytesPerChar;
859 if ( ptr == *namePtr )
864 ptr +=
enc->minBytesPerChar;
865 c = toAscii(
enc, ptr, end );
866 while ( isSpace( c ) )
868 ptr +=
enc->minBytesPerChar;
869 c = toAscii(
enc, ptr, end );
871 if ( c !=
'"' && c !=
'\'' )
877 ptr +=
enc->minBytesPerChar;
879 for ( ;; ptr +=
enc->minBytesPerChar )
881 c = toAscii(
enc, ptr, end );
882 if ( c ==
open )
break;
883 if ( !(
'a' <= c && c <=
'z' ) && !(
'A' <= c && c <=
'Z' ) && !(
'0' <= c && c <=
'9' ) &&
884 c !=
'.' && c !=
'-' && c !=
'_' )
890 *nextTokPtr = ptr +
enc->minBytesPerChar;
895doParseXmlDecl(
const ENCODING* ( *encodingFinder )(
const ENCODING*,
const char*,
const char*),
896 int isGeneralTextEntity,
const ENCODING*
enc,
const char* ptr,
const char* end,
897 const char** badPtr,
const char** versionPtr,
const char** encodingName,
898 const ENCODING** encoding,
int* standalone ) {
900 const char* name = 0;
901 ptr += 5 *
enc->minBytesPerChar;
902 end -= 2 *
enc->minBytesPerChar;
903 if ( !parsePseudoAttribute(
enc, ptr, end, &name, &val, &ptr ) || !name )
908 if ( !XmlNameMatchesAscii(
enc, name,
"version" ) )
910 if ( !isGeneralTextEntity )
918 if ( versionPtr ) *versionPtr = val;
919 if ( !parsePseudoAttribute(
enc, ptr, end, &name, &val, &ptr ) )
926 if ( isGeneralTextEntity )
935 if ( XmlNameMatchesAscii(
enc, name,
"encoding" ) )
937 int c = toAscii(
enc, val, end );
938 if ( !(
'a' <= c && c <=
'z' ) && !(
'A' <= c && c <=
'Z' ) )
943 if ( encodingName ) *encodingName = val;
944 if ( encoding ) *encoding = encodingFinder(
enc, val, ptr -
enc->minBytesPerChar );
945 if ( !parsePseudoAttribute(
enc, ptr, end, &name, &val, &ptr ) )
950 if ( !name )
return 1;
952 if ( !XmlNameMatchesAscii(
enc, name,
"standalone" ) || isGeneralTextEntity )
957 if ( XmlNameMatchesAscii(
enc, val,
"yes" ) )
959 if ( standalone ) *standalone = 1;
961 else if ( XmlNameMatchesAscii(
enc, val,
"no" ) )
963 if ( standalone ) *standalone = 0;
970 while ( isSpace( toAscii(
enc, ptr, end ) ) ) ptr +=
enc->minBytesPerChar;
979static int checkCharRefNumber(
int result ) {
980 switch ( result >> 8 )
989 case 0xDF:
return -1;
991 if ( latin1_encoding.type[result] == BT_NONXML )
return -1;
994 if ( result == 0xFFFE || result == 0xFFFF )
return -1;
1008 if ( c < 0 )
return 0;
1017 buf[1] = ( ( c & 0x3f ) | 0x80 );
1023 buf[1] = ( ( ( c >> 6 ) & 0x3f ) | 0x80 );
1024 buf[2] = ( ( c & 0x3f ) | 0x80 );
1030 buf[1] = ( ( ( c >> 12 ) & 0x3f ) | 0x80 );
1031 buf[2] = ( ( ( c >> 6 ) & 0x3f ) | 0x80 );
1032 buf[3] = ( ( c & 0x3f ) | 0x80 );
1039 if ( charNum < 0 )
return 0;
1040 if ( charNum < 0x10000 )
1045 if ( charNum < 0x110000 )
1048 buf[0] = ( charNum >> 10 ) + 0xD800;
1049 buf[1] = ( charNum & 0x3FF ) + 0xDC00;
1065static int unknown_isName(
const ENCODING* enc,
const char* p ) {
1068 if ( c & ~0xFFFF )
return 0;
1072static int unknown_isNmstrt(
const ENCODING* enc,
const char* p ) {
1075 if ( c & ~0xFFFF )
return 0;
1079static int unknown_isInvalid(
const ENCODING* enc,
const char* p ) {
1082 return ( c & ~0xFFFF ) || checkCharRefNumber( c ) < 0;
1085static void unknown_toUtf8(
const ENCODING* enc,
const char** fromP,
const char* fromLim,
1086 char** toP,
const char* toLim ) {
1087 char buf[XML_UTF8_ENCODE_MAX];
1092 if ( *fromP == fromLim )
break;
1100 if (
n > toLim - *toP )
break;
1107 if (
n > toLim - *toP )
break;
1111 *( *toP )++ = *utf8++;
1112 }
while ( --
n != 0 );
1116static void unknown_toUtf16(
const ENCODING*
enc,
const char** fromP,
const char* fromLim,
1117 unsigned short** toP,
const unsigned short* toLim ) {
1118 while ( *fromP != fromLim && *toP != toLim )
1134 int ( *convert )(
void* userData,
const char* p ),
1139 ( (
char*)mem )[i] = ( (
char*)&latin1_encoding )[i];
1140 for ( i = 0; i < 128; i++ )
1141 if ( latin1_encoding.type[i] != BT_OTHER && latin1_encoding.type[i] != BT_NONXML &&
1144 for ( i = 0; i < 256; i++ )
1151 e->
utf16[i] = 0xFFFF;
1157 if ( c < -4 )
return 0;
1162 else if ( c < 0x80 )
1164 if ( latin1_encoding.type[c] != BT_OTHER && latin1_encoding.type[c] != BT_NONXML &&
1169 e->
utf8[i][1] = (char)c;
1170 e->
utf16[i] = c == 0 ? 0xFFFF : c;
1172 else if ( checkCharRefNumber( c ) < 0 )
1176 e->
utf16[i] = 0xFFFF;
1182 if ( c > 0xFFFF )
return 0;
1204 e->
normal.
enc.utf8Convert = unknown_toUtf8;
1205 e->
normal.
enc.utf16Convert = unknown_toUtf16;
1223static int getEncodingIndex(
const char* name ) {
1224 static const char* encodingNames[] = {
1233 if ( name == 0 )
return NO_ENC;
1234 for ( i = 0; i <
sizeof( encodingNames ) /
sizeof( encodingNames[0] ); i++ )
1235 if ( streqci( name, encodingNames[i] ) )
return i;
/*
 * Accesses the initEnc.isUtf16 member of enc.
 * NOTE(review): the macro name suggests this field is reused to hold an
 * encoding index rather than a plain flag - confirm against the writers.
 */
1242#define INIT_ENC_INDEX( enc ) ( ( enc )->initEnc.isUtf16 )
1251static int initScan(
const ENCODING** encodingTable,
const INIT_ENCODING*
enc,
int state,
1252 const char* ptr,
const char* end,
const char** nextTokPtr ) {
1253 const ENCODING** encPtr;
1255 if ( ptr == end )
return XML_TOK_NONE;
1256 encPtr =
enc->encPtr;
1257 if ( ptr + 1 == end )
1261 if ( state != XML_CONTENT_STATE )
return XML_TOK_PARTIAL;
1270 switch ( (
unsigned char)*ptr )
1278 case 0x3C:
return XML_TOK_PARTIAL;
1283 switch ( ( (
unsigned char)ptr[0] << 8 ) | (
unsigned char)ptr[1] )
1287 *nextTokPtr = ptr + 2;
1293 state == XML_CONTENT_STATE )
1296 return XmlTok( *encPtr, state, ptr, end, nextTokPtr );
1299 *nextTokPtr = ptr + 2;
1309 if ( state == XML_CONTENT_STATE )
1315 if ( ptr + 2 == end )
return XML_TOK_PARTIAL;
1316 if ( (
unsigned char)ptr[2] == 0xBF )
1323 if ( ptr[0] ==
'\0' )
1331 return XmlTok( *encPtr, state, ptr, end, nextTokPtr );
1333 else if ( ptr[1] ==
'\0' )
1343 if ( state == XML_CONTENT_STATE )
break;
1345 return XmlTok( *encPtr, state, ptr, end, nextTokPtr );
1351 return XmlTok( *encPtr, state, ptr, end, nextTokPtr );
/* Name-mangling helpers: NS(x) appends NS to x, and ns(x) appends _ns to x. */
1362# define NS( x ) x##NS
1363# define ns( x ) x##_ns
1370ENCODING* XmlInitUnknownEncodingNS(
void* mem,
int* table,
1371 int ( *convert )(
void* userData,
const char* p ),
m_outputFile open("YYYY/m_txt_dir/LumTau_XXXX.txt", ios_base::app)
int(* isInvalid4)(const ENCODING *, const char *)
int(* isInvalid2)(const ENCODING *, const char *)
int(* isNmstrt4)(const ENCODING *, const char *)
int(* isName3)(const ENCODING *, const char *)
int(* isName2)(const ENCODING *, const char *)
int(* isNmstrt2)(const ENCODING *, const char *)
int(* isName4)(const ENCODING *, const char *)
int(* isNmstrt3)(const ENCODING *, const char *)
int(* isInvalid3)(const ENCODING *, const char *)
unsigned short utf16[256]
int(* convert)(void *userData, const char *p)
struct normal_encoding normal
#define BIG2_BYTE_TO_ASCII(enc, p)
#define STANDARD_VTABLE(E)
int XmlUtf8Encode(int c, char *buf)
#define UTF8_GET_NAMING3(pages, byte)
#define BIG2_IS_NAME_CHAR_MINBPC(enc, p)
#define DEFINE_UTF16_TO_UTF8(E)
#define BIG2_BYTE_TYPE(enc, p)
int XmlSizeOfUnknownEncoding()
ENCODING * XmlInitUnknownEncoding(void *mem, int *table, int(*convert)(void *userData, const char *p), void *userData)
#define INIT_ENC_INDEX(enc)
int XmlUtf16Encode(int charNum, unsigned short *buf)
#define SB_BYTE_TYPE(enc, p)
#define LITTLE2_CHAR_MATCHES(enc, p, c)
#define BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p)
#define LITTLE2_BYTE_TYPE(enc, p)
#define LITTLE2_IS_NAME_CHAR_MINBPC(enc, p)
#define UCS2_GET_NAMING(pages, hi, lo)
#define UTF8_GET_NAMING2(pages, byte)
#define LITTLE2_BYTE_TO_ASCII(enc, p)
#define DEFINE_UTF16_TO_UTF16(E)
#define BIG2_CHAR_MATCHES(enc, p, c)
#define LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p)