diff options
Diffstat (limited to 'contrib/tsearch2/wordparser/parser.c')
-rw-r--r-- | contrib/tsearch2/wordparser/parser.c | 1028 |
1 files changed, 1028 insertions, 0 deletions
diff --git a/contrib/tsearch2/wordparser/parser.c b/contrib/tsearch2/wordparser/parser.c new file mode 100644 index 00000000000..e414a865ffd --- /dev/null +++ b/contrib/tsearch2/wordparser/parser.c @@ -0,0 +1,1028 @@ +#include "postgres.h" + +#include "utils/builtins.h" +#include "utils/pg_locale.h" +#include "mb/pg_wchar.h" + +#include "deflex.h" +#include "parser.h" +#include "ts_locale.h" + + +static TParserPosition* +newTParserPosition(TParserPosition *prev) { + TParserPosition *res = (TParserPosition*)palloc(sizeof(TParserPosition)); + + if ( prev ) + memcpy(res, prev, sizeof(TParserPosition)); + else + memset(res, 0, sizeof(TParserPosition)); + + res->prev = prev; + + res->pushedAtAction = NULL; + + return res; +} + +TParser* +TParserInit( char *str, int len ) { + TParser *prs = (TParser*)palloc0( sizeof(TParser) ); + + prs->charmaxlen = pg_database_encoding_max_length(); + prs->str = str; + prs->lenstr = len; + +#ifdef TS_USE_WIDE + /* + * Use wide char code only when max encoding length > 1 and ctype != C. + * Some operating systems fail with multi-byte encodings and a C locale. + * Also, for a C locale there is no need to process as multibyte. + * From backend/utils/adt/oracle_compat.c Teodor + */ + + if ( prs->charmaxlen > 1 && !lc_ctype_is_c() ) { + prs->usewide=true; + prs->wstr = (wchar_t*)palloc( sizeof(wchar_t) * prs->lenstr ); + prs->lenwstr = char2wchar( prs->wstr, prs->str, prs->lenstr ); + } else +#endif + prs->usewide=false; + + prs->state = newTParserPosition(NULL); + prs->state->state = TPS_Base; + + return prs; +} + +void +TParserClose( TParser* prs ) { + while( prs->state ) { + TParserPosition *ptr = prs->state->prev; + pfree( prs->state ); + prs->state = ptr; + } + + if ( prs->wstr ) + pfree( prs->wstr ); + pfree( prs ); +} + +/* + * defining support function, equvalent is* macroses, but + * working with any possible encodings and locales + */ + +#ifdef TS_USE_WIDE + +#define p_iswhat(type) \ +static int \ +p_is##type(TParser *prs) { \ + Assert( prs->state ); \ + return ( ( prs->usewide ) ? isw##type( (wint_t)*( prs->wstr + prs->state->poschar ) ) : \ + is##type( (unsigned char)*( prs->str + prs->state->posbyte ) ) ); \ +} \ + \ +static int \ +p_isnot##type(TParser *prs) { \ + return !p_is##type(prs); \ +} + + + +/* p_iseq should be used only for ascii symbols */ + +static int +p_iseq(TParser *prs, char c) { + Assert( prs->state ); + return ( ( prs->state->charlen==1 && *( prs->str + prs->state->posbyte ) == c ) ) ? 1 : 0; +} + +#else /* TS_USE_WIDE */ + +#define p_iswhat(type) \ +static int \ +p_is##type(TParser *prs) { \ + Assert( prs->state ); \ + return is##type( (unsigned char)*( prs->str + prs->state->posbyte ) ) ); \ +} \ + \ +static int \ +p_isnot##type(TParser *prs) { \ + return !p_is##type(prs); \ +} + + +static int +p_iseq(TParser *prs, char c) { + Assert( prs->state ); + return ( *( prs->str + prs->state->posbyte ) == c ) ) ? 1 : 0; +} + +#endif /* TS_USE_WIDE */ + +p_iswhat(alnum) +p_iswhat(alpha) +p_iswhat(digit) +p_iswhat(lower) +p_iswhat(print) +p_iswhat(punct) +p_iswhat(space) +p_iswhat(upper) +p_iswhat(xdigit) + +static int +p_isEOF(TParser *prs) { + Assert( prs->state ); + return (prs->state->posbyte == prs->lenstr || prs->state->charlen==0) ? 1 : 0; +} + +static int +p_iseqC(TParser *prs) { + return p_iseq(prs, prs->c); +} + +static int +p_isneC(TParser *prs) { + return !p_iseq(prs, prs->c); +} + +static int +p_isascii(TParser *prs) { + return ( prs->state->charlen==1 && isascii( (unsigned char) *( prs->str + prs->state->posbyte ) ) ) ? 1 : 0; +} + +static int +p_islatin(TParser *prs) { + return ( p_isalpha(prs) && p_isascii(prs) ) ? 1 : 0; +} + +static int +p_isnonlatin(TParser *prs) { + return ( p_isalpha(prs) && !p_isascii(prs) ) ? 1 : 0; +} + +void _make_compiler_happy(void); +void +_make_compiler_happy(void) { + p_isalnum(NULL); p_isnotalnum(NULL); + p_isalpha(NULL); p_isnotalpha(NULL); + p_isdigit(NULL); p_isnotdigit(NULL); + p_islower(NULL); p_isnotlower(NULL); + p_isprint(NULL); p_isnotprint(NULL); + p_ispunct(NULL); p_isnotpunct(NULL); + p_isspace(NULL); p_isnotspace(NULL); + p_isupper(NULL); p_isnotupper(NULL); + p_isxdigit(NULL); p_isnotxdigit(NULL); + p_isEOF(NULL); + p_iseqC(NULL); p_isneC(NULL); +} + + +static void +SpecialTags(TParser *prs) { + switch( prs->state->lencharlexeme ) { + case 8: /* </script */ + if ( pg_strncasecmp( prs->lexeme, "</script", 8 ) == 0 ) + prs->ignore = false; + break; + case 7: /* <script || </style */ + if ( pg_strncasecmp( prs->lexeme, "</style", 7 ) == 0 ) + prs->ignore = false; + else if ( pg_strncasecmp( prs->lexeme, "<script", 7 ) == 0 ) + prs->ignore = true; + break; + case 6: /* <style */ + if ( pg_strncasecmp( prs->lexeme, "<style", 6 ) == 0 ) + prs->ignore = true; + break; + default: break; + } +} + +static void +SpecialFURL(TParser *prs) { + prs->wanthost = true; + prs->state->posbyte -= prs->state->lenbytelexeme; + prs->state->poschar -= prs->state->lencharlexeme; +} + +static void +SpecialHyphen(TParser *prs) { + prs->state->posbyte -= prs->state->lenbytelexeme; + prs->state->poschar -= prs->state->lencharlexeme; +} + +static int +p_isstophost(TParser *prs) { + if ( prs->wanthost ) { + prs->wanthost = false; + return 1; + } + return 0; +} + +static int +p_isignore(TParser *prs) { + return (prs->ignore) ? 1 : 0; +} + +static int +p_ishost(TParser *prs) { + TParser *tmpprs = TParserInit( prs->str+prs->state->posbyte, prs->lenstr - prs->state->posbyte ); + int res = 0; + + if ( TParserGet(tmpprs) && tmpprs->type == HOST ) { + prs->state->posbyte += tmpprs->lenbytelexeme; + prs->state->poschar += tmpprs->lencharlexeme; + prs->state->lenbytelexeme += tmpprs->lenbytelexeme; + prs->state->lencharlexeme += tmpprs->lencharlexeme; + prs->state->charlen = tmpprs->state->charlen; + res = 1; + } + TParserClose(tmpprs); + + return res; +} + +static int +p_isURI(TParser *prs) { + TParser *tmpprs = TParserInit( prs->str+prs->state->posbyte, prs->lenstr - prs->state->posbyte ); + int res = 0; + + tmpprs->state = newTParserPosition( tmpprs->state ); + tmpprs->state->state = TPS_InFileFirst; + + if ( TParserGet(tmpprs) && (tmpprs->type == URI || tmpprs->type == FILEPATH) ) { + prs->state->posbyte += tmpprs->lenbytelexeme; + prs->state->poschar += tmpprs->lencharlexeme; + prs->state->lenbytelexeme += tmpprs->lenbytelexeme; + prs->state->lencharlexeme += tmpprs->lencharlexeme; + prs->state->charlen = tmpprs->state->charlen; + res = 1; + } + TParserClose(tmpprs); + + return res; +} + +/* + * Table of state/action of parser + */ + +#define A_NEXT 0x0000 +#define A_BINGO 0x0001 +#define A_POP 0x0002 +#define A_PUSH 0x0004 +#define A_RERUN 0x0008 +#define A_CLEAR 0x0010 +#define A_MERGE 0x0020 +#define A_CLRALL 0x0040 + +static TParserStateActionItem actionTPS_Base[] = { + {p_isEOF, 0, A_NEXT, TPS_Null, 0, NULL}, + {p_iseqC, '<', A_PUSH, TPS_InTagFirst, 0, NULL}, + {p_isignore, 0, A_NEXT, TPS_InSpace, 0, NULL}, + {p_islatin, 0, A_NEXT, TPS_InLatWord, 0, NULL}, + {p_isnonlatin, 0, A_NEXT, TPS_InCyrWord, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InUnsignedInt, 0, NULL}, + {p_iseqC, '-', A_PUSH, TPS_InSignedIntFirst, 0, NULL}, + {p_iseqC, '+', A_PUSH, TPS_InSignedIntFirst, 0, NULL}, + {p_iseqC, '&', A_PUSH, TPS_InHTMLEntityFirst, 0, NULL}, + {p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL}, + {NULL, 0, A_NEXT, TPS_InSpace, 0, NULL} +}; + + +static TParserStateActionItem actionTPS_InUWord[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, UWORD, NULL}, + {p_isalnum, 0, A_NEXT, TPS_InUWord, 0, NULL}, + {p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL}, + {p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL}, + {p_iseqC, '-', A_PUSH, TPS_InHyphenUWordFirst, 0, NULL}, + {NULL, 0, A_BINGO, TPS_Base, UWORD, NULL} +}; + +static TParserStateActionItem actionTPS_InLatWord[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, LATWORD, NULL}, + {p_islatin, 0, A_NEXT, TPS_Null, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InHostFirstDomen, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InFileFirst, 0, NULL}, + {p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL}, + {p_iseqC, '-', A_PUSH, TPS_InHyphenLatWordFirst,0, NULL}, + {p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL}, + {p_iseqC, ':', A_PUSH, TPS_InProtocolFirst, 0, NULL}, + {p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL}, + {p_isdigit, 0, A_PUSH, TPS_InHost, 0, NULL}, + {p_isalnum, 0, A_NEXT, TPS_InUWord, 0, NULL}, + {NULL, 0, A_BINGO, TPS_Base, LATWORD, NULL} +}; + +static TParserStateActionItem actionTPS_InCyrWord[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, CYRWORD, NULL}, + {p_isnonlatin, 0, A_NEXT, TPS_Null, 0, NULL}, + {p_isalnum, 0, A_NEXT, TPS_InUWord, 0, NULL}, + {p_iseqC, '-', A_PUSH, TPS_InHyphenCyrWordFirst,0, NULL}, + {NULL, 0, A_BINGO, TPS_Base, CYRWORD, NULL} +}; + +static TParserStateActionItem actionTPS_InUnsignedInt[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, UNSIGNEDINT, NULL}, + {p_isdigit, 0, A_NEXT, TPS_Null, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InHostFirstDomen, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InUDecimalFirst, 0, NULL}, + {p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL}, + {p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL}, + {p_islatin, 0, A_PUSH, TPS_InHost, 0, NULL}, + {p_isalpha, 0, A_NEXT, TPS_InUWord, 0, NULL}, + {p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL}, + {NULL, 0, A_BINGO, TPS_Base, UNSIGNEDINT, NULL} +}; + +static TParserStateActionItem actionTPS_InSignedIntFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_NEXT|A_CLEAR, TPS_InSignedInt, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InSignedInt[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, SIGNEDINT, NULL}, + {p_isdigit, 0, A_NEXT, TPS_Null, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InDecimalFirst, 0, NULL}, + {p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL}, + {p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL}, + {NULL, 0, A_BINGO, TPS_Base, SIGNEDINT, NULL} +}; + +static TParserStateActionItem actionTPS_InSpace[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, SPACE, NULL}, + {p_iseqC, '<', A_BINGO, TPS_Base, SPACE, NULL}, + {p_isignore, 0, A_NEXT, TPS_Null, 0, NULL}, + {p_iseqC, '-', A_BINGO, TPS_Base, SPACE, NULL}, + {p_iseqC, '+', A_BINGO, TPS_Base, SPACE, NULL}, + {p_iseqC, '&', A_BINGO, TPS_Base, SPACE, NULL}, + {p_iseqC, '/', A_BINGO, TPS_Base, SPACE, NULL}, + {p_isnotalnum, 0, A_NEXT, TPS_InSpace, 0, NULL}, + {NULL, 0, A_BINGO, TPS_Base, SPACE, NULL} +}; + +static TParserStateActionItem actionTPS_InUDecimalFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_CLEAR, TPS_InUDecimal, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InUDecimal[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, DECIMAL, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InUDecimal, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InVersionFirst, 0, NULL}, + {p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL}, + {p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL}, + {NULL, 0, A_BINGO, TPS_Base, DECIMAL, NULL} +}; + +static TParserStateActionItem actionTPS_InDecimalFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_CLEAR, TPS_InDecimal, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InDecimal[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, DECIMAL, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InDecimal, 0, NULL}, + {p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL}, + {p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL}, + {NULL, 0, A_BINGO, TPS_Base, DECIMAL, NULL} +}; + +static TParserStateActionItem actionTPS_InVersionFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_CLEAR, TPS_InVersion, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InVersion[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, VERSIONNUMBER, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InVersion, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InVersionFirst, 0, NULL}, + {NULL, 0, A_BINGO, TPS_Base, VERSIONNUMBER, NULL} +}; + +static TParserStateActionItem actionTPS_InMantissaFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_CLEAR, TPS_InMantissa, 0, NULL}, + {p_iseqC, '+', A_NEXT, TPS_InMantissaSign, 0, NULL}, + {p_iseqC, '-', A_NEXT, TPS_InMantissaSign, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InMantissaSign[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_CLEAR, TPS_InMantissa, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InMantissa[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, SCIENTIFIC, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InMantissa, 0, NULL}, + {NULL, 0, A_BINGO, TPS_Base, SCIENTIFIC, NULL} +}; + +static TParserStateActionItem actionTPS_InHTMLEntityFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_iseqC, '#', A_NEXT, TPS_InHTMLEntityNumFirst,0, NULL}, + {p_islatin, 0, A_NEXT, TPS_InHTMLEntity, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InHTMLEntity[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_islatin, 0, A_NEXT, TPS_InHTMLEntity, 0, NULL}, + {p_iseqC, ';', A_NEXT, TPS_InHTMLEntityEnd, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InHTMLEntityNumFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHTMLEntityNum, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InHTMLEntityNum[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHTMLEntityNum, 0, NULL}, + {p_iseqC, ';', A_NEXT, TPS_InHTMLEntityEnd, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InHTMLEntityEnd[] = { + {NULL, 0, A_BINGO|A_CLEAR,TPS_Base, HTMLENTITY, NULL} +}; + +static TParserStateActionItem actionTPS_InTagFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_iseqC, '/', A_PUSH, TPS_InTagCloseFirst, 0, NULL}, + {p_iseqC, '!', A_PUSH, TPS_InCommentFirst, 0, NULL}, + {p_islatin, 0, A_PUSH, TPS_InTag, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InTagCloseFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_islatin, 0, A_NEXT, TPS_InTag, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InTag[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_iseqC, '>', A_NEXT, TPS_InTagEnd, 0, SpecialTags}, + {p_iseqC, '\'', A_NEXT, TPS_InTagEscapeK, 0, NULL}, + {p_iseqC, '"', A_NEXT, TPS_InTagEscapeKK, 0, NULL}, + {p_islatin, 0, A_NEXT, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_Null, 0, NULL}, + {p_iseqC, '=', A_NEXT, TPS_Null, 0, NULL}, + {p_iseqC, '-', A_NEXT, TPS_Null, 0, NULL}, + {p_iseqC, '#', A_NEXT, TPS_Null, 0, NULL}, + {p_iseqC, '%', A_NEXT, TPS_Null, 0, NULL}, + {p_isspace, 0, A_NEXT, TPS_Null, 0, SpecialTags}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InTagEscapeK[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_iseqC, '\\', A_PUSH, TPS_InTagBackSleshed, 0, NULL}, + {p_iseqC, '\'', A_NEXT, TPS_InTag, 0, NULL}, + {NULL, 0, A_NEXT, TPS_InTagEscapeK, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InTagEscapeKK[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_iseqC, '\\', A_PUSH, TPS_InTagBackSleshed, 0, NULL}, + {p_iseqC, '"', A_NEXT, TPS_InTag, 0, NULL}, + {NULL, 0, A_NEXT, TPS_InTagEscapeKK, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InTagBackSleshed[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {NULL, 0, A_MERGE, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InTagEnd[] = { + {NULL, 0, A_BINGO|A_CLRALL,TPS_Base, TAG, NULL} +}; + +static TParserStateActionItem actionTPS_InCommentFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_iseqC, '-', A_NEXT, TPS_InCommentLast, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InCommentLast[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_iseqC, '-', A_NEXT, TPS_InComment, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InComment[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_iseqC, '-', A_NEXT, TPS_InCloseCommentFirst,0, NULL}, + {NULL, 0, A_NEXT, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InCloseCommentFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_iseqC, '-', A_NEXT, TPS_InCloseCommentLast, 0, NULL}, + {NULL, 0, A_NEXT, TPS_InComment, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InCloseCommentLast[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_iseqC, '-', A_NEXT, TPS_Null, 0, NULL}, + {p_iseqC, '>', A_NEXT, TPS_InCommentEnd, 0, NULL}, + {NULL, 0, A_NEXT, TPS_InComment, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InCommentEnd[] = { + {NULL, 0, A_BINGO|A_CLRALL,TPS_Base, TAG, NULL} +}; + +static TParserStateActionItem actionTPS_InHostFirstDomen[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_islatin, 0, A_NEXT, TPS_InHostDomenSecond, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHost, 0, NULL}, + //{p_iseqC, '-', A_POP, TPS_InHostFirstAN, 0, NULL}, + //{p_iseqC, '.', A_POP, TPS_InHostFirstDomen, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InHostDomenSecond[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_islatin, 0, A_NEXT, TPS_InHostDomen, 0, NULL}, + {p_isdigit, 0, A_PUSH, TPS_InHost, 0, NULL}, + {p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InHostFirstDomen, 0, NULL}, + {p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InHostDomen[] = { + {p_isEOF, 0, A_BINGO|A_CLRALL,TPS_Base, HOST, NULL}, + {p_islatin, 0, A_NEXT, TPS_InHostDomen, 0, NULL}, + {p_isdigit, 0, A_PUSH, TPS_InHost, 0, NULL}, + {p_iseqC, ':', A_PUSH, TPS_InPortFirst, 0, NULL}, + {p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InHostFirstDomen, 0, NULL}, + {p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL}, + {p_isdigit, 0, A_POP, TPS_Null, 0, NULL}, + {p_isstophost, 0, A_BINGO|A_CLRALL,TPS_InURIStart, HOST, NULL}, + {p_iseqC, '/', A_PUSH, TPS_InFURL, 0, NULL}, + {NULL, 0, A_BINGO|A_CLRALL,TPS_Base, HOST, NULL} +}; + +static TParserStateActionItem actionTPS_InPortFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InPort, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InPort[] = { + {p_isEOF, 0, A_BINGO|A_CLRALL,TPS_Base, HOST, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InPort, 0, NULL}, + {p_isstophost, 0, A_BINGO|A_CLRALL,TPS_InURIStart, HOST, NULL}, + {p_iseqC, '/', A_PUSH, TPS_InFURL, 0, NULL}, + {NULL, 0, A_BINGO|A_CLRALL,TPS_Base, HOST, NULL} +}; + +static TParserStateActionItem actionTPS_InHostFirstAN[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHost, 0, NULL}, + {p_islatin, 0, A_NEXT, TPS_InHost, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InHost[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHost, 0, NULL}, + {p_islatin, 0, A_NEXT, TPS_InHost, 0, NULL}, + {p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InHostFirstDomen, 0, NULL}, + {p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InEmail[] = { + {p_ishost, 0, A_BINGO|A_CLRALL, TPS_Base, EMAIL, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InFileFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_islatin, 0, A_CLEAR, TPS_InFile, 0, NULL}, + {p_isdigit, 0, A_CLEAR, TPS_InFile, 0, NULL}, + {p_iseqC, '.', A_CLEAR, TPS_InFile, 0, NULL}, + {p_iseqC, '_', A_CLEAR, TPS_InFile, 0, NULL}, + {p_iseqC, '?', A_PUSH, TPS_InURIFirst, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InFile[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, FILEPATH, NULL}, + {p_islatin, 0, A_NEXT, TPS_InFile, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InFile, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InFileNext, 0, NULL}, + {p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL}, + {p_iseqC, '-', A_NEXT, TPS_InFile, 0, NULL}, + {p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL}, + {p_iseqC, '?', A_PUSH, TPS_InURIFirst, 0, NULL}, + {NULL, 0, A_BINGO, TPS_Base, FILEPATH, NULL} +}; + +static TParserStateActionItem actionTPS_InFileNext[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_islatin, 0, A_CLEAR, TPS_InFile, 0, NULL}, + {p_isdigit, 0, A_CLEAR, TPS_InFile, 0, NULL}, + {p_iseqC, '_', A_CLEAR, TPS_InFile, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InURIFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_iseqC, '"', A_POP, TPS_Null, 0, NULL}, + {p_iseqC, '\'', A_POP, TPS_Null, 0, NULL}, + {p_isnotspace, 0, A_CLEAR, TPS_InURI, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL}, +}; + +static TParserStateActionItem actionTPS_InURIStart[] = { + {NULL, 0, A_NEXT, TPS_InURI, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InURI[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, URI, NULL}, + {p_iseqC, '"', A_BINGO, TPS_Base, URI, NULL}, + {p_iseqC, '\'', A_BINGO, TPS_Base, URI, NULL}, + {p_isnotspace, 0, A_NEXT, TPS_InURI, 0, NULL}, + {NULL, 0, A_BINGO, TPS_Base, URI, NULL} +}; + +static TParserStateActionItem actionTPS_InFURL[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isURI, 0, A_BINGO|A_CLRALL,TPS_Base, FURL, SpecialFURL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InProtocolFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_iseqC, '/', A_NEXT, TPS_InProtocolSecond, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InProtocolSecond[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_iseqC, '/', A_NEXT, TPS_InProtocolEnd, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InProtocolEnd[] = { + {NULL, 0, A_BINGO|A_CLRALL,TPS_Base, PROTOCOL, NULL} +}; + +static TParserStateActionItem actionTPS_InHyphenLatWordFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_islatin, 0, A_NEXT, TPS_InHyphenLatWord, 0, NULL}, + {p_isnonlatin, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHyphenValue, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InHyphenLatWord[] = { + {p_isEOF, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, LATHYPHENWORD, SpecialHyphen}, + {p_islatin, 0, A_NEXT, TPS_InHyphenLatWord, 0, NULL}, + {p_isnonlatin, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL}, + {p_iseqC, '-', A_PUSH, TPS_InHyphenLatWordFirst,0, NULL}, + {NULL, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, LATHYPHENWORD, SpecialHyphen} +}; + +static TParserStateActionItem actionTPS_InHyphenCyrWordFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isnonlatin, 0, A_NEXT, TPS_InHyphenCyrWord, 0, NULL}, + {p_islatin, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHyphenValue, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InHyphenCyrWord[] = { + {p_isEOF, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, CYRHYPHENWORD, SpecialHyphen}, + {p_isnonlatin, 0, A_NEXT, TPS_InHyphenCyrWord, 0, NULL}, + {p_islatin, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL}, + {p_iseqC, '-', A_PUSH, TPS_InHyphenCyrWordFirst,0, NULL}, + {NULL, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, CYRHYPHENWORD, SpecialHyphen} +}; + +static TParserStateActionItem actionTPS_InHyphenUWordFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHyphenValue, 0, NULL}, + {p_isalnum, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InHyphenUWord[] = { + {p_isEOF, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, HYPHENWORD, SpecialHyphen}, + {p_isalnum, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL}, + {p_iseqC, '-', A_PUSH, TPS_InHyphenUWordFirst,0, NULL}, + {NULL, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, HYPHENWORD, SpecialHyphen} +}; + +static TParserStateActionItem actionTPS_InHyphenValueFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHyphenValueExact, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InHyphenValue[] = { + {p_isEOF, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, HYPHENWORD, SpecialHyphen}, + {p_isdigit, 0, A_NEXT, TPS_InHyphenValue, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InHyphenValueFirst, 0, NULL}, + {p_iseqC, '-', A_PUSH, TPS_InHyphenUWordFirst,0, NULL}, + {p_isalpha, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL}, + {NULL, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, HYPHENWORD, SpecialHyphen} +}; + +static TParserStateActionItem actionTPS_InHyphenValueExact[] = { + {p_isEOF, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, HYPHENWORD, SpecialHyphen}, + {p_isdigit, 0, A_NEXT, TPS_InHyphenValueExact, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InHyphenValueFirst, 0, NULL}, + {p_iseqC, '-', A_PUSH, TPS_InHyphenUWordFirst, 0, NULL}, + {NULL, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, HYPHENWORD, SpecialHyphen} +}; + +static TParserStateActionItem actionTPS_InParseHyphen[] = { + {p_isEOF, 0, A_RERUN, TPS_Base, 0, NULL}, + {p_islatin, 0, A_NEXT, TPS_InHyphenLatWordPart,0, NULL}, + {p_isnonlatin, 0, A_NEXT, TPS_InHyphenCyrWordPart,0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHyphenUnsignedInt,0, NULL}, + {p_iseqC, '-', A_PUSH, TPS_InParseHyphenHyphen,0, NULL}, + {NULL, 0, A_RERUN, TPS_Base, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InParseHyphenHyphen[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isalnum, 0, A_BINGO|A_CLEAR,TPS_InParseHyphen, SPACE, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InHyphenCyrWordPart[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, CYRPARTHYPHENWORD,NULL}, + {p_isnonlatin, 0, A_NEXT, TPS_InHyphenCyrWordPart,0, NULL}, + {p_islatin, 0, A_NEXT, TPS_InHyphenUWordPart, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHyphenUWordPart, 0, NULL}, + {NULL, 0, A_BINGO, TPS_InParseHyphen, CYRPARTHYPHENWORD,NULL} +}; + +static TParserStateActionItem actionTPS_InHyphenLatWordPart[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, LATPARTHYPHENWORD,NULL}, + {p_islatin, 0, A_NEXT, TPS_InHyphenLatWordPart,0, NULL}, + {p_isnonlatin, 0, A_NEXT, TPS_InHyphenUWordPart, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHyphenUWordPart, 0, NULL}, + {NULL, 0, A_BINGO, TPS_InParseHyphen, LATPARTHYPHENWORD,NULL} +}; + +static TParserStateActionItem actionTPS_InHyphenUWordPart[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, PARTHYPHENWORD, NULL}, + {p_isalnum, 0, A_NEXT, TPS_InHyphenUWordPart, 0, NULL}, + {NULL, 0, A_BINGO, TPS_InParseHyphen, PARTHYPHENWORD, NULL} +}; + +static TParserStateActionItem actionTPS_InHyphenUnsignedInt[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, UNSIGNEDINT, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHyphenUnsignedInt,0, NULL}, + {p_isalpha, 0, A_NEXT, TPS_InHyphenUWordPart, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InHDecimalPartFirst,0, NULL}, + {NULL, 0, A_BINGO, TPS_InParseHyphen, UNSIGNEDINT, NULL} +}; + +static TParserStateActionItem actionTPS_InHDecimalPartFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_CLEAR, TPS_InHDecimalPart, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InHDecimalPart[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, DECIMAL, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHDecimalPart, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InHVersionPartFirst,0, NULL}, + {NULL, 0, A_BINGO, TPS_InParseHyphen, DECIMAL, NULL} +}; + +static TParserStateActionItem actionTPS_InHVersionPartFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_CLEAR, TPS_InHVersionPart, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InHVersionPart[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, VERSIONNUMBER, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHVersionPart, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InHVersionPartFirst,0, NULL}, + {NULL, 0, A_BINGO, TPS_InParseHyphen, VERSIONNUMBER, NULL} +}; + +/* + * order should be the same as in typedef enum {} TParserState!! + */ + +static const TParserStateAction Actions[] = { + { TPS_Base, actionTPS_Base }, + { TPS_InUWord, actionTPS_InUWord }, + { TPS_InLatWord, actionTPS_InLatWord }, + { TPS_InCyrWord, actionTPS_InCyrWord }, + { TPS_InUnsignedInt, actionTPS_InUnsignedInt }, + { TPS_InSignedIntFirst, actionTPS_InSignedIntFirst }, + { TPS_InSignedInt, actionTPS_InSignedInt }, + { TPS_InSpace, actionTPS_InSpace }, + { TPS_InUDecimalFirst, actionTPS_InUDecimalFirst }, + { TPS_InUDecimal, actionTPS_InUDecimal }, + { TPS_InDecimalFirst, actionTPS_InDecimalFirst }, + { TPS_InDecimal, actionTPS_InDecimal }, + { TPS_InVersionFirst, actionTPS_InVersionFirst }, + { TPS_InVersion, actionTPS_InVersion }, + { TPS_InMantissaFirst, actionTPS_InMantissaFirst }, + { TPS_InMantissaSign, actionTPS_InMantissaSign }, + { TPS_InMantissa, actionTPS_InMantissa }, + { TPS_InHTMLEntityFirst, actionTPS_InHTMLEntityFirst }, + { TPS_InHTMLEntity, actionTPS_InHTMLEntity }, + { TPS_InHTMLEntityNumFirst, actionTPS_InHTMLEntityNumFirst }, + { TPS_InHTMLEntityNum, actionTPS_InHTMLEntityNum }, + { TPS_InHTMLEntityEnd, actionTPS_InHTMLEntityEnd }, + { TPS_InTagFirst, actionTPS_InTagFirst }, + { TPS_InTagCloseFirst, actionTPS_InTagCloseFirst }, + { TPS_InTag, actionTPS_InTag }, + { TPS_InTagEscapeK, actionTPS_InTagEscapeK }, + { TPS_InTagEscapeKK, actionTPS_InTagEscapeKK }, + { TPS_InTagBackSleshed, actionTPS_InTagBackSleshed }, + { TPS_InTagEnd, actionTPS_InTagEnd }, + { TPS_InCommentFirst, actionTPS_InCommentFirst }, + { TPS_InCommentLast, actionTPS_InCommentLast }, + { TPS_InComment, actionTPS_InComment }, + { TPS_InCloseCommentFirst, actionTPS_InCloseCommentFirst }, + { TPS_InCloseCommentLast, actionTPS_InCloseCommentLast }, + { TPS_InCommentEnd, actionTPS_InCommentEnd }, + { TPS_InHostFirstDomen, actionTPS_InHostFirstDomen }, + { TPS_InHostDomenSecond, actionTPS_InHostDomenSecond }, + { TPS_InHostDomen, actionTPS_InHostDomen }, + { TPS_InPortFirst, actionTPS_InPortFirst }, + { TPS_InPort, actionTPS_InPort }, + { TPS_InHostFirstAN, actionTPS_InHostFirstAN }, + { TPS_InHost, actionTPS_InHost }, + { TPS_InEmail, actionTPS_InEmail }, + { TPS_InFileFirst, actionTPS_InFileFirst }, + { TPS_InFile, actionTPS_InFile }, + { TPS_InFileNext, actionTPS_InFileNext }, + { TPS_InURIFirst, actionTPS_InURIFirst }, + { TPS_InURIStart, actionTPS_InURIStart }, + { TPS_InURI, actionTPS_InURI }, + { TPS_InFURL, actionTPS_InFURL }, + { TPS_InProtocolFirst, actionTPS_InProtocolFirst }, + { TPS_InProtocolSecond, actionTPS_InProtocolSecond }, + { TPS_InProtocolEnd, actionTPS_InProtocolEnd }, + { TPS_InHyphenLatWordFirst, actionTPS_InHyphenLatWordFirst }, + { TPS_InHyphenLatWord, actionTPS_InHyphenLatWord }, + { TPS_InHyphenCyrWordFirst, actionTPS_InHyphenCyrWordFirst }, + { TPS_InHyphenCyrWord, actionTPS_InHyphenCyrWord }, + { TPS_InHyphenUWordFirst, actionTPS_InHyphenUWordFirst }, + { TPS_InHyphenUWord, actionTPS_InHyphenUWord }, + { TPS_InHyphenValueFirst, actionTPS_InHyphenValueFirst }, + { TPS_InHyphenValue, actionTPS_InHyphenValue }, + { TPS_InHyphenValueExact, actionTPS_InHyphenValueExact }, + { TPS_InParseHyphen, actionTPS_InParseHyphen }, + { TPS_InParseHyphenHyphen, actionTPS_InParseHyphenHyphen }, + { TPS_InHyphenCyrWordPart, actionTPS_InHyphenCyrWordPart }, + { TPS_InHyphenLatWordPart, actionTPS_InHyphenLatWordPart }, + { TPS_InHyphenUWordPart, actionTPS_InHyphenUWordPart }, + { TPS_InHyphenUnsignedInt, actionTPS_InHyphenUnsignedInt }, + { TPS_InHDecimalPartFirst, actionTPS_InHDecimalPartFirst }, + { TPS_InHDecimalPart, actionTPS_InHDecimalPart }, + { TPS_InHVersionPartFirst, actionTPS_InHVersionPartFirst }, + { TPS_InHVersionPart, actionTPS_InHVersionPart }, + { TPS_Null, NULL } +}; + + +bool +TParserGet( TParser *prs ) { + TParserStateActionItem *item=NULL; + + if ( prs->state->posbyte >= prs->lenstr ) + return false; + + Assert( prs->state ); + prs->lexeme = prs->str + prs->state->posbyte; + prs->state->pushedAtAction = NULL; + + /* look at string */ + while (prs->state->posbyte <= prs->lenstr) { + if ( prs->state->posbyte == prs->lenstr ) + prs->state->charlen = 0; + else + prs->state->charlen = ( prs->charmaxlen == 1 ) ? prs->charmaxlen : + pg_mblen( prs->str + prs->state->posbyte ); + + Assert( prs->state->posbyte + prs->state->charlen <= prs->lenstr ); + Assert( prs->state->state >=TPS_Base && prs->state->state < TPS_Null ); + Assert( Actions[ prs->state->state ].state == prs->state->state ); + + item = Actions[ prs->state->state ].action; + Assert(item!=NULL); + + if ( item < prs->state->pushedAtAction ) + item = prs->state->pushedAtAction; + + /* find action by character class */ + while( item->isclass ) { + prs->c = item->c; + if ( item->isclass(prs)!=0 ) { + if ( item > prs->state->pushedAtAction ) /* remember: after pushing we were by false way */ + break; + } + item++; + } + + prs->state->pushedAtAction = NULL; + + /* call special handler if exists */ + if ( item->special ) + item->special(prs); + + /* BINGO, lexeme is found */ + if ( item->flags & A_BINGO ) { + Assert( item->type>0 ); + prs->lenbytelexeme = prs->state->lenbytelexeme; + prs->lencharlexeme = prs->state->lencharlexeme; + prs->state->lenbytelexeme = prs->state->lencharlexeme = 0; + prs->type = item->type; + } + + /* do various actions by flags */ + if ( item->flags & A_POP ) { /* pop stored state in stack */ + TParserPosition *ptr = prs->state->prev; + pfree( prs->state ); + prs->state = ptr; + Assert( prs->state ); + } else if ( item->flags & A_PUSH ) { /* push (store) state in stack */ + prs->state->pushedAtAction = item; /* remember where we push */ + prs->state = newTParserPosition( prs->state ); + } else if ( item->flags & A_CLEAR ) { /* clear previous pushed state */ + TParserPosition *ptr; + Assert( prs->state->prev ); + ptr = prs->state->prev->prev; + pfree( prs->state->prev ); + prs->state->prev = ptr; + } else if ( item->flags & A_CLRALL ) { /* clear all previous pushed state */ + TParserPosition *ptr; + while( prs->state->prev ) { + ptr = prs->state->prev->prev; + pfree( prs->state->prev ); + prs->state->prev = ptr; + } + } else if ( item->flags & A_MERGE ) { /* merge posinfo with current and pushed state */ + TParserPosition *ptr = prs->state; + Assert( prs->state->prev ); + prs->state = prs->state->prev; + + prs->state->posbyte = ptr->posbyte; + prs->state->poschar = ptr->poschar; + prs->state->charlen = ptr->charlen; + prs->state->lenbytelexeme = ptr->lenbytelexeme; + prs->state->lencharlexeme = ptr->lencharlexeme; + pfree(ptr); + } + + /* set new state if pointed */ + if ( item->tostate != TPS_Null ) + prs->state->state = item->tostate; + + /* check for go away */ + if ( (item->flags & A_BINGO) || (prs->state->posbyte >= prs->lenstr && (item->flags & A_RERUN)==0 ) ) + break; + + /* go to begining of loop if we should rerun or we just restore state */ + if ( item->flags & ( A_RERUN | A_POP ) ) + continue; + + /* move forward */ + if ( prs->state->charlen ) { + prs->state->posbyte += prs->state->charlen; + prs->state->lenbytelexeme += prs->state->charlen; + prs->state->poschar ++; + prs->state->lencharlexeme ++; + } + } + + return (item && (item->flags & A_BINGO)) ? true : false; +} + + |