diff options
Diffstat (limited to 'contrib/tsearch2/wordparser/parser.h')
-rw-r--r-- | contrib/tsearch2/wordparser/parser.h | 147 |
1 files changed, 142 insertions, 5 deletions
diff --git a/contrib/tsearch2/wordparser/parser.h b/contrib/tsearch2/wordparser/parser.h index 3f0e0cd6359..ee5b3b7ab54 100644 --- a/contrib/tsearch2/wordparser/parser.h +++ b/contrib/tsearch2/wordparser/parser.h @@ -1,10 +1,147 @@ #ifndef __PARSER_H__ #define __PARSER_H__ -extern char *token; -extern int tokenlen; -int tsearch2_yylex(void); -void tsearch2_start_parse_str(char *, int); -void tsearch2_end_parse(void); +#include <ctype.h> +#include <limits.h> +#include "ts_locale.h" + +typedef enum { + TPS_Base = 0, + TPS_InUWord, + TPS_InLatWord, + TPS_InCyrWord, + TPS_InUnsignedInt, + TPS_InSignedIntFirst, + TPS_InSignedInt, + TPS_InSpace, + TPS_InUDecimalFirst, + TPS_InUDecimal, + TPS_InDecimalFirst, + TPS_InDecimal, + TPS_InVersionFirst, + TPS_InVersion, + TPS_InMantissaFirst, + TPS_InMantissaSign, + TPS_InMantissa, + TPS_InHTMLEntityFirst, + TPS_InHTMLEntity, + TPS_InHTMLEntityNumFirst, + TPS_InHTMLEntityNum, + TPS_InHTMLEntityEnd, + TPS_InTagFirst, + TPS_InTagCloseFirst, + TPS_InTag, + TPS_InTagEscapeK, + TPS_InTagEscapeKK, + TPS_InTagBackSleshed, + TPS_InTagEnd, + TPS_InCommentFirst, + TPS_InCommentLast, + TPS_InComment, + TPS_InCloseCommentFirst, + TPS_InCloseCommentLast, + TPS_InCommentEnd, + TPS_InHostFirstDomen, + TPS_InHostDomenSecond, + TPS_InHostDomen, + TPS_InPortFirst, + TPS_InPort, + TPS_InHostFirstAN, + TPS_InHost, + TPS_InEmail, + TPS_InFileFirst, + TPS_InFile, + TPS_InFileNext, + TPS_InURIFirst, + TPS_InURIStart, + TPS_InURI, + TPS_InFURL, + TPS_InProtocolFirst, + TPS_InProtocolSecond, + TPS_InProtocolEnd, + TPS_InHyphenLatWordFirst, + TPS_InHyphenLatWord, + TPS_InHyphenCyrWordFirst, + TPS_InHyphenCyrWord, + TPS_InHyphenUWordFirst, + TPS_InHyphenUWord, + TPS_InHyphenValueFirst, + TPS_InHyphenValue, + TPS_InHyphenValueExact, + TPS_InParseHyphen, + TPS_InParseHyphenHyphen, + TPS_InHyphenCyrWordPart, + TPS_InHyphenLatWordPart, + TPS_InHyphenUWordPart, + TPS_InHyphenUnsignedInt, + TPS_InHDecimalPartFirst, + TPS_InHDecimalPart, + TPS_InHVersionPartFirst, + TPS_InHVersionPart, + TPS_Null /* last state (fake value) */ +} TParserState; + +/* forward declaration */ +struct TParser; + + +typedef int (*TParserCharTest)(struct TParser*); /* any p_is* functions except p_iseq */ +typedef void (*TParserSpecial)(struct TParser*); /* special handler for special cases... */ + +typedef struct { + TParserCharTest isclass; + char c; + uint16 flags; + TParserState tostate; + int type; + TParserSpecial special; +} TParserStateActionItem; + +typedef struct { + TParserState state; + TParserStateActionItem *action; +} TParserStateAction; + +typedef struct TParserPosition { + int posbyte; /* position of parser in bytes */ + int poschar; /* osition of parser in characters */ + int charlen; /* length of current char */ + int lenbytelexeme; + int lencharlexeme; + TParserState state; + struct TParserPosition *prev; + int flags; + TParserStateActionItem *pushedAtAction; +} TParserPosition; + +typedef struct TParser { + /* string and position information */ + char *str; /* multibyte string */ + int lenstr; /* length of mbstring */ + wchar_t *wstr; /* wide character string */ + int lenwstr; /* length of wsting */ + + /* State of parse */ + int charmaxlen; + bool usewide; + TParserPosition *state; + bool ignore; + bool wanthost; + + /* silly char */ + char c; + + /* out */ + char *lexeme; + int lenbytelexeme; + int lencharlexeme; + int type; + +} TParser; + + +TParser* TParserInit( char *, int ); +bool TParserGet( TParser* ); +void TParserClose( TParser* ); #endif |