29#include "llvm/ADT/STLExtras.h"
30#include "llvm/ADT/StringExtras.h"
31#include "llvm/ADT/StringRef.h"
32#include "llvm/ADT/StringSwitch.h"
33#include "llvm/Support/Compiler.h"
34#include "llvm/Support/ConvertUTF.h"
35#include "llvm/Support/MemoryBufferRef.h"
36#include "llvm/Support/NativeFormatting.h"
37#include "llvm/Support/Unicode.h"
38#include "llvm/Support/UnicodeCharRanges.h"
64 return II->getObjCKeywordID() == objcKey;
71 return tok::objc_not_keyword;
79 case tok::annot_typename:
80 case tok::annot_decltype:
81 case tok::annot_pack_indexing_type:
87 case tok::kw___int128:
89 case tok::kw_unsigned:
97 case tok::kw__Float16:
98 case tok::kw___float128:
99 case tok::kw___ibm128:
100 case tok::kw_wchar_t:
106#define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) case tok::kw___##Trait:
107#include "clang/Basic/TransformTypeTraits.def"
108 case tok::kw___auto_type:
109 case tok::kw_char16_t:
110 case tok::kw_char32_t:
112 case tok::kw_decltype:
113 case tok::kw_char8_t:
// Out-of-line definition of Lexer::anchor(); the body is intentionally empty.
// NOTE(review): presumably the conventional LLVM "anchor" method that gives
// the class a single home translation unit for its vtable -- confirm against
// the class declaration, which is not visible from this file.
125void Lexer::anchor() {}
127void Lexer::InitLexer(
const char *BufStart,
const char *BufPtr,
128 const char *BufEnd) {
129 BufferStart = BufStart;
133 assert(BufEnd[0] == 0 &&
134 "We assume that the input buffer has a null character at the end"
135 " to simplify lexing!");
140 if (BufferStart == BufferPtr) {
142 StringRef Buf(BufferStart, BufferEnd - BufferStart);
143 size_t BOMLength = llvm::StringSwitch<size_t>(Buf)
144 .StartsWith(
"\xEF\xBB\xBF", 3)
148 BufferPtr += BOMLength;
151 Is_PragmaLexer =
false;
152 CurrentConflictMarkerState =
CMK_None;
155 IsAtStartOfLine =
true;
156 IsAtPhysicalStartOfLine =
true;
158 HasLeadingSpace =
false;
159 HasLeadingEmptyMacro =
false;
174 ExtendedTokenMode = 0;
176 NewLinePtr =
nullptr;
186 FileLoc(PP.getSourceManager().getLocForStartOfFile(FID)),
188 IsFirstTimeLexingFile(IsFirstIncludeOfFile) {
189 InitLexer(InputFile.getBufferStart(), InputFile.getBufferStart(),
190 InputFile.getBufferEnd());
199 const char *BufStart,
const char *BufPtr,
const char *BufEnd,
200 bool IsFirstIncludeOfFile)
202 IsFirstTimeLexingFile(IsFirstIncludeOfFile) {
203 InitLexer(BufStart, BufPtr, BufEnd);
214 bool IsFirstIncludeOfFile)
215 :
Lexer(
SM.getLocForStartOfFile(FID), langOpts, FromFile.getBufferStart(),
216 FromFile.getBufferStart(), FromFile.getBufferEnd(),
217 IsFirstIncludeOfFile) {}
220 assert(
PP &&
"Cannot reset token mode without a preprocessor");
221 if (LangOpts.TraditionalCPP)
249 FileID SpellingFID =
SM.getFileID(SpellingLoc);
250 llvm::MemoryBufferRef InputFile =
SM.getBufferOrFake(SpellingFID);
256 const char *StrData =
SM.getCharacterData(SpellingLoc);
258 L->BufferPtr = StrData;
259 L->BufferEnd = StrData+TokLen;
260 assert(L->BufferEnd[0] == 0 &&
"Buffer is not nul terminated!");
264 L->FileLoc =
SM.createExpansionLoc(
SM.getLocForStartOfFile(SpellingFID),
266 ExpansionLocEnd, TokLen);
273 L->Is_PragmaLexer =
true;
278 this->IsAtPhysicalStartOfLine = IsAtStartOfLine;
279 this->IsAtStartOfLine = IsAtStartOfLine;
280 assert((BufferStart + Offset) <= BufferEnd);
281 BufferPtr = BufferStart + Offset;
285 typename T::size_type i = 0, e = Str.size();
287 if (Str[i] ==
'\\' || Str[i] == Quote) {
288 Str.insert(Str.begin() + i,
'\\');
291 }
else if (Str[i] ==
'\n' || Str[i] ==
'\r') {
293 if ((i < e - 1) && (Str[i + 1] ==
'\n' || Str[i + 1] ==
'\r') &&
294 Str[i] != Str[i + 1]) {
300 Str.insert(Str.begin() + i + 1,
'n');
310 std::string
Result = std::string(Str);
311 char Quote = Charify ?
'\'' :
'"';
326 assert(Tok.
needsCleaning() &&
"getSpellingSlow called on simple token");
329 const char *BufEnd = BufPtr + Tok.
getLength();
333 while (BufPtr < BufEnd) {
335 Spelling[Length++] = CharAndSize.Char;
336 BufPtr += CharAndSize.Size;
338 if (Spelling[Length - 1] ==
'"')
346 Spelling[Length - 2] ==
'R' && Spelling[Length - 1] ==
'"') {
349 const char *RawEnd = BufEnd;
350 do --RawEnd;
while (*RawEnd !=
'"');
351 size_t RawLength = RawEnd - BufPtr + 1;
354 memcpy(Spelling + Length, BufPtr, RawLength);
362 while (BufPtr < BufEnd) {
364 Spelling[Length++] = CharAndSize.Char;
365 BufPtr += CharAndSize.Size;
369 "NeedsCleaning flag set on token that didn't need cleaning!");
384 std::pair<FileID, unsigned> locInfo =
SM.getDecomposedLoc(loc);
387 bool invalidTemp =
false;
388 StringRef file =
SM.getBufferData(locInfo.first, &invalidTemp);
390 if (invalid) *invalid =
true;
394 const char *tokenBegin = file.data() + locInfo.second;
397 Lexer lexer(
SM.getLocForStartOfFile(locInfo.first), options,
398 file.begin(), tokenBegin, file.end());
406 return StringRef(tokenBegin,
length);
410 buffer.resize(
getSpellingSlow(token, tokenBegin, options, buffer.data()));
411 return StringRef(buffer.data(), buffer.size());
421 assert((
int)Tok.
getLength() >= 0 &&
"Token character range is bogus!");
423 bool CharDataInvalid =
false;
433 return std::string(TokStart, TokStart + Tok.
getLength());
454 assert((
int)Tok.
getLength() >= 0 &&
"Token character range is bogus!");
456 const char *TokStart =
nullptr;
458 if (Tok.
is(tok::raw_identifier))
463 Buffer = II->getNameStart();
464 return II->getLength();
474 bool CharDataInvalid =
false;
478 if (CharDataInvalid) {
491 return getSpellingSlow(Tok, TokStart, LangOpts,
const_cast<char*
>(Buffer));
512 bool IgnoreWhiteSpace) {
522 std::pair<FileID, unsigned> LocInfo =
SM.getDecomposedLoc(
Loc);
524 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
528 const char *StrData = Buffer.data()+LocInfo.second;
530 if (!IgnoreWhiteSpace &&
isWhitespace(SkipEscapedNewLines(StrData)[0]))
534 Lexer TheLexer(
SM.getLocForStartOfFile(LocInfo.first), LangOpts,
535 Buffer.begin(), StrData, Buffer.end());
544 const char *BufStart = Buffer.data();
545 if (Offset >= Buffer.size())
548 const char *LexStart = BufStart + Offset;
549 for (; LexStart != BufStart; --LexStart) {
564 std::pair<FileID, unsigned> LocInfo =
SM.getDecomposedLoc(
Loc);
565 if (LocInfo.first.isInvalid())
569 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
575 const char *StrData = Buffer.data() + LocInfo.second;
577 if (!LexStart || LexStart == StrData)
582 Lexer TheLexer(LexerStartLoc, LangOpts, Buffer.data(), LexStart,
602 }
while (TheTok.
getKind() != tok::eof);
614 if (!
SM.isMacroArgExpansion(
Loc))
619 std::pair<FileID, unsigned> FileLocInfo =
SM.getDecomposedLoc(FileLoc);
620 std::pair<FileID, unsigned> BeginFileLocInfo =
621 SM.getDecomposedLoc(BeginFileLoc);
622 assert(FileLocInfo.first == BeginFileLocInfo.first &&
623 FileLocInfo.second >= BeginFileLocInfo.second);
629enum PreambleDirectiveKind {
644 Lexer TheLexer(FileLoc, LangOpts, Buffer.begin(), Buffer.begin(),
648 bool InPreprocessorDirective =
false;
652 unsigned MaxLineOffset = 0;
654 const char *CurPtr = Buffer.begin();
655 unsigned CurLine = 0;
656 while (CurPtr != Buffer.end()) {
660 if (CurLine == MaxLines)
664 if (CurPtr != Buffer.end())
665 MaxLineOffset = CurPtr - Buffer.begin();
671 if (InPreprocessorDirective) {
673 if (TheTok.
getKind() == tok::eof) {
684 InPreprocessorDirective =
false;
693 if (MaxLineOffset && TokOffset >= MaxLineOffset)
698 if (TheTok.
getKind() == tok::comment) {
706 Token HashTok = TheTok;
707 InPreprocessorDirective =
true;
716 PreambleDirectiveKind PDK
717 = llvm::StringSwitch<PreambleDirectiveKind>(Keyword)
718 .Case(
"include", PDK_Skipped)
719 .Case(
"__include_macros", PDK_Skipped)
720 .Case(
"define", PDK_Skipped)
721 .Case(
"undef", PDK_Skipped)
722 .Case(
"line", PDK_Skipped)
723 .Case(
"error", PDK_Skipped)
724 .Case(
"pragma", PDK_Skipped)
725 .Case(
"import", PDK_Skipped)
726 .Case(
"include_next", PDK_Skipped)
727 .Case(
"warning", PDK_Skipped)
728 .Case(
"ident", PDK_Skipped)
729 .Case(
"sccs", PDK_Skipped)
730 .Case(
"assert", PDK_Skipped)
731 .Case(
"unassert", PDK_Skipped)
732 .Case(
"if", PDK_Skipped)
733 .Case(
"ifdef", PDK_Skipped)
734 .Case(
"ifndef", PDK_Skipped)
735 .Case(
"elif", PDK_Skipped)
736 .Case(
"elifdef", PDK_Skipped)
737 .Case(
"elifndef", PDK_Skipped)
738 .Case(
"else", PDK_Skipped)
739 .Case(
"endif", PDK_Skipped)
740 .Default(PDK_Unknown);
757 TheTok.
getKind() == tok::raw_identifier &&
759 LangOpts.CPlusPlusModules) {
762 Token ModuleTok = TheTok;
765 }
while (TheTok.
getKind() == tok::comment);
766 if (TheTok.
getKind() != tok::semi) {
781 if (ActiveCommentLoc.
isValid())
782 End = ActiveCommentLoc;
797 const char *TokPtr =
SM.getCharacterData(TokStart, &
Invalid);
800 if (
Invalid || (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr)))
803 unsigned PhysOffset = 0;
808 while (Lexer::isObviouslySimpleCharacter(*TokPtr)) {
818 for (; CharNo; --CharNo) {
820 TokPtr += CharAndSize.Size;
821 PhysOffset += CharAndSize.Size;
828 if (!Lexer::isObviouslySimpleCharacter(*TokPtr))
829 PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr;
878 if (!
SM.isAtStartOfImmediateMacroExpansion(loc, &expansionLoc))
884 *MacroBegin = expansionLoc;
906 if (!
SM.isAtEndOfImmediateMacroExpansion(afterLoc, &expansionLoc))
912 *MacroEnd = expansionLoc;
925 if (
Range.isTokenRange()) {
934 std::tie(FID, BeginOffs) =
SM.getDecomposedLoc(
Begin);
939 if (!
SM.isInFileID(End, FID, &EndOffs) ||
949 return SM.getSLocEntry(
SM.getFileID(
Loc))
951 .isExpansionTokenRange();
973 if (
Range.isTokenRange()) {
994 if (
Range.isTokenRange())
1034 std::pair<FileID, unsigned> beginInfo =
SM.getDecomposedLoc(
Range.
getBegin());
1035 if (beginInfo.first.isInvalid()) {
1041 if (!
SM.isInFileID(
Range.
getEnd(), beginInfo.first, &EndOffs) ||
1042 beginInfo.second > EndOffs) {
1048 bool invalidTemp =
false;
1049 StringRef file =
SM.getBufferData(beginInfo.first, &invalidTemp);
1056 return file.substr(beginInfo.second, EndOffs - beginInfo.second);
1062 assert(
Loc.
isMacroID() &&
"Only reasonable to call this on macros");
1078 Loc =
SM.getImmediateExpansionRange(
Loc).getBegin();
1086 if (
SM.isInFileID(SpellLoc, MacroFID))
1100 std::pair<FileID, unsigned> ExpansionInfo =
SM.getDecomposedLoc(
Loc);
1102 StringRef ExpansionBuffer =
SM.getBufferData(ExpansionInfo.first);
1103 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1108 assert(
Loc.
isMacroID() &&
"Only reasonable to call this on macros");
1110 while (
SM.isMacroArgExpansion(
Loc))
1111 Loc =
SM.getImmediateExpansionRange(
Loc).getBegin();
1117 if (!SpellLoc.
isFileID() ||
SM.isWrittenInScratchSpace(SpellLoc))
1123 Loc =
SM.getSpellingLoc(
SM.getImmediateExpansionRange(
Loc).getBegin());
1127 std::pair<FileID, unsigned> ExpansionInfo =
SM.getDecomposedLoc(
Loc);
1129 StringRef ExpansionBuffer =
SM.getBufferData(ExpansionInfo.first);
1130 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1139 if (Str - 1 < BufferStart)
1142 if ((Str[0] ==
'\n' && Str[-1] ==
'\r') ||
1143 (Str[0] ==
'\r' && Str[-1] ==
'\n')) {
1144 if (Str - 2 < BufferStart)
1154 return *Str ==
'\\';
1161 std::pair<FileID, unsigned> LocInfo =
SM.getDecomposedLoc(
Loc);
1162 if (LocInfo.first.isInvalid())
1165 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
1171 StringRef Rest = Buffer.substr(
Line - Buffer.data());
1172 size_t NumWhitespaceChars = Rest.find_first_not_of(
" \t");
1173 return NumWhitespaceChars == StringRef::npos
1175 : Rest.take_front(NumWhitespaceChars);
1190 unsigned CharNo,
unsigned TokLen) {
1191 assert(FileLoc.
isMacroID() &&
"Must be a macro expansion");
1207 return SM.createExpansionLoc(SpellingLoc, II.
getBegin(), II.
getEnd(), TokLen);
1213 unsigned TokLen)
const {
1214 assert(
Loc >= BufferStart &&
Loc <= BufferEnd &&
1215 "Location out of range for this buffer!");
1219 unsigned CharNo =
Loc-BufferStart;
1225 assert(
PP &&
"This doesn't work on raw lexers");
1244 case '=':
return '#';
1245 case ')':
return ']';
1246 case '(':
return '[';
1247 case '!':
return '|';
1248 case '\'':
return '^';
1249 case '>':
return '}';
1250 case '/':
return '\\';
1251 case '<':
return '{';
1252 case '-':
return '~';
1267 L->
Diag(CP-2, diag::trigraph_ignored);
1272 L->
Diag(CP-2, diag::trigraph_converted) << StringRef(&Res, 1);
1279unsigned Lexer::getEscapedNewLineSize(
const char *Ptr) {
1284 if (Ptr[Size-1] !=
'\n' && Ptr[Size-1] !=
'\r')
1288 if ((Ptr[Size] ==
'\r' || Ptr[Size] ==
'\n') &&
1289 Ptr[Size-1] != Ptr[Size])
1302const char *Lexer::SkipEscapedNewLines(
const char *
P) {
1304 const char *AfterEscape;
1307 }
else if (*
P ==
'?') {
1309 if (
P[1] !=
'?' ||
P[2] !=
'/')
1318 unsigned NewLineSize = Lexer::getEscapedNewLineSize(AfterEscape);
1319 if (NewLineSize == 0)
return P;
1320 P = AfterEscape+NewLineSize;
1327 bool IncludeComments) {
1330 return std::nullopt;
1335 std::pair<FileID, unsigned> LocInfo =
SM.getDecomposedLoc(
Loc);
1338 bool InvalidTemp =
false;
1339 StringRef
File =
SM.getBufferData(LocInfo.first, &InvalidTemp);
1341 return std::nullopt;
1343 const char *TokenBegin =
File.data() + LocInfo.second;
1346 Lexer lexer(
SM.getLocForStartOfFile(LocInfo.first), LangOpts,
File.begin(),
1347 TokenBegin,
File.end());
1358 bool IncludeComments) {
1359 const auto StartOfFile =
SM.getLocForStartOfFile(
SM.getFileID(
Loc));
1360 while (
Loc != StartOfFile) {
1363 return std::nullopt;
1369 if (!Tok.
is(tok::comment) || IncludeComments) {
1373 return std::nullopt;
1382 const LangOptions &LangOpts,
bool SkipTrailingWhitespaceAndNewLine) {
1384 if (!Tok || Tok->isNot(TKind))
1389 unsigned NumWhitespaceChars = 0;
1390 if (SkipTrailingWhitespaceAndNewLine) {
1391 const char *TokenEnd =
SM.getCharacterData(TokenLoc) + Tok->getLength();
1392 unsigned char C = *TokenEnd;
1395 NumWhitespaceChars++;
1399 if (
C ==
'\n' ||
C ==
'\r') {
1402 NumWhitespaceChars++;
1403 if ((
C ==
'\n' ||
C ==
'\r') &&
C != PrevC)
1404 NumWhitespaceChars++;
1429 if (Ptr[0] ==
'\\') {
1435 return {
'\\', Size};
1439 if (
unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) {
1445 Diag(Ptr, diag::backslash_newline_space);
1448 Size += EscapedNewLineSize;
1449 Ptr += EscapedNewLineSize;
1452 auto CharAndSize = getCharAndSizeSlow(Ptr, Tok);
1453 CharAndSize.Size += Size;
1458 return {
'\\',
Size};
1462 if (Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1466 LangOpts.Trigraphs)) {
1472 if (
C ==
'\\')
goto Slash;
1478 return {*Ptr,
Size + 1u};
1492 if (Ptr[0] ==
'\\') {
1498 return {
'\\',
Size};
1501 if (
unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) {
1503 Size += EscapedNewLineSize;
1504 Ptr += EscapedNewLineSize;
1507 auto CharAndSize = getCharAndSizeSlowNoWarn(Ptr, LangOpts);
1508 CharAndSize.Size +=
Size;
1513 return {
'\\',
Size};
1517 if (LangOpts.Trigraphs && Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1523 if (
C ==
'\\')
goto Slash;
1529 return {*Ptr,
Size + 1u};
1537void Lexer::SetByteOffset(
unsigned Offset,
bool StartOfLine) {
1538 BufferPtr = BufferStart + Offset;
1539 if (BufferPtr > BufferEnd)
1540 BufferPtr = BufferEnd;
1544 IsAtStartOfLine = StartOfLine;
1545 IsAtPhysicalStartOfLine = StartOfLine;
1549 static const llvm::sys::UnicodeCharSet UnicodeWhitespaceChars(
1551 return UnicodeWhitespaceChars.contains(Codepoint);
1556 llvm::raw_svector_ostream CharOS(CharBuf);
1557 llvm::write_hex(CharOS,
C, llvm::HexPrintStyle::Upper, 4);
1568 bool IsStart,
bool &IsExtension) {
1569 static const llvm::sys::UnicodeCharSet MathStartChars(
1571 static const llvm::sys::UnicodeCharSet MathContinueChars(
1573 if (MathStartChars.contains(
C) ||
1574 (!IsStart && MathContinueChars.contains(
C))) {
1582 bool &IsExtension) {
1583 if (LangOpts.AsmPreprocessor) {
1585 }
else if (LangOpts.DollarIdents &&
'$' ==
C) {
1587 }
else if (LangOpts.CPlusPlus || LangOpts.C23) {
1592 static const llvm::sys::UnicodeCharSet XIDStartChars(
XIDStartRanges);
1594 if (
C ==
'_' || XIDStartChars.contains(
C) || XIDContinueChars.contains(
C))
1598 }
else if (LangOpts.C11) {
1599 static const llvm::sys::UnicodeCharSet C11AllowedIDChars(
1601 return C11AllowedIDChars.contains(
C);
1603 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1605 return C99AllowedIDChars.contains(
C);
1610 bool &IsExtension) {
1611 assert(
C > 0x7F &&
"isAllowedInitiallyIDChar called with an ASCII codepoint");
1612 IsExtension =
false;
1613 if (LangOpts.AsmPreprocessor) {
1616 if (LangOpts.CPlusPlus || LangOpts.C23) {
1617 static const llvm::sys::UnicodeCharSet XIDStartChars(
XIDStartRanges);
1618 if (XIDStartChars.contains(
C))
1626 static const llvm::sys::UnicodeCharSet C11DisallowedInitialIDChars(
1628 return !C11DisallowedInitialIDChars.contains(
C);
1630 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1632 return !C99DisallowedInitialIDChars.contains(
C);
1638 static const llvm::sys::UnicodeCharSet MathStartChars(
1640 static const llvm::sys::UnicodeCharSet MathContinueChars(
1643 (void)MathStartChars;
1644 (void)MathContinueChars;
1645 assert((MathStartChars.contains(
C) || MathContinueChars.contains(
C)) &&
1646 "Unexpected mathematical notation codepoint");
1662 CannotAppearInIdentifier = 0,
1663 CannotStartIdentifier
1666 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1668 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1670 if (!C99AllowedIDChars.contains(
C)) {
1673 << CannotAppearInIdentifier;
1674 }
else if (IsFirst && C99DisallowedInitialIDChars.contains(
C)) {
1677 << CannotStartIdentifier;
1689 struct HomoglyphPair {
1692 bool operator<(HomoglyphPair R)
const {
return Character < R.Character; }
1694 static constexpr HomoglyphPair SortedHomoglyphs[] = {
1747 std::lower_bound(std::begin(SortedHomoglyphs),
1748 std::end(SortedHomoglyphs) - 1, HomoglyphPair{
C,
'\0'});
1749 if (Homoglyph->Character ==
C) {
1750 if (Homoglyph->LooksLike) {
1751 const char LooksLikeStr[] = {Homoglyph->LooksLike, 0};
1772 if ((IsFirst && IsIDStart) || (!IsFirst && IsIDContinue))
1775 bool InvalidOnlyAtStart = IsFirst && !IsIDStart && IsIDContinue;
1777 if (!IsFirst || InvalidOnlyAtStart) {
1788bool Lexer::tryConsumeIdentifierUCN(
const char *&CurPtr,
unsigned Size,
1790 const char *UCNPtr = CurPtr +
Size;
1791 uint32_t CodePoint = tryReadUCN(UCNPtr, CurPtr,
nullptr);
1792 if (CodePoint == 0) {
1795 bool IsExtension =
false;
1820 if ((UCNPtr - CurPtr == 6 && CurPtr[1] ==
'u') ||
1821 (UCNPtr - CurPtr == 10 && CurPtr[1] ==
'U'))
1824 while (CurPtr != UCNPtr)
1825 (void)getAndAdvanceChar(CurPtr,
Result);
1829bool Lexer::tryConsumeIdentifierUTF8Char(
const char *&CurPtr,
Token &
Result) {
1830 llvm::UTF32 CodePoint;
1835 unsigned FirstCodeUnitSize;
1836 getCharAndSize(CurPtr, FirstCodeUnitSize);
1837 const char *CharStart = CurPtr + FirstCodeUnitSize - 1;
1838 const char *UnicodePtr = CharStart;
1840 llvm::ConversionResult ConvResult = llvm::convertUTF8Sequence(
1841 (
const llvm::UTF8 **)&UnicodePtr, (
const llvm::UTF8 *)BufferEnd,
1842 &CodePoint, llvm::strictConversion);
1843 if (ConvResult != llvm::conversionOK)
1846 bool IsExtension =
false;
1875 ConsumeChar(CurPtr, FirstCodeUnitSize,
Result);
1876 CurPtr = UnicodePtr;
1880bool Lexer::LexUnicodeIdentifierStart(
Token &
Result, uint32_t
C,
1881 const char *CurPtr) {
1882 bool IsExtension =
false;
1897 return LexIdentifierContinue(
Result, CurPtr);
1922 FormTokenWithChars(
Result, CurPtr, tok::unknown);
1928 [[maybe_unused]]
const char *BufferEnd) {
1930 alignas(16)
static constexpr char AsciiIdentifierRange[16] = {
1931 '_',
'_',
'A',
'Z',
'a',
'z',
'0',
'9',
1933 constexpr ssize_t BytesPerRegister = 16;
1935 __m128i AsciiIdentifierRangeV =
1938 while (LLVM_LIKELY(BufferEnd - CurPtr >= BytesPerRegister)) {
1945 if (Consumed == BytesPerRegister)
1951 unsigned char C = *CurPtr;
1957bool Lexer::LexIdentifierContinue(
Token &
Result,
const char *CurPtr) {
1966 unsigned char C = getCharAndSize(CurPtr, Size);
1968 CurPtr = ConsumeChar(CurPtr, Size,
Result);
1973 if (!LangOpts.DollarIdents)
1977 Diag(CurPtr, diag::ext_dollar_in_identifier);
1978 CurPtr = ConsumeChar(CurPtr, Size,
Result);
1981 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
1989 const char *IdStart = BufferPtr;
1990 FormTokenWithChars(
Result, CurPtr, tok::raw_identifier);
1991 Result.setRawIdentifierData(IdStart);
2007 if (isCodeCompletionPoint(CurPtr)) {
2009 Result.setKind(tok::code_completion);
2015 assert(*CurPtr == 0 &&
"Completion character must be 0");
2020 if (CurPtr < BufferEnd) {
2038bool Lexer::isHexaLiteral(
const char *Start,
const LangOptions &LangOpts) {
2040 char C1 = CharAndSize1.Char;
2046 char C2 = CharAndSize2.Char;
2047 return (C2 ==
'x' || C2 ==
'X');
2053bool Lexer::LexNumericConstant(
Token &
Result,
const char *CurPtr) {
2055 char C = getCharAndSize(CurPtr, Size);
2058 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2060 if (LangOpts.HLSL &&
C ==
'.' && (*CurPtr ==
'x' || *CurPtr ==
'r')) {
2064 C = getCharAndSize(CurPtr, Size);
2068 if ((
C ==
'-' ||
C ==
'+') && (PrevCh ==
'E' || PrevCh ==
'e')) {
2071 if (!LangOpts.MicrosoftExt || !isHexaLiteral(BufferPtr, LangOpts))
2072 return LexNumericConstant(
Result, ConsumeChar(CurPtr, Size,
Result));
2076 if ((
C ==
'-' ||
C ==
'+') && (PrevCh ==
'P' || PrevCh ==
'p')) {
2080 bool IsHexFloat =
true;
2081 if (!LangOpts.C99) {
2082 if (!isHexaLiteral(BufferPtr, LangOpts))
2084 else if (!LangOpts.CPlusPlus17 &&
2085 std::find(BufferPtr, CurPtr,
'_') != CurPtr)
2089 return LexNumericConstant(
Result, ConsumeChar(CurPtr, Size,
Result));
2093 if (
C ==
'\'' && (LangOpts.CPlusPlus14 || LangOpts.C23)) {
2097 Diag(CurPtr, LangOpts.CPlusPlus
2098 ? diag::warn_cxx11_compat_digit_separator
2099 : diag::warn_c23_compat_digit_separator);
2100 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2101 CurPtr = ConsumeChar(CurPtr, NextSize,
Result);
2102 return LexNumericConstant(
Result, CurPtr);
2107 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
2108 return LexNumericConstant(
Result, CurPtr);
2110 return LexNumericConstant(
Result, CurPtr);
2113 const char *TokStart = BufferPtr;
2114 FormTokenWithChars(
Result, CurPtr, tok::numeric_constant);
2115 Result.setLiteralData(TokStart);
2121const char *Lexer::LexUDSuffix(
Token &
Result,
const char *CurPtr,
2122 bool IsStringLiteral) {
2123 assert(LangOpts.CPlusPlus);
2127 char C = getCharAndSize(CurPtr, Size);
2128 bool Consumed =
false;
2131 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
2133 else if (!
isASCII(
C) && tryConsumeIdentifierUTF8Char(CurPtr,
Result))
2139 if (!LangOpts.CPlusPlus11) {
2142 C ==
'_' ? diag::warn_cxx11_compat_user_defined_literal
2143 : diag::warn_cxx11_compat_reserved_user_defined_literal)
2154 bool IsUDSuffix =
false;
2157 else if (IsStringLiteral && LangOpts.CPlusPlus14) {
2161 const unsigned MaxStandardSuffixLength = 3;
2162 char Buffer[MaxStandardSuffixLength] = {
C };
2163 unsigned Consumed =
Size;
2166 auto [Next, NextSize] =
2170 const StringRef CompleteSuffix(Buffer, Chars);
2176 if (Chars == MaxStandardSuffixLength)
2180 Buffer[Chars++] = Next;
2181 Consumed += NextSize;
2187 Diag(CurPtr, LangOpts.MSVCCompat
2188 ? diag::ext_ms_reserved_user_defined_literal
2189 : diag::ext_reserved_user_defined_literal)
2194 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2199 C = getCharAndSize(CurPtr, Size);
2201 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2202 }
else if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result)) {
2203 }
else if (!
isASCII(
C) && tryConsumeIdentifierUTF8Char(CurPtr,
Result)) {
2213bool Lexer::LexStringLiteral(
Token &
Result,
const char *CurPtr,
2215 const char *AfterQuote = CurPtr;
2217 const char *NulCharacter =
nullptr;
2220 (Kind == tok::utf8_string_literal ||
2221 Kind == tok::utf16_string_literal ||
2222 Kind == tok::utf32_string_literal))
2223 Diag(BufferPtr, LangOpts.CPlusPlus ? diag::warn_cxx98_compat_unicode_literal
2224 : diag::warn_c99_compat_unicode_literal);
2226 char C = getAndAdvanceChar(CurPtr,
Result);
2231 C = getAndAdvanceChar(CurPtr,
Result);
2233 if (
C ==
'\n' ||
C ==
'\r' ||
2234 (
C == 0 && CurPtr-1 == BufferEnd)) {
2236 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 1;
2237 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2242 if (isCodeCompletionPoint(CurPtr-1)) {
2244 codeCompleteIncludedFile(AfterQuote, CurPtr - 1,
false);
2247 FormTokenWithChars(
Result, CurPtr - 1, tok::unknown);
2252 NulCharacter = CurPtr-1;
2254 C = getAndAdvanceChar(CurPtr,
Result);
2258 if (LangOpts.CPlusPlus)
2259 CurPtr = LexUDSuffix(
Result, CurPtr,
true);
2263 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
2266 const char *TokStart = BufferPtr;
2267 FormTokenWithChars(
Result, CurPtr, Kind);
2268 Result.setLiteralData(TokStart);
2274bool Lexer::LexRawStringLiteral(
Token &
Result,
const char *CurPtr,
2282 Diag(BufferPtr, diag::warn_cxx98_compat_raw_string_literal);
2284 unsigned PrefixLen = 0;
2288 llvm::is_contained({
'$',
'@',
'`'}, CurPtr[PrefixLen])) {
2289 const char *Pos = &CurPtr[PrefixLen];
2290 Diag(Pos, LangOpts.CPlusPlus26
2291 ? diag::warn_cxx26_compat_raw_string_literal_character_set
2292 : diag::ext_cxx26_raw_string_literal_character_set)
2293 << StringRef(Pos, 1);
2299 if (CurPtr[PrefixLen] !=
'(') {
2301 const char *PrefixEnd = &CurPtr[PrefixLen];
2302 if (PrefixLen == 16) {
2303 Diag(PrefixEnd, diag::err_raw_delim_too_long);
2304 }
else if (*PrefixEnd ==
'\n') {
2305 Diag(PrefixEnd, diag::err_invalid_newline_raw_delim);
2307 Diag(PrefixEnd, diag::err_invalid_char_raw_delim)
2308 << StringRef(PrefixEnd, 1);
2320 if (
C == 0 && CurPtr-1 == BufferEnd) {
2326 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2331 const char *Prefix = CurPtr;
2332 CurPtr += PrefixLen + 1;
2339 if (strncmp(CurPtr, Prefix, PrefixLen) == 0 && CurPtr[PrefixLen] ==
'"') {
2340 CurPtr += PrefixLen + 1;
2343 }
else if (
C == 0 && CurPtr-1 == BufferEnd) {
2345 Diag(BufferPtr, diag::err_unterminated_raw_string)
2346 << StringRef(Prefix, PrefixLen);
2347 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2353 if (LangOpts.CPlusPlus)
2354 CurPtr = LexUDSuffix(
Result, CurPtr,
true);
2357 const char *TokStart = BufferPtr;
2358 FormTokenWithChars(
Result, CurPtr, Kind);
2359 Result.setLiteralData(TokStart);
2365bool Lexer::LexAngledStringLiteral(
Token &
Result,
const char *CurPtr) {
2367 const char *NulCharacter =
nullptr;
2368 const char *AfterLessPos = CurPtr;
2369 char C = getAndAdvanceChar(CurPtr,
Result);
2374 C = getAndAdvanceChar(CurPtr,
Result);
2377 (
C == 0 && (CurPtr - 1 == BufferEnd))) {
2380 FormTokenWithChars(
Result, AfterLessPos, tok::less);
2385 if (isCodeCompletionPoint(CurPtr - 1)) {
2386 codeCompleteIncludedFile(AfterLessPos, CurPtr - 1,
true);
2388 FormTokenWithChars(
Result, CurPtr - 1, tok::unknown);
2391 NulCharacter = CurPtr-1;
2393 C = getAndAdvanceChar(CurPtr,
Result);
2398 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
2401 const char *TokStart = BufferPtr;
2402 FormTokenWithChars(
Result, CurPtr, tok::header_name);
2403 Result.setLiteralData(TokStart);
2407void Lexer::codeCompleteIncludedFile(
const char *PathStart,
2408 const char *CompletionPoint,
2411 StringRef PartialPath(PathStart, CompletionPoint - PathStart);
2412 llvm::StringRef SlashChars = LangOpts.MSVCCompat ?
"/\\" :
"/";
2413 auto Slash = PartialPath.find_last_of(SlashChars);
2415 (Slash == StringRef::npos) ?
"" : PartialPath.take_front(Slash);
2416 const char *StartOfFilename =
2417 (Slash == StringRef::npos) ? PathStart : PathStart + Slash + 1;
2420 StringRef(StartOfFilename, CompletionPoint - StartOfFilename)));
2423 while (CompletionPoint < BufferEnd) {
2424 char Next = *(CompletionPoint + 1);
2425 if (Next == 0 || Next ==
'\r' || Next ==
'\n')
2428 if (Next == (IsAngled ?
'>' :
'"'))
2430 if (SlashChars.contains(Next))
2442bool Lexer::LexCharConstant(
Token &
Result,
const char *CurPtr,
2445 const char *NulCharacter =
nullptr;
2448 if (Kind == tok::utf16_char_constant || Kind == tok::utf32_char_constant)
2449 Diag(BufferPtr, LangOpts.CPlusPlus
2450 ? diag::warn_cxx98_compat_unicode_literal
2451 : diag::warn_c99_compat_unicode_literal);
2452 else if (Kind == tok::utf8_char_constant)
2453 Diag(BufferPtr, LangOpts.CPlusPlus
2454 ? diag::warn_cxx14_compat_u8_character_literal
2455 : diag::warn_c17_compat_u8_character_literal);
2458 char C = getAndAdvanceChar(CurPtr,
Result);
2461 Diag(BufferPtr, diag::ext_empty_character);
2462 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2469 C = getAndAdvanceChar(CurPtr,
Result);
2471 if (
C ==
'\n' ||
C ==
'\r' ||
2472 (
C == 0 && CurPtr-1 == BufferEnd)) {
2474 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 0;
2475 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2480 if (isCodeCompletionPoint(CurPtr-1)) {
2482 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2487 NulCharacter = CurPtr-1;
2489 C = getAndAdvanceChar(CurPtr,
Result);
2493 if (LangOpts.CPlusPlus)
2494 CurPtr = LexUDSuffix(
Result, CurPtr,
false);
2498 Diag(NulCharacter, diag::null_in_char_or_string) << 0;
2501 const char *TokStart = BufferPtr;
2502 FormTokenWithChars(
Result, CurPtr, Kind);
2503 Result.setLiteralData(TokStart);
2511bool Lexer::SkipWhitespace(
Token &
Result,
const char *CurPtr,
2512 bool &TokAtPhysicalStartOfLine) {
2516 unsigned char Char = *CurPtr;
2518 const char *lastNewLine =
nullptr;
2519 auto setLastNewLine = [&](
const char *Ptr) {
2525 setLastNewLine(CurPtr - 1);
2544 if (*CurPtr ==
'\n')
2545 setLastNewLine(CurPtr);
2552 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2554 IsAtStartOfLine =
true;
2555 IsAtPhysicalStartOfLine =
true;
2562 char PrevChar = CurPtr[-1];
2568 TokAtPhysicalStartOfLine =
true;
2570 if (NewLinePtr && lastNewLine && NewLinePtr != lastNewLine &&
PP) {
2587bool Lexer::SkipLineComment(
Token &
Result,
const char *CurPtr,
2588 bool &TokAtPhysicalStartOfLine) {
2593 Diag(BufferPtr, diag::ext_line_comment);
2611 bool UnicodeDecodingAlreadyDiagnosed =
false;
2618 C !=
'\n' &&
C !=
'\r') {
2620 UnicodeDecodingAlreadyDiagnosed =
false;
2624 unsigned Length = llvm::getUTF8SequenceSize(
2625 (
const llvm::UTF8 *)CurPtr, (
const llvm::UTF8 *)BufferEnd);
2628 Diag(CurPtr, diag::warn_invalid_utf8_in_comment);
2629 UnicodeDecodingAlreadyDiagnosed =
true;
2632 UnicodeDecodingAlreadyDiagnosed =
false;
2638 const char *NextLine = CurPtr;
2641 const char *EscapePtr = CurPtr-1;
2642 bool HasSpace =
false;
2648 if (*EscapePtr ==
'\\')
2651 else if (EscapePtr[0] ==
'/' && EscapePtr[-1] ==
'?' &&
2652 EscapePtr[-2] ==
'?' && LangOpts.Trigraphs)
2654 CurPtr = EscapePtr-2;
2660 Diag(EscapePtr, diag::backslash_newline_space);
2667 const char *OldPtr = CurPtr;
2670 C = getAndAdvanceChar(CurPtr,
Result);
2675 if (
C != 0 && CurPtr == OldPtr+1) {
2683 if (CurPtr != OldPtr + 1 &&
C !=
'/' &&
2684 (CurPtr == BufferEnd + 1 || CurPtr[0] !=
'/')) {
2685 for (; OldPtr != CurPtr; ++OldPtr)
2686 if (OldPtr[0] ==
'\n' || OldPtr[0] ==
'\r') {
2690 const char *ForwardPtr = CurPtr;
2693 if (ForwardPtr[0] ==
'/' && ForwardPtr[1] ==
'/')
2698 Diag(OldPtr-1, diag::ext_multi_line_line_comment);
2703 if (
C ==
'\r' ||
C ==
'\n' || CurPtr == BufferEnd + 1) {
2708 if (
C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
2726 return SaveLineComment(
Result, CurPtr);
2740 NewLinePtr = CurPtr++;
2744 TokAtPhysicalStartOfLine =
true;
2753bool Lexer::SaveLineComment(
Token &
Result,
const char *CurPtr) {
2756 FormTokenWithChars(
Result, CurPtr, tok::comment);
2768 assert(Spelling[0] ==
'/' && Spelling[1] ==
'/' &&
"Not line comment?");
2772 Result.setKind(tok::comment);
2783 assert(CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r');
2786 const char *TrigraphPos =
nullptr;
2788 const char *SpacePos =
nullptr;
2795 if (CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r') {
2797 if (CurPtr[0] == CurPtr[1])
2811 if (*CurPtr ==
'\\') {
2813 }
else if (CurPtr[0] ==
'/' && CurPtr[-1] ==
'?' && CurPtr[-2] ==
'?') {
2815 TrigraphPos = CurPtr - 2;
2826 if (*CurPtr !=
'\n' && *CurPtr !=
'\r')
2835 L->
Diag(TrigraphPos, diag::trigraph_ignored_block_comment);
2839 L->
Diag(TrigraphPos, diag::trigraph_ends_block_comment);
2844 L->
Diag(CurPtr + 1, diag::escaped_newline_block_comment_end);
2848 L->
Diag(SpacePos, diag::backslash_newline_space);
2854#include <emmintrin.h>
2869bool Lexer::SkipBlockComment(
Token &
Result,
const char *CurPtr,
2870 bool &TokAtPhysicalStartOfLine) {
2880 unsigned char C = getCharAndSize(CurPtr, CharSize);
2882 if (
C == 0 && CurPtr == BufferEnd+1) {
2884 Diag(BufferPtr, diag::err_unterminated_block_comment);
2890 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2907 bool UnicodeDecodingAlreadyDiagnosed =
false;
2912 if (CurPtr + 24 < BufferEnd &&
2917 while (
C !=
'/' && (
intptr_t)CurPtr % 16 != 0) {
2922 if (
C ==
'/')
goto FoundSlash;
2926 while (CurPtr + 16 < BufferEnd) {
2928 if (LLVM_UNLIKELY(Mask != 0)) {
2938 CurPtr += llvm::countr_zero<unsigned>(cmp) + 1;
2944 __vector
unsigned char LongUTF = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
2945 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
2946 0x80, 0x80, 0x80, 0x80};
2947 __vector
unsigned char Slashes = {
2948 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/',
2949 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/'
2951 while (CurPtr + 16 < BufferEnd) {
2953 vec_any_ge(*(
const __vector
unsigned char *)CurPtr, LongUTF)))
2955 if (
vec_any_eq(*(
const __vector
unsigned char *)CurPtr, Slashes)) {
2962 while (CurPtr + 16 < BufferEnd) {
2963 bool HasNonASCII =
false;
2964 for (
unsigned I = 0; I < 16; ++I)
2965 HasNonASCII |= !
isASCII(CurPtr[I]);
2967 if (LLVM_UNLIKELY(HasNonASCII))
2970 bool HasSlash =
false;
2971 for (
unsigned I = 0; I < 16; ++I)
2972 HasSlash |= CurPtr[I] ==
'/';
2986 while (
C !=
'/' &&
C !=
'\0') {
2988 UnicodeDecodingAlreadyDiagnosed =
false;
2995 unsigned Length = llvm::getUTF8SequenceSize(
2996 (
const llvm::UTF8 *)CurPtr - 1, (
const llvm::UTF8 *)BufferEnd);
2999 Diag(CurPtr - 1, diag::warn_invalid_utf8_in_comment);
3000 UnicodeDecodingAlreadyDiagnosed =
true;
3002 UnicodeDecodingAlreadyDiagnosed =
false;
3003 CurPtr += Length - 1;
3010 if (CurPtr[-2] ==
'*')
3013 if ((CurPtr[-2] ==
'\n' || CurPtr[-2] ==
'\r')) {
3015 LangOpts.Trigraphs)) {
3021 if (CurPtr[0] ==
'*' && CurPtr[1] !=
'/') {
3026 Diag(CurPtr-1, diag::warn_nested_block_comment);
3028 }
else if (
C == 0 && CurPtr == BufferEnd+1) {
3030 Diag(BufferPtr, diag::err_unterminated_block_comment);
3039 FormTokenWithChars(
Result, CurPtr, tok::unknown);
3045 }
else if (
C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
3064 FormTokenWithChars(
Result, CurPtr, tok::comment);
3073 SkipWhitespace(
Result, CurPtr+1, TokAtPhysicalStartOfLine);
3091 "Must be in a preprocessing directive!");
3096 const char *CurPtr = BufferPtr;
3098 char Char = getAndAdvanceChar(CurPtr, Tmp);
3106 if (CurPtr-1 != BufferEnd) {
3107 if (isCodeCompletionPoint(CurPtr-1)) {
3123 assert(CurPtr[-1] == Char &&
"Trigraphs for newline?");
3124 BufferPtr = CurPtr-1;
3128 if (Tmp.
is(tok::code_completion)) {
3133 assert(Tmp.
is(tok::eod) &&
"Unexpected token!");
3145bool Lexer::LexEndOfFile(
Token &
Result,
const char *CurPtr) {
3153 FormTokenWithChars(
Result, CurPtr, tok::eod);
3165 BufferPtr = BufferEnd;
3166 FormTokenWithChars(
Result, BufferEnd, tok::eof);
3186 diag::err_pp_unterminated_conditional);
3192 if (CurPtr != BufferStart && (CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')) {
3197 if (LangOpts.CPlusPlus11) {
3201 if (!Diags.
isIgnored(diag::warn_cxx98_compat_no_newline_eof, EndLoc)) {
3202 DiagID = diag::warn_cxx98_compat_no_newline_eof;
3204 DiagID = diag::warn_no_newline_eof;
3207 DiagID = diag::ext_no_newline_eof;
3210 Diag(BufferEnd, DiagID)
3224unsigned Lexer::isNextPPTokenLParen() {
3225 assert(!
LexingRawMode &&
"How can we expand a macro from a skipping buffer?");
3227 if (isDependencyDirectivesLexer()) {
3228 if (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size())
3230 return DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is(
3240 const char *TmpBufferPtr = BufferPtr;
3242 bool atStartOfLine = IsAtStartOfLine;
3243 bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
3244 bool leadingSpace = HasLeadingSpace;
3250 BufferPtr = TmpBufferPtr;
3252 HasLeadingSpace = leadingSpace;
3253 IsAtStartOfLine = atStartOfLine;
3254 IsAtPhysicalStartOfLine = atPhysicalStartOfLine;
3259 if (Tok.
is(tok::eof))
3261 return Tok.
is(tok::l_paren);
3267 const char *Terminator = CMK ==
CMK_Perforce ?
"<<<<\n" :
">>>>>>>";
3269 auto RestOfBuffer = StringRef(CurPtr, BufferEnd - CurPtr).substr(TermLen);
3270 size_t Pos = RestOfBuffer.find(Terminator);
3271 while (Pos != StringRef::npos) {
3274 (RestOfBuffer[Pos - 1] !=
'\r' && RestOfBuffer[Pos - 1] !=
'\n')) {
3275 RestOfBuffer = RestOfBuffer.substr(Pos+TermLen);
3276 Pos = RestOfBuffer.find(Terminator);
3279 return RestOfBuffer.data()+Pos;
3288bool Lexer::IsStartOfConflictMarker(
const char *CurPtr) {
3290 if (CurPtr != BufferStart &&
3291 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
3295 if (!StringRef(CurPtr, BufferEnd - CurPtr).starts_with(
"<<<<<<<") &&
3296 !StringRef(CurPtr, BufferEnd - CurPtr).starts_with(
">>>> "))
3311 Diag(CurPtr, diag::err_conflict_marker);
3312 CurrentConflictMarkerState =
Kind;
3316 while (*CurPtr !=
'\r' && *CurPtr !=
'\n') {
3317 assert(CurPtr != BufferEnd &&
"Didn't find end of line");
3332bool Lexer::HandleEndOfConflictMarker(
const char *CurPtr) {
3334 if (CurPtr != BufferStart &&
3335 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
3344 for (
unsigned i = 1; i != 4; ++i)
3345 if (CurPtr[i] != CurPtr[0])
3352 CurrentConflictMarkerState)) {
3356 while (CurPtr != BufferEnd && *CurPtr !=
'\r' && *CurPtr !=
'\n')
3362 CurrentConflictMarkerState =
CMK_None;
3370 const char *BufferEnd) {
3371 if (CurPtr == BufferEnd)
3374 for (; CurPtr != BufferEnd; ++CurPtr) {
3375 if (CurPtr[0] ==
'#' && CurPtr[1] ==
'>')
3381bool Lexer::lexEditorPlaceholder(
Token &
Result,
const char *CurPtr) {
3382 assert(CurPtr[-1] ==
'<' && CurPtr[0] ==
'#' &&
"Not a placeholder!");
3388 const char *Start = CurPtr - 1;
3389 if (!LangOpts.AllowEditorPlaceholders)
3390 Diag(Start, diag::err_placeholder_in_source);
3392 FormTokenWithChars(
Result, End, tok::raw_identifier);
3393 Result.setRawIdentifierData(Start);
3400bool Lexer::isCodeCompletionPoint(
const char *CurPtr)
const {
3409std::optional<uint32_t> Lexer::tryReadNumericUCN(
const char *&StartPtr,
3410 const char *SlashLoc,
3413 char Kind = getCharAndSize(StartPtr, CharSize);
3414 assert((Kind ==
'u' || Kind ==
'U') &&
"expected a UCN");
3416 unsigned NumHexDigits;
3419 else if (Kind ==
'U')
3422 bool Delimited =
false;
3423 bool FoundEndDelimiter =
false;
3427 if (!LangOpts.CPlusPlus && !LangOpts.C99) {
3429 Diag(SlashLoc, diag::warn_ucn_not_valid_in_c89);
3430 return std::nullopt;
3433 const char *CurPtr = StartPtr + CharSize;
3434 const char *KindLoc = &CurPtr[-1];
3437 while (Count != NumHexDigits || Delimited) {
3438 char C = getCharAndSize(CurPtr, CharSize);
3439 if (!Delimited && Count == 0 &&
C ==
'{') {
3445 if (Delimited &&
C ==
'}') {
3447 FoundEndDelimiter =
true;
3451 unsigned Value = llvm::hexDigitValue(
C);
3456 Diag(SlashLoc, diag::warn_delimited_ucn_incomplete)
3457 << StringRef(KindLoc, 1);
3458 return std::nullopt;
3461 if (CodePoint & 0xF000'0000) {
3463 Diag(KindLoc, diag::err_escape_too_large) << 0;
3464 return std::nullopt;
3475 Diag(SlashLoc, FoundEndDelimiter ? diag::warn_delimited_ucn_empty
3476 : diag::warn_ucn_escape_no_digits)
3477 << StringRef(KindLoc, 1);
3478 return std::nullopt;
3481 if (Delimited && Kind ==
'U') {
3483 Diag(SlashLoc, diag::err_hex_escape_no_digits) << StringRef(KindLoc, 1);
3484 return std::nullopt;
3487 if (!Delimited && Count != NumHexDigits) {
3489 Diag(SlashLoc, diag::warn_ucn_escape_incomplete);
3491 if (Count == 4 && NumHexDigits == 8) {
3493 Diag(KindLoc, diag::note_ucn_four_not_eight)
3497 return std::nullopt;
3500 if (Delimited &&
PP) {
3502 ? diag::warn_cxx23_delimited_escape_sequence
3503 : diag::ext_delimited_escape_sequence)
3512 if (CurPtr - StartPtr == (
ptrdiff_t)(Count + 1 + (Delimited ? 2 : 0)))
3515 while (StartPtr != CurPtr)
3516 (void)getAndAdvanceChar(StartPtr, *
Result);
3523std::optional<uint32_t> Lexer::tryReadNamedUCN(
const char *&StartPtr,
3524 const char *SlashLoc,
3529 char C = getCharAndSize(StartPtr, CharSize);
3530 assert(
C ==
'N' &&
"expected \\N{...}");
3532 const char *CurPtr = StartPtr + CharSize;
3533 const char *KindLoc = &CurPtr[-1];
3535 C = getCharAndSize(CurPtr, CharSize);
3538 Diag(SlashLoc, diag::warn_ucn_escape_incomplete);
3539 return std::nullopt;
3542 const char *StartName = CurPtr;
3543 bool FoundEndDelimiter =
false;
3546 C = getCharAndSize(CurPtr, CharSize);
3549 FoundEndDelimiter =
true;
3555 Buffer.push_back(
C);
3558 if (!FoundEndDelimiter || Buffer.empty()) {
3560 Diag(SlashLoc, FoundEndDelimiter ? diag::warn_delimited_ucn_empty
3561 : diag::warn_delimited_ucn_incomplete)
3562 << StringRef(KindLoc, 1);
3563 return std::nullopt;
3566 StringRef Name(Buffer.data(), Buffer.size());
3567 std::optional<char32_t> Match =
3568 llvm::sys::unicode::nameToCodepointStrict(Name);
3569 std::optional<llvm::sys::unicode::LooseMatchingResult> LooseMatch;
3571 LooseMatch = llvm::sys::unicode::nameToCodepointLooseMatching(Name);
3573 Diag(StartName, diag::err_invalid_ucn_name)
3574 << StringRef(Buffer.data(), Buffer.size())
3577 Diag(StartName, diag::note_invalid_ucn_name_loose_matching)
3588 if (Diagnose && Match)
3590 ? diag::warn_cxx23_delimited_escape_sequence
3591 : diag::ext_delimited_escape_sequence)
3598 if (LooseMatch && Diagnose)
3599 Match = LooseMatch->CodePoint;
3606 if (CurPtr - StartPtr == (
ptrdiff_t)(Buffer.size() + 3))
3609 while (StartPtr != CurPtr)
3610 (void)getAndAdvanceChar(StartPtr, *
Result);
3614 return Match ? std::optional<uint32_t>(*Match) :
std::nullopt;
3617uint32_t Lexer::tryReadUCN(
const char *&StartPtr,
const char *SlashLoc,
3621 std::optional<uint32_t> CodePointOpt;
3622 char Kind = getCharAndSize(StartPtr, CharSize);
3623 if (Kind ==
'u' || Kind ==
'U')
3624 CodePointOpt = tryReadNumericUCN(StartPtr, SlashLoc,
Result);
3625 else if (Kind ==
'N')
3626 CodePointOpt = tryReadNamedUCN(StartPtr, SlashLoc,
Result);
3631 uint32_t CodePoint = *CodePointOpt;
3634 if (LangOpts.AsmPreprocessor)
3653 if (CodePoint < 0xA0) {
3657 if (CodePoint < 0x20 || CodePoint >= 0x7F)
3658 Diag(BufferPtr, diag::err_ucn_control_character);
3660 char C =
static_cast<char>(CodePoint);
3661 Diag(BufferPtr, diag::err_ucn_escape_basic_scs) << StringRef(&
C, 1);
3666 }
else if (CodePoint >= 0xD800 && CodePoint <= 0xDFFF) {
3671 if (LangOpts.CPlusPlus && !LangOpts.CPlusPlus11)
3672 Diag(BufferPtr, diag::warn_ucn_escape_surrogate);
3674 Diag(BufferPtr, diag::err_ucn_escape_invalid);
3682bool Lexer::CheckUnicodeWhitespace(
Token &
Result, uint32_t
C,
3683 const char *CurPtr) {
3686 Diag(BufferPtr, diag::ext_unicode_whitespace)
3695void Lexer::PropagateLineStartLeadingSpaceInfo(
Token &
Result) {
3696 IsAtStartOfLine =
Result.isAtStartOfLine();
3697 HasLeadingSpace =
Result.hasLeadingSpace();
3698 HasLeadingEmptyMacro =
Result.hasLeadingEmptyMacro();
3703 assert(!isDependencyDirectivesLexer());
3709 if (IsAtStartOfLine) {
3711 IsAtStartOfLine =
false;
3714 if (HasLeadingSpace) {
3716 HasLeadingSpace =
false;
3719 if (HasLeadingEmptyMacro) {
3721 HasLeadingEmptyMacro =
false;
3724 bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
3725 IsAtPhysicalStartOfLine =
false;
3728 bool returnedToken = LexTokenInternal(
Result, atPhysicalStartOfLine);
3730 assert((returnedToken || !isRawLex) &&
"Raw lex must succeed");
3731 return returnedToken;
3739bool Lexer::LexTokenInternal(
Token &
Result,
bool TokAtPhysicalStartOfLine) {
3741 assert(!
Result.needsCleaning() &&
"Result needs cleaning");
3742 assert(!
Result.hasPtrData() &&
"Result has not been reset");
3745 const char *CurPtr = BufferPtr;
3757 FormTokenWithChars(
Result, CurPtr, tok::unknown);
3766 unsigned SizeTmp, SizeTmp2;
3769 char Char = getAndAdvanceChar(CurPtr,
Result);
3773 NewLinePtr =
nullptr;
3778 if (CurPtr-1 == BufferEnd)
3779 return LexEndOfFile(
Result, CurPtr-1);
3782 if (isCodeCompletionPoint(CurPtr-1)) {
3785 FormTokenWithChars(
Result, CurPtr, tok::code_completion);
3790 Diag(CurPtr-1, diag::null_in_file);
3792 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
3801 if (LangOpts.MicrosoftExt) {
3803 Diag(CurPtr-1, diag::ext_ctrl_z_eof_microsoft);
3804 return LexEndOfFile(
Result, CurPtr-1);
3808 Kind = tok::unknown;
3812 if (CurPtr[0] ==
'\n')
3813 (void)getAndAdvanceChar(CurPtr,
Result);
3827 IsAtStartOfLine =
true;
3828 IsAtPhysicalStartOfLine =
true;
3829 NewLinePtr = CurPtr - 1;
3838 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
3848 SkipHorizontalWhitespace:
3850 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
3859 LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP)) {
3860 if (SkipLineComment(
Result, CurPtr+2, TokAtPhysicalStartOfLine))
3862 goto SkipIgnoredUnits;
3864 if (SkipBlockComment(
Result, CurPtr+2, TokAtPhysicalStartOfLine))
3866 goto SkipIgnoredUnits;
3868 goto SkipHorizontalWhitespace;
3876 case '0':
case '1':
case '2':
case '3':
case '4':
3877 case '5':
case '6':
case '7':
case '8':
case '9':
3880 return LexNumericConstant(
Result, CurPtr);
3889 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3890 Char = getCharAndSize(CurPtr, SizeTmp);
3894 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3895 tok::utf16_string_literal);
3899 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3900 tok::utf16_char_constant);
3903 if (Char ==
'R' && LangOpts.RawStringLiterals &&
3904 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3905 return LexRawStringLiteral(
Result,
3906 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3908 tok::utf16_string_literal);
3911 char Char2 = getCharAndSize(CurPtr + SizeTmp, SizeTmp2);
3915 return LexStringLiteral(
Result,
3916 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3918 tok::utf8_string_literal);
3919 if (Char2 ==
'\'' && (LangOpts.CPlusPlus17 || LangOpts.C23))
3920 return LexCharConstant(
3921 Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3923 tok::utf8_char_constant);
3925 if (Char2 ==
'R' && LangOpts.RawStringLiterals) {
3927 char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
3930 return LexRawStringLiteral(
Result,
3931 ConsumeChar(ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3934 tok::utf8_string_literal);
3941 return LexIdentifierContinue(
Result, CurPtr);
3947 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3948 Char = getCharAndSize(CurPtr, SizeTmp);
3952 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3953 tok::utf32_string_literal);
3957 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3958 tok::utf32_char_constant);
3961 if (Char ==
'R' && LangOpts.RawStringLiterals &&
3962 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3963 return LexRawStringLiteral(
Result,
3964 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3966 tok::utf32_string_literal);
3970 return LexIdentifierContinue(
Result, CurPtr);
3976 if (LangOpts.RawStringLiterals) {
3977 Char = getCharAndSize(CurPtr, SizeTmp);
3980 return LexRawStringLiteral(
Result,
3981 ConsumeChar(CurPtr, SizeTmp,
Result),
3982 tok::string_literal);
3986 return LexIdentifierContinue(
Result, CurPtr);
3991 Char = getCharAndSize(CurPtr, SizeTmp);
3995 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3996 tok::wide_string_literal);
3999 if (LangOpts.RawStringLiterals && Char ==
'R' &&
4000 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
4001 return LexRawStringLiteral(
Result,
4002 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4004 tok::wide_string_literal);
4008 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
4009 tok::wide_char_constant);
4014 case 'A':
case 'B':
case 'C':
case 'D':
case 'E':
case 'F':
case 'G':
4015 case 'H':
case 'I':
case 'J':
case 'K':
case 'M':
case 'N':
4016 case 'O':
case 'P':
case 'Q':
case 'S':
case 'T':
4017 case 'V':
case 'W':
case 'X':
case 'Y':
case 'Z':
4018 case 'a':
case 'b':
case 'c':
case 'd':
case 'e':
case 'f':
case 'g':
4019 case 'h':
case 'i':
case 'j':
case 'k':
case 'l':
case 'm':
case 'n':
4020 case 'o':
case 'p':
case 'q':
case 'r':
case 's':
case 't':
4021 case 'v':
case 'w':
case 'x':
case 'y':
case 'z':
4025 return LexIdentifierContinue(
Result, CurPtr);
4028 if (LangOpts.DollarIdents) {
4030 Diag(CurPtr-1, diag::ext_dollar_in_identifier);
4033 return LexIdentifierContinue(
Result, CurPtr);
4036 Kind = tok::unknown;
4043 return LexCharConstant(
Result, CurPtr, tok::char_constant);
4049 return LexStringLiteral(
Result, CurPtr,
4051 : tok::string_literal);
4055 Kind = tok::question;
4058 Kind = tok::l_square;
4061 Kind = tok::r_square;
4064 Kind = tok::l_paren;
4067 Kind = tok::r_paren;
4070 Kind = tok::l_brace;
4073 Kind = tok::r_brace;
4076 Char = getCharAndSize(CurPtr, SizeTmp);
4077 if (Char >=
'0' && Char <=
'9') {
4081 return LexNumericConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result));
4082 }
else if (LangOpts.CPlusPlus && Char ==
'*') {
4083 Kind = tok::periodstar;
4085 }
else if (Char ==
'.' &&
4086 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'.') {
4087 Kind = tok::ellipsis;
4088 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4095 Char = getCharAndSize(CurPtr, SizeTmp);
4098 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4099 }
else if (Char ==
'=') {
4100 Kind = tok::ampequal;
4101 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4107 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
4108 Kind = tok::starequal;
4109 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4115 Char = getCharAndSize(CurPtr, SizeTmp);
4117 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4118 Kind = tok::plusplus;
4119 }
else if (Char ==
'=') {
4120 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4121 Kind = tok::plusequal;
4127 Char = getCharAndSize(CurPtr, SizeTmp);
4129 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4130 Kind = tok::minusminus;
4131 }
else if (Char ==
'>' && LangOpts.CPlusPlus &&
4132 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'*') {
4133 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4135 Kind = tok::arrowstar;
4136 }
else if (Char ==
'>') {
4137 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4139 }
else if (Char ==
'=') {
4140 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4141 Kind = tok::minusequal;
4150 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
4151 Kind = tok::exclaimequal;
4152 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4154 Kind = tok::exclaim;
4159 Char = getCharAndSize(CurPtr, SizeTmp);
4169 bool TreatAsComment =
4170 LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP);
4171 if (!TreatAsComment)
4173 TreatAsComment = getCharAndSize(CurPtr+SizeTmp, SizeTmp2) !=
'*';
4175 if (TreatAsComment) {
4176 if (SkipLineComment(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
4177 TokAtPhysicalStartOfLine))
4183 goto SkipIgnoredUnits;
4188 if (SkipBlockComment(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
4189 TokAtPhysicalStartOfLine))
4198 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4199 Kind = tok::slashequal;
4205 Char = getCharAndSize(CurPtr, SizeTmp);
4207 Kind = tok::percentequal;
4208 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4209 }
else if (LangOpts.Digraphs && Char ==
'>') {
4210 Kind = tok::r_brace;
4211 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4212 }
else if (LangOpts.Digraphs && Char ==
':') {
4213 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4214 Char = getCharAndSize(CurPtr, SizeTmp);
4215 if (Char ==
'%' && getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
':') {
4216 Kind = tok::hashhash;
4217 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4219 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
4220 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4222 Diag(BufferPtr, diag::ext_charize_microsoft);
4229 if (TokAtPhysicalStartOfLine && !
LexingRawMode && !Is_PragmaLexer)
4230 goto HandleDirective;
4235 Kind = tok::percent;
4239 Char = getCharAndSize(CurPtr, SizeTmp);
4241 return LexAngledStringLiteral(
Result, CurPtr);
4242 }
else if (Char ==
'<') {
4243 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4245 Kind = tok::lesslessequal;
4246 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4248 }
else if (After ==
'<' && IsStartOfConflictMarker(CurPtr-1)) {
4252 }
else if (After ==
'<' && HandleEndOfConflictMarker(CurPtr-1)) {
4256 }
else if (LangOpts.CUDA && After ==
'<') {
4257 Kind = tok::lesslessless;
4258 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4261 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4262 Kind = tok::lessless;
4264 }
else if (Char ==
'=') {
4265 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4267 if (LangOpts.CPlusPlus20) {
4269 Diag(BufferPtr, diag::warn_cxx17_compat_spaceship);
4270 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4272 Kind = tok::spaceship;
4278 Diag(BufferPtr, diag::warn_cxx20_compat_spaceship)
4283 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4284 Kind = tok::lessequal;
4285 }
else if (LangOpts.Digraphs && Char ==
':') {
4286 if (LangOpts.CPlusPlus11 &&
4287 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
':') {
4294 char After = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
4295 if (After !=
':' && After !=
'>') {
4298 Diag(BufferPtr, diag::warn_cxx98_compat_less_colon_colon);
4303 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4304 Kind = tok::l_square;
4305 }
else if (LangOpts.Digraphs && Char ==
'%') {
4306 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4307 Kind = tok::l_brace;
4308 }
else if (Char ==
'#' && SizeTmp == 1 &&
4309 lexEditorPlaceholder(
Result, CurPtr)) {
4316 Char = getCharAndSize(CurPtr, SizeTmp);
4318 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4319 Kind = tok::greaterequal;
4320 }
else if (Char ==
'>') {
4321 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4323 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4325 Kind = tok::greatergreaterequal;
4326 }
else if (After ==
'>' && IsStartOfConflictMarker(CurPtr-1)) {
4330 }
else if (After ==
'>' && HandleEndOfConflictMarker(CurPtr-1)) {
4333 }
else if (LangOpts.CUDA && After ==
'>') {
4334 Kind = tok::greatergreatergreater;
4335 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4338 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4339 Kind = tok::greatergreater;
4342 Kind = tok::greater;
4346 Char = getCharAndSize(CurPtr, SizeTmp);
4348 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4349 Kind = tok::caretequal;
4351 if (LangOpts.OpenCL && Char ==
'^')
4352 Diag(CurPtr, diag::err_opencl_logical_exclusive_or);
4357 Char = getCharAndSize(CurPtr, SizeTmp);
4359 Kind = tok::pipeequal;
4360 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4361 }
else if (Char ==
'|') {
4363 if (CurPtr[1] ==
'|' && HandleEndOfConflictMarker(CurPtr-1))
4365 Kind = tok::pipepipe;
4366 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4372 Char = getCharAndSize(CurPtr, SizeTmp);
4373 if (LangOpts.Digraphs && Char ==
'>') {
4374 Kind = tok::r_square;
4375 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4376 }
else if (Char ==
':') {
4377 Kind = tok::coloncolon;
4378 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4387 Char = getCharAndSize(CurPtr, SizeTmp);
4390 if (CurPtr[1] ==
'=' && HandleEndOfConflictMarker(CurPtr-1))
4393 Kind = tok::equalequal;
4394 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4403 Char = getCharAndSize(CurPtr, SizeTmp);
4405 Kind = tok::hashhash;
4406 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4407 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
4410 Diag(BufferPtr, diag::ext_charize_microsoft);
4411 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4417 if (TokAtPhysicalStartOfLine && !
LexingRawMode && !Is_PragmaLexer)
4418 goto HandleDirective;
4426 if (CurPtr[-1] ==
'@' && LangOpts.ObjC)
4429 Kind = tok::unknown;
4434 if (!LangOpts.AsmPreprocessor) {
4435 if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &
Result)) {
4436 if (CheckUnicodeWhitespace(
Result, CodePoint, CurPtr)) {
4437 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
4445 return LexUnicodeIdentifierStart(
Result, CodePoint, CurPtr);
4449 Kind = tok::unknown;
4454 Kind = tok::unknown;
4458 llvm::UTF32 CodePoint;
4463 llvm::ConversionResult Status =
4464 llvm::convertUTF8Sequence((
const llvm::UTF8 **)&CurPtr,
4465 (
const llvm::UTF8 *)BufferEnd,
4467 llvm::strictConversion);
4468 if (Status == llvm::conversionOK) {
4469 if (CheckUnicodeWhitespace(
Result, CodePoint, CurPtr)) {
4470 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
4477 return LexUnicodeIdentifierStart(
Result, CodePoint, CurPtr);
4483 Kind = tok::unknown;
4490 Diag(CurPtr, diag::err_invalid_utf8);
4492 BufferPtr = CurPtr+1;
4504 FormTokenWithChars(
Result, CurPtr, Kind);
4510 FormTokenWithChars(
Result, CurPtr, tok::hash);
4525const char *Lexer::convertDependencyDirectiveToken(
4527 const char *TokPtr = BufferStart + DDTok.
Offset;
4533 BufferPtr = TokPtr + DDTok.
Length;
4537bool Lexer::LexDependencyDirectiveToken(
Token &
Result) {
4538 assert(isDependencyDirectivesLexer());
4540 using namespace dependency_directives_scan;
4542 while (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size()) {
4543 if (DepDirectives.front().Kind == pp_eof)
4544 return LexEndOfFile(
Result, BufferEnd);
4545 if (DepDirectives.front().Kind == tokens_present_before_eof)
4547 NextDepDirectiveTokenIndex = 0;
4548 DepDirectives = DepDirectives.drop_front();
4552 DepDirectives.front().Tokens[NextDepDirectiveTokenIndex++];
4553 if (NextDepDirectiveTokenIndex > 1 || DDTok.
Kind != tok::hash) {
4559 BufferPtr = BufferStart + DDTok.
Offset;
4560 LexAngledStringLiteral(
Result, BufferPtr + 1);
4561 if (
Result.isNot(tok::header_name))
4566 DepDirectives.front().Tokens[NextDepDirectiveTokenIndex];
4567 if (BufferStart + NextTok.
Offset >= BufferPtr)
4569 ++NextDepDirectiveTokenIndex;
4574 const char *TokPtr = convertDependencyDirectiveToken(DDTok,
Result);
4576 if (
Result.is(tok::hash) &&
Result.isAtStartOfLine()) {
4580 if (
Result.is(tok::raw_identifier)) {
4581 Result.setRawIdentifierData(TokPtr);
4589 if (
Result.isLiteral()) {
4590 Result.setLiteralData(TokPtr);
4593 if (
Result.is(tok::colon)) {
4595 if (*BufferPtr ==
':') {
4596 assert(DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is(
4598 ++NextDepDirectiveTokenIndex;
4599 Result.setKind(tok::coloncolon);
4609bool Lexer::LexDependencyDirectiveTokenWhileSkipping(
Token &
Result) {
4610 assert(isDependencyDirectivesLexer());
4612 using namespace dependency_directives_scan;
4615 unsigned NestedIfs = 0;
4617 DepDirectives = DepDirectives.drop_front();
4618 switch (DepDirectives.front().Kind) {
4620 llvm_unreachable(
"unexpected 'pp_none'");
4661 NextDepDirectiveTokenIndex = 0;
4662 return LexEndOfFile(
Result, BufferEnd);
4667 DepDirectives.front().Tokens.front();
4668 assert(DDTok.
is(tok::hash));
4669 NextDepDirectiveTokenIndex = 1;
4671 convertDependencyDirectiveToken(DDTok,
Result);
Defines the Diagnostic-related interfaces.
Defines the clang::IdentifierInfo, clang::IdentifierTable, and clang::Selector interfaces.
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified.
Defines the clang::LangOptions interface.
static bool isInExpansionTokenRange(const SourceLocation Loc, const SourceManager &SM)
static bool isMathematicalExtensionID(uint32_t C, const LangOptions &LangOpts, bool IsStart, bool &IsExtension)
static void diagnoseInvalidUnicodeCodepointInIdentifier(DiagnosticsEngine &Diags, const LangOptions &LangOpts, uint32_t CodePoint, CharSourceRange Range, bool IsFirst)
static char DecodeTrigraphChar(const char *CP, Lexer *L, bool Trigraphs)
DecodeTrigraphChar - If the specified character is a legal trigraph when prefixed with ?...
static size_t getSpellingSlow(const Token &Tok, const char *BufPtr, const LangOptions &LangOpts, char *Spelling)
Slow case of getSpelling.
static const char * FindConflictEnd(const char *CurPtr, const char *BufferEnd, ConflictMarkerKind CMK)
Find the end of a version control conflict marker.
static void maybeDiagnoseUTF8Homoglyph(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range)
After encountering UTF-8 character C and interpreting it as an identifier character,...
static SourceLocation getBeginningOfFileToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
static void StringifyImpl(T &Str, char Quote)
static LLVM_ATTRIBUTE_NOINLINE SourceLocation GetMappedTokenLoc(Preprocessor &PP, SourceLocation FileLoc, unsigned CharNo, unsigned TokLen)
GetMappedTokenLoc - If lexing out of a 'mapped buffer', where we pretend the lexer buffer was all exp...
static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts, bool &IsExtension)
static CharSourceRange makeCharRange(Lexer &L, const char *Begin, const char *End)
static bool isUnicodeWhitespace(uint32_t Codepoint)
static void diagnoseExtensionInIdentifier(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range)
static const char * findPlaceholderEnd(const char *CurPtr, const char *BufferEnd)
static llvm::SmallString< 5 > codepointAsHexString(uint32_t C)
static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, Lexer *L, bool Trigraphs)
isBlockCommentEndOfEscapedNewLine - Return true if the specified newline character (either \n or \r) ...
static const char * fastParseASCIIIdentifier(const char *CurPtr, const char *BufferEnd)
static char GetTrigraphCharForLetter(char Letter)
GetTrigraphCharForLetter - Given a character that occurs after a ?? pair, return the decoded trigraph...
static bool isAllowedInitiallyIDChar(uint32_t C, const LangOptions &LangOpts, bool &IsExtension)
static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range, bool IsFirst)
static const char * findBeginningOfLine(StringRef Buffer, unsigned Offset)
Returns the pointer that points to the beginning of line that contains the given offset,...
Defines the MultipleIncludeOpt interface.
Defines the clang::Preprocessor interface.
Defines the clang::SourceLocation class and associated facilities.
Defines the SourceManager interface.
Defines the clang::TokenKind enum and support functions.
static const llvm::sys::UnicodeCharRange C11DisallowedInitialIDCharRanges[]
static const llvm::sys::UnicodeCharRange C99DisallowedInitialIDCharRanges[]
static const llvm::sys::UnicodeCharRange UnicodeWhitespaceCharRanges[]
static const llvm::sys::UnicodeCharRange C99AllowedIDCharRanges[]
static const llvm::sys::UnicodeCharRange C11AllowedIDCharRanges[]
static const llvm::sys::UnicodeCharRange MathematicalNotationProfileIDStartRanges[]
static const llvm::sys::UnicodeCharRange MathematicalNotationProfileIDContinueRanges[]
static const llvm::sys::UnicodeCharRange XIDStartRanges[]
static const llvm::sys::UnicodeCharRange XIDContinueRanges[]
__DEVICE__ void * memcpy(void *__a, const void *__b, size_t __c)
__device__ __2f16 float c
__PTRDIFF_TYPE__ ptrdiff_t
static __inline__ int __ATTRS_o_ai vec_any_ge(vector signed char __a, vector signed char __b)
static __inline__ int __ATTRS_o_ai vec_any_eq(vector signed char __a, vector signed char __b)
Represents a character-granular source range.
static CharSourceRange getCharRange(SourceRange R)
SourceLocation getEnd() const
SourceLocation getBegin() const
A little helper class used to produce diagnostics.
Concrete class used by the front-end to report problems and issues.
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
bool isIgnored(unsigned DiagID, SourceLocation Loc) const
Determine whether the diagnostic is known to be ignored.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
static FixItHint CreateReplacement(CharSourceRange RemoveRange, StringRef Code)
Create a code modification hint that replaces the given source range with the given code string.
static FixItHint CreateRemoval(CharSourceRange RemoveRange)
Create a code modification hint that removes the given source range.
static FixItHint CreateInsertion(SourceLocation InsertionLoc, StringRef Code, bool BeforePreviousInsertions=false)
Create a code modification hint that inserts the given code string at a specific location.
One of these records is kept for each identifier that is lexed.
bool isHandleIdentifierCase() const
Return true if the Preprocessor::HandleIdentifier must be called on a token of this identifier.
bool isKeyword(const LangOptions &LangOpts) const
Return true if this token is a keyword in the specified language.
tok::ObjCKeywordKind getObjCKeywordID() const
Return the Objective-C keyword ID for this identifier.
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens.
static StringRef getSourceText(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts, bool *Invalid=nullptr)
Returns a string for the source that the range encompasses.
void SetKeepWhitespaceMode(bool Val)
SetKeepWhitespaceMode - This method lets clients enable or disable whitespace retention mode.
static SourceLocation findLocationAfterToken(SourceLocation loc, tok::TokenKind TKind, const SourceManager &SM, const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine)
Checks that the given token is the first token that occurs after the given location (this excludes co...
bool LexFromRawLexer(Token &Result)
LexFromRawLexer - Lex a token from a designated raw lexer (one with no associated preprocessor object...
bool inKeepCommentMode() const
inKeepCommentMode - Return true if the lexer should return comments as tokens.
void SetCommentRetentionState(bool Mode)
SetCommentRetentionState - Change the comment retention mode of the lexer to the specified mode.
static std::optional< Token > findPreviousToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts, bool IncludeComments)
Finds the token that comes before the given location.
void seek(unsigned Offset, bool IsAtStartOfLine)
Set the lexer's buffer pointer to Offset.
static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
void ReadToEndOfLine(SmallVectorImpl< char > *Result=nullptr)
ReadToEndOfLine - Read the rest of the current preprocessor line as an uninterpreted string.
static bool isAtStartOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroBegin=nullptr)
Returns true if the given MacroID location points at the first token of the macro expansion.
DiagnosticBuilder Diag(const char *Loc, unsigned DiagID) const
Diag - Forwarding function for diagnostics.
const char * getBufferLocation() const
Return the current location in the buffer.
bool Lex(Token &Result)
Lex - Return the next token in the file.
bool isPragmaLexer() const
isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
static unsigned getTokenPrefixLength(SourceLocation TokStart, unsigned CharNo, const SourceManager &SM, const LangOptions &LangOpts)
Get the physical length (including trigraphs and escaped newlines) of the first Characters characters...
Lexer(FileID FID, const llvm::MemoryBufferRef &InputFile, Preprocessor &PP, bool IsFirstIncludeOfFile=true)
Lexer constructor - Create a new lexer object for the specified buffer with the specified preprocesso...
static bool isAtEndOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroEnd=nullptr)
Returns true if the given MacroID location points at the last token of the macro expansion.
SourceLocation getSourceLocation() override
getSourceLocation - Return a source location for the next character in the current file.
static CharSourceRange makeFileCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Accepts a range and returns a character range with file locations.
static bool isNewLineEscaped(const char *BufferStart, const char *Str)
Checks whether new line pointed by Str is preceded by escape sequence.
SourceLocation getSourceLocation(const char *Loc, unsigned TokLen=1) const
getSourceLocation - Return a source location identifier for the specified offset in the current file.
static StringRef getIndentationForLine(SourceLocation Loc, const SourceManager &SM)
Returns the leading whitespace for line that corresponds to the given location Loc.
static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)
getSpelling - This method is used to get the spelling of a token into a preallocated buffer,...
bool isKeepWhitespaceMode() const
isKeepWhitespaceMode - Return true if the lexer should return tokens for every character in the file,...
static bool isAsciiIdentifierContinueChar(char c, const LangOptions &LangOpts)
Returns true if the given character could appear in an identifier.
static std::optional< Token > findNextToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts, bool IncludeComments=false)
Finds the token that comes right after the given location.
static unsigned MeasureTokenLength(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
MeasureTokenLength - Relex the token at the specified location and return its length in bytes in the ...
static SourceLocation GetBeginningOfToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Given a location anywhere in a source buffer, find the location that corresponds to the beginning of...
void resetExtendedTokenMode()
Sets the extended token mode back to its initial value, according to the language options and preproc...
static StringRef getImmediateMacroNameForDiagnostics(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
static Lexer * Create_PragmaLexer(SourceLocation SpellingLoc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLen, Preprocessor &PP)
Create_PragmaLexer: Lexer constructor - Create a new lexer object for _Pragma expansion.
static PreambleBounds ComputePreamble(StringRef Buffer, const LangOptions &LangOpts, unsigned MaxLines=0)
Compute the preamble of the given file.
static bool getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace=false)
Relex the token at the specified location.
static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset, const SourceManager &SM, const LangOptions &LangOpts)
Computes the source location just past the end of the token at this source location.
static std::string Stringify(StringRef Str, bool Charify=false)
Stringify - Convert the specified string into a C string by i) escaping '\' and " characters and ii) ...
static SizedChar getCharAndSizeNoWarn(const char *Ptr, const LangOptions &LangOpts)
getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning.
void ExitTopLevelConditional()
Called when the lexer exits the top-level conditional.
bool LexingRawMode
True if in raw mode.
SmallVector< PPConditionalInfo, 4 > ConditionalStack
Information about the set of #if/#ifdef/#ifndef blocks we are currently in.
bool ParsingPreprocessorDirective
True when parsing #XXX; turns '\n' into a tok::eod token.
MultipleIncludeOpt MIOpt
A state machine that detects the #ifndef-wrapping a file idiom for the multiple-include optimization.
bool ParsingFilename
True after #include; turns <xx> or "xxx" into a tok::header_name token.
bool isLexingRawMode() const
Return true if this lexer is in raw mode or not.
const FileID FID
The SourceManager FileID corresponding to the file being lexed.
bool LexEditorPlaceholders
When enabled, the preprocessor will construct editor placeholder tokens.
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
SourceLocation getCodeCompletionLoc() const
Returns the location of the code-completion point.
SourceLocation getCodeCompletionFileLoc() const
Returns the start location of the file of code-completion point.
void setCodeCompletionTokenRange(const SourceLocation Start, const SourceLocation End)
Set the code completion token range for detecting replacement range later on.
bool isRecordingPreamble() const
void setRecordedPreambleConditionalStack(ArrayRef< PPConditionalInfo > s)
bool isInPrimaryFile() const
Return true if we're in the top-level file, not in a #include.
void CreateString(StringRef Str, Token &Tok, SourceLocation ExpansionLocStart=SourceLocation(), SourceLocation ExpansionLocEnd=SourceLocation())
Plop the specified string into a scratch buffer and set the specified token's location and length to ...
IdentifierInfo * LookUpIdentifierInfo(Token &Identifier) const
Given a tok::raw_identifier token, look up the identifier information for the token and install it in...
bool isPreprocessedOutput() const
Returns true if the preprocessor is responsible for generating output, false if it is producing token...
bool HandleIdentifier(Token &Identifier)
Callback invoked when the lexer reads an identifier and has filled in the tokens IdentifierInfo membe...
SourceManager & getSourceManager() const
EmptylineHandler * getEmptylineHandler() const
bool getCommentRetentionState() const
bool hadModuleLoaderFatalFailure() const
PreprocessorOptions & getPreprocessorOpts() const
Retrieve the preprocessor options used to initialize this preprocessor.
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
Return the 'spelling' of the token at the given location; does not go up to the spelling location or ...
bool HandleComment(Token &result, SourceRange Comment)
bool isCodeCompletionEnabled() const
Determine if we are performing code completion.
void HandleDirective(Token &Result)
Callback invoked when the lexer sees a # token at the start of a line.
IdentifierTable & getIdentifierTable()
const LangOptions & getLangOpts() const
void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled)
Hook used by the lexer to invoke the "included file" code completion point.
void CodeCompleteNaturalLanguage()
Hook used by the lexer to invoke the "natural language" code completion point.
bool HandleEndOfFile(Token &Result, bool isEndOfMacro=false)
Callback invoked when the lexer hits the end of the current file.
DiagnosticsEngine & getDiagnostics() const
void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter)
Set the code completion token for filtering purposes.
DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const
Forwarding function for diagnostics.
Encodes a location in the source.
static SourceLocation getFromRawEncoding(UIntTy Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
bool isValid() const
Return true if this is a valid SourceLocation object.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
UIntTy getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it.
This class handles loading and caching of source files into memory.
const char * getCharacterData(SourceLocation SL, bool *Invalid=nullptr) const
Return a pointer to the start of the specified location in the appropriate spelling MemoryBuffer.
A trivial tuple used to represent a source range.
void setBegin(SourceLocation b)
SourceLocation getEnd() const
SourceLocation getBegin() const
void setEnd(SourceLocation e)
Each ExpansionInfo encodes the expansion location - where the token was ultimately expanded,...
SourceLocation getExpansionLocStart() const
SourceLocation getSpellingLoc() const
bool isMacroArgExpansion() const
This is a discriminated union of FileInfo and ExpansionInfo.
const ExpansionInfo & getExpansion() const
static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix)
Determine whether a suffix is a valid ud-suffix.
Token - This structure provides full information about a lexed token.
IdentifierInfo * getIdentifierInfo() const
bool hasUCN() const
Returns true if this token contains a universal character name.
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
unsigned getLength() const
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....
tok::TokenKind getKind() const
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const
Return true if we have an ObjC keyword identifier.
bool isSimpleTypeSpecifier(const LangOptions &LangOpts) const
Determine whether the token kind starts a simple-type-specifier.
void startToken()
Reset all flags to cleared.
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
StringRef getRawIdentifier() const
getRawIdentifier - For a raw identifier token (i.e., an identifier lexed in raw mode),...
const char * getLiteralData() const
getLiteralData - For a literal token (numeric constant, string, etc), this returns a pointer to the s...
void setFlag(TokenFlags Flag)
Set the specified flag.
static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a)
Copies the values of the most significant bits from each 8-bit element in a 128-bit integer vector of...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi8(__m128i __a, __m128i __b)
Compares each of the corresponding 8-bit values of the 128-bit integer vectors for equality.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si128(__m128i_u const *__p)
Moves packed integer values from an unaligned 128-bit memory location to elements in a 128-bit intege...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_load_si128(__m128i const *__p)
Moves packed integer values from an aligned 128-bit memory location to elements in a 128-bit integer ...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_epi8(char __b)
Initializes all values in a 128-bit vector of [16 x i8] with the specified 8-bit value.
@ tokens_present_before_eof
Indicates that there are tokens present between the last scanned directive and eof.
@ pp_pragma_system_header
@ pp_pragma_include_alias
@ After
Like System, but searched after the system directories.
bool isStringLiteral(TokenKind K)
Return true if this is a C or C++ string-literal (or C++11 user-defined-string-literal) token.
ObjCKeywordKind
Provides a namespace for Objective-C keywords which start with an '@'.
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
The JSON file list parser is used to communicate input to InstallAPI.
LLVM_READNONE bool isASCII(char c)
Returns true if a byte is an ASCII character.
LLVM_READONLY bool isVerticalWhitespace(unsigned char c)
Returns true if this character is vertical ASCII whitespace: '\n', '\r'.
ConflictMarkerKind
ConflictMarkerKind - Kinds of conflict marker which the lexer might be recovering from.
@ CMK_Perforce
A Perforce-style conflict marker, initiated by 4 ">"s, separated by 4 "="s, and terminated by 4 "<"s.
@ CMK_None
Not within a conflict marker.
@ CMK_Normal
A normal or diff3 conflict marker, initiated by at least 7 "<"s, separated by at least 7 "="s or "|"s...
LLVM_READONLY bool isAsciiIdentifierContinue(unsigned char c)
bool operator<(DeclarationName LHS, DeclarationName RHS)
Ordering on two declaration names.
LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
@ Result
The result type of a method or function.
LLVM_READONLY bool isRawStringDelimBody(unsigned char c)
Return true if this is the body character of a C++ raw string delimiter.
LLVM_READONLY bool isWhitespace(unsigned char c)
Return true if this character is horizontal or vertical ASCII whitespace: ' ', '\t',...
LLVM_READONLY bool isPreprocessingNumberBody(unsigned char c)
Return true if this is the body character of a C preprocessing number, which is [a-zA-Z0-9_.].
const FunctionProtoType * T
LLVM_READONLY bool isAsciiIdentifierStart(unsigned char c, bool AllowDollar=false)
Returns true if this is a valid first character of a C identifier, which is [a-zA-Z_].
__INTPTR_TYPE__ intptr_t
A signed integer type with the property that any valid pointer to void can be converted to this type,...
float __ovld __cnfn length(float)
Return the length of vector p, i.e., sqrt(p.x^2 + p.y^2 + ...)
#define _mm_cmpistri(A, B, M)
Uses the immediate operand M to perform a comparison of string data with implicitly defined lengths t...
#define _SIDD_LEAST_SIGNIFICANT
#define _SIDD_NEGATIVE_POLARITY
Represents a char and the number of bytes parsed to produce it.
Describes the bounds (start, size) of the preamble and a flag required by PreprocessorOptions::Precom...
Token lexed as part of dependency directive scanning.
unsigned Offset
Offset into the original source input.
bool is(tok::TokenKind K) const