token_kind.h 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #ifndef CARBON_TOOLCHAIN_LEX_TOKEN_KIND_H_
  5. #define CARBON_TOOLCHAIN_LEX_TOKEN_KIND_H_
  #include <cstdint>
  #include <initializer_list>

  #include "common/check.h"
  #include "common/enum_base.h"
  #include "llvm/ADT/ArrayRef.h"
  #include "llvm/ADT/StringRef.h"
  #include "llvm/Support/FormatVariadicDetails.h"
  12. namespace Carbon::Lex {
  13. CARBON_DEFINE_RAW_ENUM_CLASS(TokenKind, uint8_t) {
  14. #define CARBON_TOKEN(TokenName) CARBON_RAW_ENUM_ENUMERATOR(TokenName)
  15. #include "toolchain/lex/token_kind.def"
  16. };
  17. class TokenKind : public CARBON_ENUM_BASE(TokenKind) {
  18. public:
  19. #define CARBON_TOKEN(TokenName) CARBON_ENUM_CONSTANT_DECL(TokenName)
  20. #include "toolchain/lex/token_kind.def"
  21. // An array of all the keyword tokens.
  22. static const llvm::ArrayRef<TokenKind> KeywordTokens;
  23. using EnumBase::EnumBase;
  24. // Test whether this kind of token is a simple symbol sequence (punctuation,
  25. // not letters) that appears directly in the source text and can be
  26. // unambiguously lexed with `starts_with` logic. While these may appear
  27. // inside of other tokens, outside of the contents of other tokens they
  28. // don't require any specific characters before or after to distinguish them
  29. // in the source. Returns false otherwise.
  30. auto is_symbol() const -> bool { return IsSymbol[AsInt()]; }
  31. // Test whether this kind of token is a grouping symbol (part of an opening
  32. // and closing pair that must always be matched in the token stream).
  33. auto is_grouping_symbol() const -> bool { return IsGroupingSymbol[AsInt()]; }
  34. // Test whether this kind of token is an opening symbol for a group.
  35. auto is_opening_symbol() const -> bool { return IsOpeningSymbol[AsInt()]; }
  36. // Returns the associated closing symbol for an opening symbol.
  37. //
  38. // The token kind must be an opening symbol.
  39. auto closing_symbol() const -> TokenKind {
  40. auto result = ClosingSymbol[AsInt()];
  41. CARBON_DCHECK(result != Error) << "Only opening symbols are valid!";
  42. return result;
  43. }
  44. // Test whether this kind of token is a closing symbol for a group.
  45. auto is_closing_symbol() const -> bool { return IsClosingSymbol[AsInt()]; }
  46. // Returns the associated opening symbol for a closing symbol.
  47. //
  48. // The token kind must be a closing symbol.
  49. auto opening_symbol() const -> TokenKind {
  50. auto result = OpeningSymbol[AsInt()];
  51. CARBON_DCHECK(result != Error) << "Only closing symbols are valid!";
  52. return result;
  53. }
  54. // Test whether this kind of token is a one-character symbol whose character
  55. // is not part of any other symbol.
  56. auto is_one_char_symbol() const -> bool { return IsOneCharSymbol[AsInt()]; };
  57. // Test whether this kind of token is a keyword.
  58. auto is_keyword() const -> bool { return IsKeyword[AsInt()]; };
  59. // Test whether this kind of token is a sized type literal.
  60. auto is_sized_type_literal() const -> bool {
  61. return *this == TokenKind::IntTypeLiteral ||
  62. *this == TokenKind::UnsignedIntTypeLiteral ||
  63. *this == TokenKind::FloatTypeLiteral;
  64. };
  65. // If this token kind has a fixed spelling when in source code, returns it.
  66. // Otherwise returns an empty string.
  67. auto fixed_spelling() const -> llvm::StringLiteral {
  68. return FixedSpelling[AsInt()];
  69. };
  70. // Get the expected number of parse tree nodes that will be created for this
  71. // token.
  72. auto expected_parse_tree_size() const -> int {
  73. return ExpectedParseTreeSize[AsInt()];
  74. }
  75. // Test whether this token kind is in the provided list.
  76. auto IsOneOf(std::initializer_list<TokenKind> kinds) const -> bool {
  77. for (TokenKind kind : kinds) {
  78. if (*this == kind) {
  79. return true;
  80. }
  81. }
  82. return false;
  83. }
  84. private:
  85. static const TokenKind KeywordTokensStorage[];
  86. static const bool IsSymbol[];
  87. static const bool IsGroupingSymbol[];
  88. static const bool IsOpeningSymbol[];
  89. static const TokenKind ClosingSymbol[];
  90. static const bool IsClosingSymbol[];
  91. static const TokenKind OpeningSymbol[];
  92. static const bool IsOneCharSymbol[];
  93. static const bool IsKeyword[];
  94. static const llvm::StringLiteral FixedSpelling[];
  95. static const int8_t ExpectedParseTreeSize[];
  96. };
  97. #define CARBON_TOKEN(TokenName) \
  98. CARBON_ENUM_CONSTANT_DEFINITION(TokenKind, TokenName)
  99. #include "toolchain/lex/token_kind.def"
  100. constexpr TokenKind TokenKind::KeywordTokensStorage[] = {
  101. #define CARBON_KEYWORD_TOKEN(TokenName, Spelling) TokenKind::TokenName,
  102. #include "toolchain/lex/token_kind.def"
  103. };
  104. constexpr llvm::ArrayRef<TokenKind> TokenKind::KeywordTokens =
  105. KeywordTokensStorage;
  106. } // namespace Carbon::Lex
  107. namespace llvm {
  108. // We use formatv primarily for diagnostics. In these cases, it's expected that
  109. // the spelling in source code should be used.
  110. template <>
  111. struct format_provider<Carbon::Lex::TokenKind> {
  112. static void format(const Carbon::Lex::TokenKind& kind, raw_ostream& out,
  113. StringRef /*style*/) {
  114. auto spelling = kind.fixed_spelling();
  115. if (!spelling.empty()) {
  116. out << spelling;
  117. } else {
  118. // Default to the name if there's no fixed spelling.
  119. out << kind;
  120. }
  121. }
  122. };
  123. } // namespace llvm
  124. #endif // CARBON_TOOLCHAIN_LEX_TOKEN_KIND_H_