Add bit packing to NodeImpl (#4651)

Just a small packing optimization. We currently have 222 `NodeKinds`, so this leaves only about 30 more we can add before needing to pack further. If we did, there would be a couple of options for bringing the count down by reusing `NodeKinds` and disambiguating based on the token kind (the 29 infix operators, for example). Or we could just undo this.
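For context, the packed `NodeImpl` fits in a single 32-bit word. A minimal standalone sketch of the layout (the enum here is an illustrative stand-in; the real definition is in the `tree.h` diff below):

```
#include <cstdint>

// Stand-in for the real NodeKind wrapper: 222 of its 256 values are in use.
enum class NodeKind : uint8_t {};

struct NodeImpl {
  NodeKind kind_;              // 8 bits: which grammar production this node is.
  bool has_error_ : 1;         // 1 bit: node is or contains a parse error.
  unsigned token_index_ : 23;  // 23 bits: index of the node's root token.
};                             // 8 + 1 + 23 = 32 bits total.

// Holds under the Itanium ABI (Clang/GCC); MSVC lays adjacent bit-fields of
// different types out differently, so this spelling is not portable there.
static_assert(sizeof(NodeImpl) == 4, "expected a single 32-bit word");
```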

I'm expecting this to yield a small improvement. I'll see if I can get
better numbers since my machine's not really reliable, but here are some
basic values.

I'm also suggesting we drop the use of `::RawEnumType` for `TokenKind`,
since bit packing appears to work without it. Hopefully the `static_assert`
makes the size of the field easier for people to understand.
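To illustrate, here are the two spellings side by side (a hedged sketch using a stand-in enum, not the exact Carbon types):

```
#include <cstdint>

enum class TokenKind : uint8_t {};  // Stand-in; the real type wraps an enum.

struct Before {
  // Old spelling: a bit-field over the raw underlying type, sized via sizeof.
  uint8_t kind_ : sizeof(TokenKind) * 8;
};

struct After {
  // New spelling: store the enum directly and assert its size instead.
  TokenKind kind_;
  static_assert(sizeof(kind_) == 1, "TokenKind must pack to 8 bits");
};
```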

With the change:

```
----------------------------------------------------------------------------------------------------------------------------
Benchmark                                                 Time             CPU   Iterations      Bytes      Lines     Tokens
----------------------------------------------------------------------------------------------------------------------------
BM_CompileAPIFileDenseDecls<Phase::Parse>/256         50399 ns        50359 ns        14336 104.588M/s 3.87217M/s 21.8629M/s
BM_CompileAPIFileDenseDecls<Phase::Parse>/1024       237823 ns       237629 ns         3072 136.721M/s 4.11986M/s 24.2058M/s
BM_CompileAPIFileDenseDecls<Phase::Parse>/4096       997645 ns       996771 ns          768 142.343M/s 4.04105M/s 23.9363M/s
BM_CompileAPIFileDenseDecls<Phase::Parse>/16384     4020308 ns      4018319 ns          192 152.041M/s 4.05966M/s 24.0874M/s
BM_CompileAPIFileDenseDecls<Phase::Parse>/65536    16691390 ns     16683058 ns           48 151.317M/s 3.92374M/s 23.2936M/s
BM_CompileAPIFileDenseDecls<Phase::Parse>/262144   75265735 ns     75233476 ns            8 135.842M/s 3.48421M/s 20.6862M/s
```

Without the change:

```
----------------------------------------------------------------------------------------------------------------------------
Benchmark                                                 Time             CPU   Iterations      Bytes      Lines     Tokens
----------------------------------------------------------------------------------------------------------------------------
BM_CompileAPIFileDenseDecls<Phase::Parse>/256         51515 ns        51480 ns        13312 102.312M/s 3.78789M/s  21.387M/s
BM_CompileAPIFileDenseDecls<Phase::Parse>/1024       241040 ns       240900 ns         3072 134.865M/s 4.06392M/s 23.8771M/s
BM_CompileAPIFileDenseDecls<Phase::Parse>/4096       985593 ns       984657 ns          768 144.094M/s 4.09077M/s 24.2308M/s
BM_CompileAPIFileDenseDecls<Phase::Parse>/16384     4109327 ns      4105496 ns          192 148.813M/s 3.97345M/s  23.576M/s
BM_CompileAPIFileDenseDecls<Phase::Parse>/65536    17459655 ns     17446006 ns           48   144.7M/s 3.75215M/s  22.275M/s
BM_CompileAPIFileDenseDecls<Phase::Parse>/262144   80802815 ns     80737489 ns            8 126.581M/s 3.24668M/s  19.276M/s
```

---------

Co-authored-by: Chandler Carruth <chandlerc@gmail.com>
Jon Ross-Perkins, 1 year ago
Commit 08f24551ec

+ 2 - 3
toolchain/lex/lex.cpp

@@ -1396,13 +1396,12 @@ auto Lexer::Finalize() -> void {
   // many identifiers to fit an `IdentifierId` into a `token_payload_`, and
   // likewise for `IntId` and so on. If we start adding any of those IDs prior
   // to lexing, we may need to also limit the number of those IDs here.
-  if (buffer_.token_infos_.size() > TokenizedBuffer::MaxTokens) {
+  if (buffer_.token_infos_.size() > TokenIndex::Max) {
     CARBON_DIAGNOSTIC(TooManyTokens, Error,
                       "too many tokens in source file; try splitting into "
                       "multiple source files");
     // Subtract one to leave room for the `FileEnd` token.
-    token_emitter_.Emit(TokenIndex(TokenizedBuffer::MaxTokens - 1),
-                        TooManyTokens);
+    token_emitter_.Emit(TokenIndex(TokenIndex::Max - 1), TooManyTokens);
     // TODO: Convert tokens after the token limit to error tokens to avoid
     // misinterpretation by consumers of the tokenized buffer.
   }

+ 6 - 0
toolchain/lex/token_index.h

@@ -24,6 +24,12 @@ namespace Carbon::Lex {
 //
 // All other APIs to query a `TokenIndex` are on the `TokenizedBuffer`.
 struct TokenIndex : public IndexBase<TokenIndex> {
+  // The number of bits which must be allotted for `TokenIndex`.
+  static constexpr int Bits = 23;
+  // The maximum number of tokens that can be stored, including the FileStart
+  // and FileEnd tokens.
+  static constexpr int Max = 1 << Bits;
+
   static constexpr llvm::StringLiteral Label = "token";
   static const TokenIndex Invalid;
   // Comments aren't tokenized, so this is the first token after FileStart.

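These constants feed both the lexer guard above and the new payload `static_assert` in `tokenized_buffer.h` below. A quick standalone check of the arithmetic (`PayloadBits = 23` is my assumption here, matching the width of the `token_payload_` bit-field):

```
constexpr int Bits = 23;         // TokenIndex::Bits.
constexpr int Max = 1 << Bits;   // TokenIndex::Max: 8,388,608 token slots.

constexpr int PayloadBits = 23;  // Assumed width of TokenInfo::token_payload_.

// Every valid index (0 .. Max - 1) fits in the payload bit-field.
static_assert(PayloadBits >= Bits);
static_assert(Max - 1 <= (1 << PayloadBits) - 1);
```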
+ 6 - 10
toolchain/lex/tokenized_buffer.h

@@ -85,10 +85,6 @@ class TokenDiagnosticConverter : public DiagnosticConverter<TokenIndex> {
 // `HasError` returning true.
 class TokenizedBuffer : public Printable<TokenizedBuffer> {
  public:
-  // The maximum number of tokens that can be stored in the buffer, including
-  // the FileStart and FileEnd tokens.
-  static constexpr int MaxTokens = 1 << 23;
-
   // A comment, which can be a block of lines.
   //
   // This is the API version of `CommentData`.
@@ -281,7 +277,7 @@ class TokenizedBuffer : public Printable<TokenizedBuffer> {
   class TokenInfo {
    public:
     // The kind for this token.
-    auto kind() const -> TokenKind { return TokenKind::Make(kind_); }
+    auto kind() const -> TokenKind { return kind_; }
 
     // Whether this token is preceded by whitespace. We only store the preceding
     // state, and look at the next token to check for trailing whitespace.
@@ -364,6 +360,7 @@ class TokenizedBuffer : public Printable<TokenizedBuffer> {
 
     // Make sure we have enough payload bits to represent token-associated IDs.
     static_assert(PayloadBits >= IntId::TokenIdBits);
+    static_assert(PayloadBits >= TokenIndex::Bits);
 
     // Constructor for a TokenKind that carries no payload, or where the payload
     // will be set later.
@@ -397,13 +394,12 @@ class TokenizedBuffer : public Printable<TokenizedBuffer> {
     // Payload values are typically ID types for which we create at most one per
     // token, so we ensure that `token_payload_` is large enough to fit any
     // token index. Stores to this field may overflow, but we produce an error
-    // in `Lexer::Finalize` if the file has more than `MaxTokens` tokens, so
-    // this value never overflows if lexing succeeds.
-    TokenKind::RawEnumType kind_ : sizeof(TokenKind) * 8;
+    // in `Lexer::Finalize` if the file has more than `TokenIndex::Max` tokens,
+    // so this value never overflows if lexing succeeds.
+    TokenKind kind_;
+    static_assert(sizeof(kind_) == 1, "TokenKind must pack to 8 bits");
     bool has_leading_space_ : 1;
     unsigned token_payload_ : PayloadBits;
-    static_assert(MaxTokens <= 1 << PayloadBits,
-                  "Not enough payload bits to store a token index");
 
     // Separate storage for the byte offset, this is hot while lexing but then
     // generally cold.

+ 3 - 3
toolchain/lex/tokenized_buffer_test.cpp

@@ -1160,9 +1160,9 @@ TEST_F(LexerTest, DiagnosticFileTooLarge) {
     input += "{}\n";
   }
   EXPECT_CALL(consumer,
-              HandleDiagnostic(IsSingleDiagnostic(
-                  DiagnosticKind::TooManyTokens, DiagnosticLevel::Error,
-                  TokenizedBuffer::MaxTokens / 2, 1, _)));
+              HandleDiagnostic(IsSingleDiagnostic(DiagnosticKind::TooManyTokens,
+                                                  DiagnosticLevel::Error,
+                                                  TokenIndex::Max / 2, 1, _)));
   compile_helper_.GetTokenizedBuffer(input, &consumer);
 }
 

+ 2 - 2
toolchain/parse/context.cpp

@@ -45,8 +45,8 @@ auto Context::ReplacePlaceholderNode(int32_t position, NodeKind kind,
   CARBON_CHECK(position >= 0 && position < tree_->size(),
                "position: {0} size: {1}", position, tree_->size());
   auto* node_impl = &tree_->node_impls_[position];
-  CARBON_CHECK(node_impl->kind == NodeKind::Placeholder);
-  *node_impl = {.kind = kind, .has_error = has_error, .token = token};
+  CARBON_CHECK(node_impl->kind() == NodeKind::Placeholder);
+  *node_impl = Tree::NodeImpl(kind, has_error, token);
 }
 
 auto Context::ConsumeAndAddOpenParen(Lex::TokenIndex default_token,

+ 1 - 2
toolchain/parse/context.h

@@ -107,8 +107,7 @@ class Context {
                                kind != NodeKind::InvalidParseStart &&
                                kind != NodeKind::InvalidParseSubtree),
                  "{0} nodes must always have an error", kind);
-    tree_->node_impls_.push_back(
-        {.kind = kind, .has_error = has_error, .token = token});
+    tree_->node_impls_.push_back(Tree::NodeImpl(kind, has_error, token));
   }
 
   // Adds an invalid parse node.

+ 1 - 1
toolchain/parse/tree.cpp

@@ -23,7 +23,7 @@ auto Tree::postorder() const -> llvm::iterator_range<PostorderIterator> {
 
 auto Tree::node_token(NodeId n) const -> Lex::TokenIndex {
   CARBON_CHECK(n.is_valid());
-  return node_impls_[n.index].token;
+  return node_impls_[n.index].token();
 }
 
 auto Tree::Print(llvm::raw_ostream& output) const -> void {

+ 23 - 11
toolchain/parse/tree.h

@@ -120,13 +120,13 @@ class Tree : public Printable<Tree> {
   // full expected structure of the grammar.
   auto node_has_error(NodeId n) const -> bool {
     CARBON_DCHECK(n.is_valid());
-    return node_impls_[n.index].has_error;
+    return node_impls_[n.index].has_error();
   }
 
   // Returns the kind of the given parse tree node.
   auto node_kind(NodeId n) const -> NodeKind {
     CARBON_DCHECK(n.is_valid());
-    return node_impls_[n.index].kind;
+    return node_impls_[n.index].kind();
   }
 
   // Returns the token the given parse tree node models.
@@ -201,12 +201,24 @@ class Tree : public Printable<Tree> {
 
   // The in-memory representation of data used for a particular node in the
   // tree.
-  struct NodeImpl {
-    // The kind of this node. Note that this is only a single byte.
-    NodeKind kind;
+  class NodeImpl {
+   public:
+    explicit NodeImpl(NodeKind kind, bool has_error, Lex::TokenIndex token)
+        : kind_(kind), has_error_(has_error), token_index_(token.index) {
+      CARBON_DCHECK(token.index >= 0, "Unexpected token for node: {0}", token);
+    }
 
-    // We have 3 bytes of padding here that we can pack flags or other compact
-    // data into.
+    auto kind() const -> NodeKind { return kind_; }
+    auto set_kind(NodeKind kind) -> void { kind_ = kind; }
+    auto has_error() const -> bool { return has_error_; }
+    auto token() const -> Lex::TokenIndex {
+      return Lex::TokenIndex(token_index_);
+    }
+
+   private:
+    // The kind of this node. Note that this is only a single byte.
+    NodeKind kind_;
+    static_assert(sizeof(kind_) == 1, "NodeKind must pack to 8 bits");
 
     // Whether this node is or contains a parse error.
     //
@@ -221,20 +233,20 @@ class Tree : public Printable<Tree> {
     // optional (and will depend on the particular parse implementation
     // strategy). The goal is that you can rely on grammar-based structural
     // invariants *until* you encounter a node with this set.
-    bool has_error;
+    bool has_error_ : 1;
 
     // The token root of this node.
-    Lex::TokenIndex token;
+    unsigned token_index_ : Lex::TokenIndex::Bits;
   };
 
-  static_assert(sizeof(NodeImpl) == 8,
+  static_assert(sizeof(NodeImpl) == 4,
                 "Unexpected size of node implementation!");
 
   // Sets the kind of a node. This is intended to allow putting the tree into a
   // state where verification can fail, in order to make the failure path of
   // `Verify` testable.
   auto SetNodeKindForTesting(NodeId node_id, NodeKind kind) -> void {
-    node_impls_[node_id.index].kind = kind;
+    node_impls_[node_id.index].set_kind(kind);
   }
 
   // Depth-first postorder sequence of node implementation data.