9 месяцев назад · cae8aa3adf
--- a/toolchain/diagnostics/diagnostic_kind.def
+++ b/toolchain/diagnostics/diagnostic_kind.def
@@ -74,6 +74,12 @@ CARBON_DIAGNOSTIC_KIND(UnrecognizedCharacters)
 
				 CARBON_DIAGNOSTIC_KIND(UnterminatedString)
			
 
				 CARBON_DIAGNOSTIC_KIND(WrongRealLiteralExponent)
			
 
				 
			
 
				+CARBON_DIAGNOSTIC_KIND(CharLiteralEmpty)
			
 
				+CARBON_DIAGNOSTIC_KIND(CharLiteralInvalidUTF8)
			
 
				+CARBON_DIAGNOSTIC_KIND(CharLiteralOverflow)
			
 
				+CARBON_DIAGNOSTIC_KIND(CharLiteralRaw)
			
 
				+CARBON_DIAGNOSTIC_KIND(CharLiteralUnderflow)
			
 
				+
			
 
				 // ============================================================================
			
 
				 // Parser diagnostics
			
 
				 // ============================================================================
			
--- a/toolchain/lex/BUILD
+++ b/toolchain/lex/BUILD
@@ -129,6 +129,7 @@ cc_library(
 
				     deps = [
			
 
				         ":character_set",
			
 
				         ":helpers",
			
 
				+        ":token_info",
			
 
				         "//common:check",
			
 
				         "//toolchain/diagnostics:diagnostic_emitter",
			
 
				         "@llvm-project//llvm:Support",
			
@@ -189,6 +190,7 @@ cc_library(
 
				         ":numeric_literal",
			
 
				         ":string_literal",
			
 
				         ":token_index",
			
 
				+        ":token_info",
			
 
				         ":token_kind",
			
 
				         ":tokenized_buffer",
			
 
				         "//common:check",
			
@@ -196,6 +198,7 @@ cc_library(
 
				         "//toolchain/base:kind_switch",
			
 
				         "//toolchain/base:shared_value_stores",
			
 
				         "//toolchain/diagnostics:diagnostic_emitter",
			
 
				+        "//toolchain/diagnostics:format_providers",
			
 
				         "//toolchain/source:source_buffer",
			
 
				         "@llvm-project//llvm:Support",
			
 
				     ],
			
--- a/toolchain/lex/lex.cpp
+++ b/toolchain/lex/lex.cpp
@@ -16,11 +16,13 @@
 
				 #include "llvm/Support/Compiler.h"
			
 
				 #include "toolchain/base/kind_switch.h"
			
 
				 #include "toolchain/base/shared_value_stores.h"
			
 
				+#include "toolchain/diagnostics/format_providers.h"
			
 
				 #include "toolchain/lex/character_set.h"
			
 
				 #include "toolchain/lex/helpers.h"
			
 
				 #include "toolchain/lex/numeric_literal.h"
			
 
				 #include "toolchain/lex/string_literal.h"
			
 
				 #include "toolchain/lex/token_index.h"
			
 
				+#include "toolchain/lex/token_info.h"
			
 
				 #include "toolchain/lex/token_kind.h"
			
 
				 #include "toolchain/lex/tokenized_buffer.h"
			
 
				 
			
@@ -1126,6 +1128,15 @@ auto Lexer::LexNumericLiteral(llvm::StringRef source_text, ssize_t& position)
 
				   }
			
 
				 }
			
 
				 
			
 
				+// Emits the `UnterminatedString` error for `literal`, located at the first
				+// byte of the literal's text. `is_char` selects the wording of the message:
				+// "character literal" when true, "string literal" when false.
				+static auto DiagnoseUnterminatedString(
				+    Diagnostics::Emitter<const char*>& emitter, const StringLiteral& literal,
				+    bool is_char) -> void {
				+  const char* literal_start = literal.text().begin();
				+  CARBON_DIAGNOSTIC(UnterminatedString, Error,
				+                    "{0:character|string} literal is missing a terminator",
				+                    Diagnostics::BoolAsSelect);
				+  emitter.Emit(literal_start, UnterminatedString, is_char);
				+}
			
 
				+
			
 
				 auto Lexer::LexStringLiteral(llvm::StringRef source_text, ssize_t& position)
			
 
				     -> LexResult {
			
 
				   std::optional<StringLiteral> literal =
			
@@ -1137,11 +1148,28 @@ auto Lexer::LexStringLiteral(llvm::StringRef source_text, ssize_t& position)
 
				   // Capture the position before we step past the token.
			
 
				   int32_t byte_offset = position;
			
 
				   int string_column = byte_offset - current_line_info().start;
			
 
				-  ssize_t literal_size = literal->text().size();
			
 
				-  position += literal_size;
			
 
				+  position += literal->text().size();
			
 
				+
			
 
				+  // Helper for error paths.
			
 
				+  auto lex_as_error = [&]() {
			
 
				+    return LexTokenWithPayload(TokenKind::Error, literal->text().size(),
			
 
				+                               byte_offset);
			
 
				+  };
			
 
				+
			
 
				+  if (literal->kind() == StringLiteral::Kind::Char) {
			
 
				+    if (!literal->is_terminated()) {
			
 
				+      DiagnoseUnterminatedString(emitter_, *literal, /*is_char=*/true);
			
 
				+      return lex_as_error();
			
 
				+    }
			
 
				+    if (auto value = literal->ComputeCharValue(emitter_)) {
			
 
				+      return LexTokenWithPayload(TokenKind::CharLiteral, value->value,
			
 
				+                                 byte_offset);
			
 
				+    }
			
 
				+    return lex_as_error();
			
 
				+  }
			
 
				 
			
 
				   // Update line and column information.
			
 
				-  if (literal->is_multi_line()) {
			
 
				+  if (literal->kind() != StringLiteral::Kind::SingleLine) {
			
 
				     while (next_line_info().start < position) {
			
 
				       ++line_index_.index;
			
 
				       current_line_info().indent = string_column;
			
@@ -1151,17 +1179,14 @@ auto Lexer::LexStringLiteral(llvm::StringRef source_text, ssize_t& position)
 
				     // last line of the multi-line literal *also* has its indent set.
			
 
				   }
			
 
				 
			
 
				-  if (literal->is_terminated()) {
			
 
				-    auto string_id = buffer_.value_stores_->string_literal_values().Add(
			
 
				-        literal->ComputeValue(buffer_.allocator_, emitter_));
			
 
				-    return LexTokenWithPayload(TokenKind::StringLiteral, string_id.index,
			
 
				-                               byte_offset);
			
 
				-  } else {
			
 
				-    CARBON_DIAGNOSTIC(UnterminatedString, Error,
			
 
				-                      "string is missing a terminator");
			
 
				-    emitter_.Emit(literal->text().begin(), UnterminatedString);
			
 
				-    return LexTokenWithPayload(TokenKind::Error, literal_size, byte_offset);
			
 
				+  if (!literal->is_terminated()) {
			
 
				+    DiagnoseUnterminatedString(emitter_, *literal, /*is_char=*/false);
			
 
				+    return lex_as_error();
			
 
				   }
			
 
				+  auto string_id = buffer_.value_stores_->string_literal_values().Add(
			
 
				+      literal->ComputeStringValue(buffer_.allocator_, emitter_));
			
 
				+  return LexTokenWithPayload(TokenKind::StringLiteral, string_id.index,
			
 
				+                             byte_offset);
			
 
				 }
			
 
				 
			
 
				 auto Lexer::LexOneCharSymbolToken(llvm::StringRef source_text, TokenKind kind,
			
--- a/toolchain/lex/string_literal.cpp
+++ b/toolchain/lex/string_literal.cpp
@@ -24,7 +24,7 @@ static constexpr char DoubleQuotedMultiLineIndicator[] = R"(""")";
 
				 
			
 
				 struct StringLiteral::Introducer {
			
 
				   // The kind of string being introduced.
			
 
				-  MultiLineKind kind;
			
 
				+  Kind kind;
			
 
				   // The terminator for the string, without any '#' suffixes.
			
 
				   llvm::StringRef terminator;
			
 
				   // The length of the introducer, including the file type indicator and
			
@@ -41,17 +41,17 @@ struct StringLiteral::Introducer {
 
				 // recovery purposes, and reject """ literals after lexing.
			
 
				 auto StringLiteral::Introducer::Lex(llvm::StringRef source_text)
			
 
				     -> std::optional<Introducer> {
			
 
				-  MultiLineKind kind = NotMultiLine;
			
 
				+  Kind kind = Kind::SingleLine;
			
 
				   llvm::StringRef indicator;
			
 
				   if (source_text.starts_with(MultiLineIndicator)) {
			
 
				-    kind = MultiLine;
			
 
				+    kind = Kind::MultiLine;
			
 
				     indicator = llvm::StringRef(MultiLineIndicator);
			
 
				   } else if (source_text.starts_with(DoubleQuotedMultiLineIndicator)) {
			
 
				-    kind = MultiLineWithDoubleQuotes;
			
 
				+    kind = Kind::MultiLineWithDoubleQuotes;
			
 
				     indicator = llvm::StringRef(DoubleQuotedMultiLineIndicator);
			
 
				   }
			
 
				 
			
 
				-  if (kind != NotMultiLine) {
			
 
				+  if (kind != Kind::SingleLine) {
			
 
				     // The rest of the line must be a valid file type indicator: a sequence of
			
 
				     // characters containing neither '#' nor '"' followed by a newline.
			
 
				     auto prefix_end = source_text.find_first_of("#\n\"", indicator.size());
			
@@ -64,9 +64,13 @@ auto StringLiteral::Introducer::Lex(llvm::StringRef source_text)
 
				     }
			
 
				   }
			
 
				 
			
 
				-  if (!source_text.empty() && source_text[0] == '"') {
			
 
				+  if (source_text.starts_with('"')) {
			
 
				     return Introducer{
			
 
				-        .kind = NotMultiLine, .terminator = "\"", .prefix_size = 1};
			
 
				+        .kind = Kind::SingleLine, .terminator = "\"", .prefix_size = 1};
			
 
				+  }
			
 
				+
			
 
				+  if (source_text.starts_with('\'')) {
			
 
				+    return Introducer{.kind = Kind::Char, .terminator = "'", .prefix_size = 1};
			
 
				   }
			
 
				 
			
 
				   return std::nullopt;
			
@@ -89,6 +93,12 @@ struct alignas(8) CharSet {
 
				 };
			
 
				 }  // namespace
			
 
				 
			
 
				+// Returns true for the two multi-line kinds (`MultiLine` and
				+// `MultiLineWithDoubleQuotes`), and false for every other kind.
				+static auto IsMultiLine(StringLiteral::Kind kind) -> bool {
				+  using Kind = StringLiteral::Kind;
				+  const bool is_other_kind =
				+      kind != Kind::MultiLine && kind != Kind::MultiLineWithDoubleQuotes;
				+  return !is_other_kind;
				+}
			
 
				+
			
 
				 auto StringLiteral::Lex(llvm::StringRef source_text)
			
 
				     -> std::optional<StringLiteral> {
			
 
				   int64_t cursor = 0;
			
@@ -144,8 +154,8 @@ auto StringLiteral::Lex(llvm::StringRef source_text)
 
				           // If there's either not a character following the escape, or it's a
			
 
				           // single-line string and the escaped character is a newline, we
			
 
				           // should stop here.
			
 
				-          if (cursor >= source_text_size || (introducer->kind == NotMultiLine &&
			
 
				-                                             source_text[cursor] == '\n')) {
			
 
				+          if (cursor >= source_text_size ||
			
 
				+              (!IsMultiLine(introducer->kind) && source_text[cursor] == '\n')) {
			
 
				             llvm::StringRef text = source_text.take_front(cursor);
			
 
				             return StringLiteral(text, text.drop_front(prefix_len),
			
 
				                                  content_needs_validation, hash_level,
			
@@ -155,7 +165,7 @@ auto StringLiteral::Lex(llvm::StringRef source_text)
 
				         }
			
 
				         break;
			
 
				       case '\n':
			
 
				-        if (introducer->kind == NotMultiLine) {
			
 
				+        if (!IsMultiLine(introducer->kind)) {
			
 
				           llvm::StringRef text = source_text.take_front(cursor);
			
 
				           return StringLiteral(text, text.drop_front(prefix_len),
			
 
				                                content_needs_validation, hash_level,
			
@@ -466,21 +476,82 @@ static auto ExpandEscapeSequencesAndRemoveIndent(
 
				   }
			
 
				 }
			
 
				 
			
 
				-auto StringLiteral::ComputeValue(llvm::BumpPtrAllocator& allocator,
			
 
				-                                 DiagnosticEmitter& emitter) const
			
 
				-    -> llvm::StringRef {
			
 
				-  if (!is_terminated_) {
			
 
				-    return "";
			
 
				+// Computes the code point denoted by this character literal.
				+//
				+// Requires a terminated `Kind::Char` literal. Escape sequences are expanded
				+// with the same helper used for string literals, and the expanded bytes are
				+// then decoded as strict UTF-8 into a single code point. Returns
				+// `std::nullopt`, after emitting a diagnostic, when the content is empty,
				+// does not fit in one code point, or is incomplete or invalid UTF-8. A `#`
				+// prefix is diagnosed as well, but decoding still proceeds.
				+auto StringLiteral::ComputeCharValue(Diagnostics::Emitter<const char*>& emitter)
				+    const -> std::optional<CharLiteralValue> {
				+  CARBON_DCHECK(kind_ == Kind::Char);
				+  CARBON_DCHECK(is_terminated_);
				+
				+  if (hash_level_ != 0) {
				+    CARBON_DIAGNOSTIC(CharLiteralRaw, Error,
				+                      "unexpected `#` before character literal");
				+    emitter.Emit(text_.begin(), CharLiteralRaw);
				   }
				-  if (multi_line_ == MultiLineWithDoubleQuotes) {
				+
				+  // Allocate a buffer sized to the content. Note it's possible this could be
				+  // more efficient/faster with a `ExpandEscapeSequencesAndRemoveIndent`
				+  // implementation aware of the buffer size, but this is trying to share logic
				+  // with string expansion.
				+  llvm::SmallVector<char> buffer;
				+  buffer.resize_for_overwrite(content_.size());
				+
				+  auto result = ExpandEscapeSequencesAndRemoveIndent(
				+      emitter, content_, 0, /*indent=*/llvm::StringRef(), buffer.data());
				+  CARBON_CHECK(result.size() <= content_.size(),
				+               "Content grew from {0} to {1}: `{2}`", content_.size(),
				+               result.size(), content_);
				+
				+  // Decode into a one-element buffer. If nothing is decoded the target cursor
				+  // does not advance, which is how an empty literal is detected below.
				+  llvm::UTF32 target[1] = {0};
				+  const auto* source_cursor =
				+      reinterpret_cast<const llvm::UTF8*>(result.begin());
				+  llvm::UTF32* target_cursor = target;
				+  llvm::ConversionResult conv_result = llvm::ConvertUTF8toUTF32(
				+      &source_cursor, reinterpret_cast<const llvm::UTF8*>(result.end()),
				+      &target_cursor, std::end(target), llvm::strictConversion);
				+
				+  switch (conv_result) {
				+    case llvm::conversionOK: {
				+      if (target_cursor == target) {
				+        CARBON_DIAGNOSTIC(CharLiteralEmpty, Error, "empty character literal");
				+        emitter.Emit(text_.begin(), CharLiteralEmpty);
				+        return std::nullopt;
				+      }
				+      return CharLiteralValue{.value = static_cast<int32_t>(target[0])};
				+    }
				+    case llvm::sourceExhausted: {
				+      CARBON_DIAGNOSTIC(CharLiteralUnderflow, Error, "incomplete UTF-8");
				+      emitter.Emit(text_.begin(), CharLiteralUnderflow);
				+      return std::nullopt;
				+    }
				+    case llvm::targetExhausted: {
				+      CARBON_DIAGNOSTIC(CharLiteralOverflow, Error, "too many characters");
				+      emitter.Emit(text_.begin(), CharLiteralOverflow);
				+      return std::nullopt;
				+    }
				+    case llvm::sourceIllegal: {
				+      CARBON_DIAGNOSTIC(CharLiteralInvalidUTF8, Error,
				+                        "invalid UTF-8 character");
				+      emitter.Emit(text_.begin(), CharLiteralInvalidUTF8);
				+      return std::nullopt;
				+    }
				+  }
				+
				+  // Every case above returns. Fail loudly instead of running off the end of
				+  // a non-void function if the conversion ever reports anything else.
				+  CARBON_CHECK(false, "Unexpected UTF-8 conversion result: {0}",
				+               static_cast<int>(conv_result));
				+  return std::nullopt;
				+}
			
 
				+
			
 
				+auto StringLiteral::ComputeStringValue(llvm::BumpPtrAllocator& allocator,
			
 
				+                                       DiagnosticEmitter& emitter) const
			
 
				+    -> llvm::StringRef {
			
 
				+  CARBON_DCHECK(kind_ != Kind::Char);
			
 
				+  CARBON_DCHECK(is_terminated_);
			
 
				+
			
 
				+  if (kind_ == Kind::MultiLineWithDoubleQuotes) {
			
 
				     CARBON_DIAGNOSTIC(
			
 
				         MultiLineStringWithDoubleQuotes, Error,
			
 
				         "use `'''` delimiters for a multi-line string literal, not `\"\"\"`");
			
 
				     emitter.Emit(text_.begin(), MultiLineStringWithDoubleQuotes);
			
 
				   }
			
 
				-  llvm::StringRef indent =
			
 
				-      multi_line_ ? CheckIndent(emitter, text_, content_) : llvm::StringRef();
			
 
				-  if (!content_needs_validation_ && (!multi_line_ || indent.empty())) {
			
 
				+  llvm::StringRef indent = IsMultiLine(kind_)
			
 
				+                               ? CheckIndent(emitter, text_, content_)
			
 
				+                               : llvm::StringRef();
			
 
				+  if (!content_needs_validation_ && (!IsMultiLine(kind_) || indent.empty())) {
			
 
				     return content_;
			
 
				   }
			
 
				 
			
--- a/toolchain/lex/string_literal.h
+++ b/toolchain/lex/string_literal.h
@@ -10,11 +10,27 @@
 
				 #include "llvm/ADT/StringRef.h"
			
 
				 #include "llvm/Support/Allocator.h"
			
 
				 #include "toolchain/diagnostics/diagnostic_emitter.h"
			
 
				+#include "toolchain/lex/token_info.h"
			
 
				 
			
 
				 namespace Carbon::Lex {
			
 
				 
			
 
				 class StringLiteral {
			
 
				  public:
			
 
				+  // The form of literal that was lexed. Character literals go through the
				+  // same lexing as strings, so they are represented as a kind here too.
				+  enum class Kind : int8_t {
				+    // A character literal, `'<content>'`.
				+    Char,
				+
				+    // A single-line string, `"<content>"`.
				+    SingleLine,
				+
				+    // A multi-line string, `'''<content>'''`.
				+    MultiLine,
				+
				+    // An incorrectly double-quoted multi-line string, `"""<content>"""`. It is
				+    // diagnosed when the string value is computed.
				+    MultiLineWithDoubleQuotes,
				+  };
			
 
				+
			
 
				   // Extract a string literal token from the given text, if it has a suitable
			
 
				   // form. Returning std::nullopt indicates no string literal was found;
			
 
				   // returning an invalid literal indicates a string prefix was found, but it's
			
@@ -22,58 +38,64 @@ class StringLiteral {
 
				   // construction.
			
 
				   static auto Lex(llvm::StringRef source_text) -> std::optional<StringLiteral>;
			
 
				 
			
 
				+  // Expand any escape sequences and compute the resulting character. This
			
 
				+  // handles error recovery internally, but can return nullopt for an invalid
			
 
				+  // character.
			
 
				+  auto ComputeCharValue(Diagnostics::Emitter<const char*>& emitter) const
			
 
				+      -> std::optional<CharLiteralValue>;
			
 
				+
			
 
				   // Expand any escape sequences in the given string literal and compute the
			
 
				   // resulting value. This handles error recovery internally and cannot fail.
			
 
				   //
			
 
				   // When content_needs_validation_ is false and the string has no indent to
			
 
				   // deal with, this can return the content directly. Otherwise, the allocator
			
 
				   // will be used for the StringRef.
			
 
				-  auto ComputeValue(llvm::BumpPtrAllocator& allocator,
			
 
				-                    Diagnostics::Emitter<const char*>& emitter) const
			
 
				+  auto ComputeStringValue(llvm::BumpPtrAllocator& allocator,
			
 
				+                          Diagnostics::Emitter<const char*>& emitter) const
			
 
				       -> llvm::StringRef;
			
 
				 
			
 
				   // Get the text corresponding to this literal.
			
 
				   auto text() const -> llvm::StringRef { return text_; }
			
 
				 
			
 
				-  // Determine whether this is a multi-line string literal.
				-  auto is_multi_line() const -> bool { return multi_line_; }
				+  // Get the kind of this literal: a character literal, a single-line string,
				+  // or one of the multi-line string forms.
				+  auto kind() const -> Kind { return kind_; }
			
 
				 
			
 
				   // Returns true if the string has a valid terminator.
			
 
				   auto is_terminated() const -> bool { return is_terminated_; }
			
 
				 
			
 
				  private:
			
 
				-  enum MultiLineKind : int8_t {
			
 
				-    NotMultiLine,
			
 
				-    MultiLine,
			
 
				-    MultiLineWithDoubleQuotes
			
 
				-  };
			
 
				-
			
 
				   struct Introducer;
			
 
				 
			
 
				   explicit StringLiteral(llvm::StringRef text, llvm::StringRef content,
			
 
				                          bool content_needs_validation, int hash_level,
			
 
				-                         MultiLineKind multi_line, bool is_terminated)
			
 
				+                         Kind kind, bool is_terminated)
			
 
				       : text_(text),
			
 
				         content_(content),
			
 
				         content_needs_validation_(content_needs_validation),
			
 
				         hash_level_(hash_level),
			
 
				-        multi_line_(multi_line),
			
 
				+        kind_(kind),
			
 
				         is_terminated_(is_terminated) {}
			
 
				 
			
 
				   // The complete text of the string literal.
			
 
				   llvm::StringRef text_;
			
 
				+
			
 
				   // The content of the literal. For a multi-line literal, this begins
			
 
				   // immediately after the newline following the file type indicator, and ends
			
 
				   // at the start of the closing `"""`. Leading whitespace is not removed from
			
 
				   // either end.
			
 
				   llvm::StringRef content_;
			
 
				+
			
 
				   // Whether content needs validation, in particular due to either an escape
			
 
				   // (which needs modifications) or a tab character (which may cause a warning).
			
 
				   bool content_needs_validation_;
			
 
				+
			
 
				   // The number of `#`s preceding the opening `"` or `"""`.
			
 
				   int hash_level_;
			
 
				-  // Whether this was a multi-line string literal.
			
 
				-  MultiLineKind multi_line_;
			
 
				+
			
 
				+  // Whether this was a single-line string literal, multi-line string literal,
			
 
				+  // or a char literal.
			
 
				+  Kind kind_;
			
 
				+
			
 
				   // Whether the literal is valid, or should only be used for errors.
			
 
				   bool is_terminated_;
			
 
				 };
			
--- a/toolchain/lex/string_literal_benchmark.cpp
+++ b/toolchain/lex/string_literal_benchmark.cpp
@@ -92,54 +92,56 @@ static void BM_SimpleStringValue(benchmark::State& state, int size,
 
				   std::string x(introducer);
			
 
				   x.append(size, 'a');
			
 
				   if (add_escape) {
			
 
				-    // Adds a basic escape that forces ComputeValue to generate a new string.
			
 
				+    // Adds a basic escape that forces ComputeStringValue to generate a new
			
 
				+    // string.
			
 
				     x.append("\\\\");
			
 
				   }
			
 
				   x.append(terminator);
			
 
				   for (auto _ : state) {
			
 
				-    StringLiteral::Lex(x)->ComputeValue(
			
 
				+    StringLiteral::Lex(x)->ComputeStringValue(
			
 
				         allocator, Diagnostics::NullEmitter<const char*>());
			
 
				   }
			
 
				 }
			
 
				 
			
 
				-static void BM_ComputeValue_NoGenerate_Short(benchmark::State& state) {
				+// Short single-line literal (10 characters) with no escape appended.
				+static void BM_ComputeStringValue_NoGenerate_Short(benchmark::State& state) {
				   BM_SimpleStringValue(state, 10, "\"", /*add_escape=*/false, "\"");
				 }
			
 
				 
			
 
				-static void BM_ComputeValue_NoGenerate_Long(benchmark::State& state) {
				+// Long single-line literal (10000 characters) with no escape appended.
				+static void BM_ComputeStringValue_NoGenerate_Long(benchmark::State& state) {
				   BM_SimpleStringValue(state, 10000, "\"", /*add_escape=*/false, "\"");
				 }
			
 
				 
			
 
				-static void BM_ComputeValue_WillGenerate_Short(benchmark::State& state) {
				+// Short single-line literal (10 characters) ending in a `\\` escape.
				+static void BM_ComputeStringValue_WillGenerate_Short(benchmark::State& state) {
				   BM_SimpleStringValue(state, 10, "\"", /*add_escape=*/true, "\"");
				 }
			
 
				 
			
 
				-static void BM_ComputeValue_WillGenerate_Long(benchmark::State& state) {
				+// Long single-line literal (10000 characters) ending in a `\\` escape.
				+static void BM_ComputeStringValue_WillGenerate_Long(benchmark::State& state) {
				   BM_SimpleStringValue(state, 10000, "\"", /*add_escape=*/true, "\"");
				 }
			
 
				 
			
 
				-static void BM_ComputeValue_WillGenerate_Multiline(benchmark::State& state) {
				+// Long `'''` multi-line literal (10000 characters) ending in a `\\` escape.
				+static void BM_ComputeStringValue_WillGenerate_Multiline(
				+    benchmark::State& state) {
				   BM_SimpleStringValue(state, 10000, "'''\n", /*add_escape=*/true, "\n'''");
				 }
			
 
				 
			
 
				-static void BM_ComputeValue_WillGenerate_MultilineDoubleQuote(
				+// Long `"""` multi-line literal (10000 characters) ending in a `\\` escape.
				+static void BM_ComputeStringValue_WillGenerate_MultilineDoubleQuote(
				     benchmark::State& state) {
				   BM_SimpleStringValue(state, 10000, "\"\"\"\n", /*add_escape=*/true,
				                        "\n\"\"\"");
				 }
			
 
				 
			
 
				-static void BM_ComputeValue_WillGenerate_Raw(benchmark::State& state) {
				+// Long `#"..."#` literal (10000 characters) with the `\\` text appended.
				+static void BM_ComputeStringValue_WillGenerate_Raw(benchmark::State& state) {
				   BM_SimpleStringValue(state, 10000, "#\"", /*add_escape=*/true, "\"#");
				 }
			
 
				 
			
 
				-BENCHMARK(BM_ComputeValue_NoGenerate_Short);
			
 
				-BENCHMARK(BM_ComputeValue_NoGenerate_Long);
			
 
				+BENCHMARK(BM_ComputeStringValue_NoGenerate_Short);
			
 
				+BENCHMARK(BM_ComputeStringValue_NoGenerate_Long);
			
 
				 
			
 
				-BENCHMARK(BM_ComputeValue_WillGenerate_Short);
			
 
				-BENCHMARK(BM_ComputeValue_WillGenerate_Long);
			
 
				-BENCHMARK(BM_ComputeValue_WillGenerate_Multiline);
			
 
				-BENCHMARK(BM_ComputeValue_WillGenerate_MultilineDoubleQuote);
			
 
				-BENCHMARK(BM_ComputeValue_WillGenerate_Raw);
			
 
				+BENCHMARK(BM_ComputeStringValue_WillGenerate_Short);
			
 
				+BENCHMARK(BM_ComputeStringValue_WillGenerate_Long);
			
 
				+BENCHMARK(BM_ComputeStringValue_WillGenerate_Multiline);
			
 
				+BENCHMARK(BM_ComputeStringValue_WillGenerate_MultilineDoubleQuote);
			
 
				+BENCHMARK(BM_ComputeStringValue_WillGenerate_Raw);
			
 
				 
			
 
				 }  // namespace
			
 
				 }  // namespace Carbon::Lex
			
--- a/toolchain/lex/string_literal_fuzzer.cpp
+++ b/toolchain/lex/string_literal_fuzzer.cpp
@@ -14,29 +14,46 @@ namespace Carbon::Testing {
 
				 
			
 
				 // NOLINTNEXTLINE: Match the documented fuzzer entry point declaration style.
			
 
				 extern "C" int LLVMFuzzerTestOneInput(const unsigned char* data, size_t size) {
				-  auto token = Lex::StringLiteral::Lex(
				+  // Treat the raw fuzzer input as the text of one candidate literal.
				+  auto literal = Lex::StringLiteral::Lex(
				       llvm::StringRef(reinterpret_cast<const char*>(data), size));
				-  if (!token) {
				+  if (!literal) {
				     // Lexically not a string literal.
				     return 0;
				   }
				 
				-  if (!token->is_terminated()) {
				+  if (!literal->is_terminated()) {
				     // Found errors while parsing.
				     return 0;
				   }
				 
				-  fprintf(stderr, "valid: %d\n", token->is_terminated());
				-  fprintf(stderr, "size: %lu\n", token->text().size());
				-  fprintf(stderr, "text: %s\n", token->text().str().c_str());
				+  fprintf(stderr, "valid: %d\n", literal->is_terminated());
				+  // The size is a `size_t`, so print it with `%zu` rather than `%lu`.
				+  fprintf(stderr, "size: %zu\n", literal->text().size());
				+  fprintf(stderr, "text: %s\n", literal->text().str().c_str());
				 
				   // Check multiline flag was computed correctly.
				-  CARBON_CHECK(token->is_multi_line() == token->text().contains('\n'));
				+  switch (literal->kind()) {
				+    case Lex::StringLiteral::Kind::Char:
				+      break;
				+
				+    case Lex::StringLiteral::Kind::SingleLine:
				+      CARBON_CHECK(!literal->text().contains('\n'));
				+      break;
				+
				+    case Lex::StringLiteral::Kind::MultiLine:
				+    case Lex::StringLiteral::Kind::MultiLineWithDoubleQuotes:
				+      CARBON_CHECK(literal->text().contains('\n'));
				+      break;
				+  }
				 
				-  llvm::BumpPtrAllocator allocator;
				-  volatile auto value =
				-      token->ComputeValue(allocator, Diagnostics::NullEmitter<const char*>());
				-  (void)value;
				+  // Compute the value with the function matching the literal's kind. The
				+  // result is unused; the call is only made to exercise the code.
				+  auto* null_emitter = &Diagnostics::NullEmitter<const char*>();
				+  if (literal->kind() == Lex::StringLiteral::Kind::Char) {
				+    volatile auto value = literal->ComputeCharValue(*null_emitter);
				+    (void)value;
				+  } else {
				+    llvm::BumpPtrAllocator allocator;
				+    volatile auto value = literal->ComputeStringValue(allocator, *null_emitter);
				+    (void)value;
				+  }
				 
				   return 0;
				 }
			
--- a/toolchain/lex/string_literal_test.cpp
+++ b/toolchain/lex/string_literal_test.cpp
@@ -32,7 +32,7 @@ class StringLiteralTest : public ::testing::Test {
 
				   auto Parse(llvm::StringRef text) -> llvm::StringRef {
			
 
				     StringLiteral token = Lex(text);
			
 
				     Testing::SingleTokenDiagnosticEmitter emitter(&error_tracker_, text);
			
 
				-    return token.ComputeValue(allocator_, emitter);
			
 
				+    return token.ComputeStringValue(allocator_, emitter);
			
 
				   }
			
 
				 
			
 
				   llvm::BumpPtrAllocator allocator_;
			
--- a/toolchain/lex/testdata/char_literals.carbon
+++ b/toolchain/lex/testdata/char_literals.carbon
@@ -0,0 +1,63 @@
 
				+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
			
 
				+// Exceptions. See /LICENSE for license information.
			
 
				+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
			
 
				+//
			
 
				+// AUTOUPDATE
			
 
				+// TIP: To test this file alone, run:
			
 
				+// TIP:   bazel test //toolchain/testing:file_test --test_arg=--file_tests=toolchain/lex/testdata/char_literals.carbon
			
 
				+// TIP: To dump output, run:
			
 
				+// TIP:   bazel run //toolchain/testing:file_test -- --dump_output --file_tests=toolchain/lex/testdata/char_literals.carbon
			
 
				+
			
 
				+// --- valid.carbon
			
 
				+// CHECK:STDOUT: - filename: valid.carbon
			
 
				+// CHECK:STDOUT:   tokens:
			
 
				+
			
 
				+'a'
			
 
				+// CHECK:STDOUT:   - { index: 1, kind: "CharLiteral", line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: "'a'", has_leading_space: true }
			
 
				+'\n'
			
 
				+// CHECK:STDOUT:   - { index: 2, kind: "CharLiteral", line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: "'\\n'", has_leading_space: true }
			
 
				+'\x7F'
			
 
				+// CHECK:STDOUT:   - { index: 3, kind: "CharLiteral", line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: "'\\x7F'", has_leading_space: true }
			
 
				+'\u{123}'
			
 
				+// CHECK:STDOUT:   - { index: 4, kind: "CharLiteral", line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: "'\\u{123}'", has_leading_space: true }
			
 
				+'\xC3\xA9'
			
 
				+// CHECK:STDOUT:   - { index: 5, kind: "CharLiteral", line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: "'\\xC3\\xA9'", has_leading_space: true }
			
 
				+
			
 
				+// --- fail_invalid.carbon
			
 
				+// CHECK:STDOUT: - filename: fail_invalid.carbon
			
 
				+// CHECK:STDOUT:   tokens:
			
 
				+
			
 
				+// CHECK:STDERR: fail_invalid.carbon:[[@LINE+4]]:1: error: empty character literal [CharLiteralEmpty]
			
 
				+// CHECK:STDERR: ''
			
 
				+// CHECK:STDERR: ^
			
 
				+// CHECK:STDERR:
			
 
				+''
			
 
				+// CHECK:STDOUT:   - { index: 1, kind:       "Error", line: {{ *}}[[@LINE-1]], column:   1, indent: 1, spelling: "''", has_leading_space: true }
			
 
				+
			
 
				+// CHECK:STDERR: fail_invalid.carbon:[[@LINE+4]]:1: error: too many characters [CharLiteralOverflow]
			
 
				+// CHECK:STDERR: 'abcde'
			
 
				+// CHECK:STDERR: ^
			
 
				+// CHECK:STDERR:
			
 
				+'abcde'
			
 
				+// CHECK:STDOUT:   - { index: 2, kind:       "Error", line: {{ *}}[[@LINE-1]], column:   1, indent: 1, spelling: "'abcde'", has_leading_space: true }
			
 
				+
			
 
				+// CHECK:STDERR: fail_invalid.carbon:[[@LINE+4]]:1: error: incomplete UTF-8 [CharLiteralUnderflow]
			
 
				+// CHECK:STDERR: '\xC3'
			
 
				+// CHECK:STDERR: ^
			
 
				+// CHECK:STDERR:
			
 
				+'\xC3'
			
 
				+// CHECK:STDOUT:   - { index: 3, kind:       "Error", line: {{ *}}[[@LINE-1]], column:   1, indent: 1, spelling: "'\\xC3'", has_leading_space: true }
			
 
				+
			
 
				+// CHECK:STDERR: fail_invalid.carbon:[[@LINE+4]]:1: error: invalid UTF-8 character [CharLiteralInvalidUTF8]
			
 
				+// CHECK:STDERR: '\xC3\xFF'
			
 
				+// CHECK:STDERR: ^
			
 
				+// CHECK:STDERR:
			
 
				+'\xC3\xFF'
			
 
				+// CHECK:STDOUT:   - { index: 4, kind:       "Error", line: {{ *}}[[@LINE-1]], column:   1, indent: 1, spelling: "'\\xC3\\xFF'", has_leading_space: true }
			
 
				+
			
 
				+// CHECK:STDERR: fail_invalid.carbon:[[@LINE+4]]:1: error: unexpected `#` before character literal [CharLiteralRaw]
			
 
				+// CHECK:STDERR: #'a'#
			
 
				+// CHECK:STDERR: ^
			
 
				+// CHECK:STDERR:
			
 
				+#'a'#
			
 
				+// CHECK:STDOUT:   - { index: 5, kind: "CharLiteral", line: {{ *}}[[@LINE-1]], column:   1, indent: 1, spelling: "#'a'#", has_leading_space: true }
			
--- a/toolchain/lex/testdata/string_literals.carbon
+++ b/toolchain/lex/testdata/string_literals.carbon
@@ -23,7 +23,7 @@
 
				 // CHECK:STDOUT: - filename: fail_unterminated.carbon
			
 
				 // CHECK:STDOUT:   tokens:
			
 
				 
			
 
				-// CHECK:STDERR: fail_unterminated.carbon:[[@LINE+4]]:1: error: string is missing a terminator [UnterminatedString]
			
 
				+// CHECK:STDERR: fail_unterminated.carbon:[[@LINE+4]]:1: error: string literal is missing a terminator [UnterminatedString]
			
 
				 // CHECK:STDERR: "s
			
 
				 // CHECK:STDERR: ^
			
 
				 // CHECK:STDERR:
			
--- a/toolchain/lex/token_info.h
+++ b/toolchain/lex/token_info.h
@@ -13,6 +13,14 @@
 
				 
			
 
				 namespace Carbon::Lex {
			
 
				 
			
 
				+// A character as a Unicode code point.
			
 
				+//
			
 
				+// Unicode requires 21 bits, which should fit inside `TokenInfo::PayloadBits`,
			
 
				+// so we store the value directly.
			
 
				+struct CharLiteralValue {
			
 
				+  int32_t value;
			
 
				+};
			
 
				+
			
 
				 // Storage for the information about a specific token, as an implementation
			
 
				 // detail of `TokenizedBuffer`.
			
 
				 //
			
@@ -69,6 +77,11 @@ class TokenInfo {
 
				     return StringLiteralValueId(token_payload_);
			
 
				   }
			
 
				 
			
 
				+  auto char_literal() const -> CharLiteralValue {
			
 
				+    CARBON_DCHECK(kind() == TokenKind::CharLiteral);
			
 
				+    return CharLiteralValue(token_payload_);
			
 
				+  }
			
 
				+
			
 
				   auto int_id() const -> IntId {
			
 
				     CARBON_DCHECK(kind() == TokenKind::IntLiteral ||
			
 
				                   kind() == TokenKind::IntTypeLiteral ||
			
--- a/toolchain/lex/token_kind.def
+++ b/toolchain/lex/token_kind.def
@@ -226,6 +226,7 @@ CARBON_TOKEN(Identifier)
 
				 CARBON_TOKEN(IntLiteral)
			
 
				 CARBON_TOKEN(RealLiteral)
			
 
				 CARBON_TOKEN(StringLiteral)
			
 
				+CARBON_TOKEN(CharLiteral)
			
 
				 CARBON_TOKEN(IntTypeLiteral)
			
 
				 CARBON_TOKEN(UnsignedIntTypeLiteral)
			
 
				 CARBON_TOKEN(FloatTypeLiteral)
			
--- a/toolchain/lex/tokenized_buffer.cpp
+++ b/toolchain/lex/tokenized_buffer.cpp
@@ -82,7 +82,8 @@ auto TokenizedBuffer::GetTokenText(TokenIndex token) const -> llvm::StringRef {
 
				 
			
 
				   // Refer back to the source text to find the original spelling, including
			
 
				   // escape sequences etc.
			
 
				-  if (token_info.kind() == TokenKind::StringLiteral) {
			
 
				+  if (token_info.kind() == TokenKind::StringLiteral ||
			
 
				+      token_info.kind() == TokenKind::CharLiteral) {
			
 
				     std::optional<StringLiteral> relexed_token =
			
 
				         StringLiteral::Lex(source_->text().substr(token_info.byte_offset()));
			
 
				     CARBON_CHECK(relexed_token, "Could not reform string literal token.");
			
@@ -137,6 +138,14 @@ auto TokenizedBuffer::GetStringLiteralValue(TokenIndex token) const
 
				   return token_info.string_literal_id();
			
 
				 }
			
 
				 
			
 
				+auto TokenizedBuffer::GetCharLiteralValue(TokenIndex token) const
			
 
				+    -> CharLiteralValue {
			
 
				+  const auto& token_info = token_infos_.Get(token);
			
 
				+  CARBON_CHECK(token_info.kind() == TokenKind::CharLiteral, "{0}",
			
 
				+               token_info.kind());
			
 
				+  return token_info.char_literal();
			
 
				+}
			
 
				+
			
 
				 auto TokenizedBuffer::GetTypeLiteralSize(TokenIndex token) const -> IntId {
			
 
				   const auto& token_info = token_infos_.Get(token);
			
 
				   CARBON_CHECK(token_info.kind().is_sized_type_literal(), "{0}",
			
--- a/toolchain/lex/tokenized_buffer.h
+++ b/toolchain/lex/tokenized_buffer.h
@@ -138,16 +138,19 @@ class TokenizedBuffer : public Printable<TokenizedBuffer> {
 
				   // an `Identifier`.
			
 
				   auto GetIdentifier(TokenIndex token) const -> IdentifierId;
			
 
				 
			
 
				-  // Returns the value of an `IntLiteral()` token.
			
 
				+  // Returns the value of an `IntLiteral` token.
			
 
				   auto GetIntLiteral(TokenIndex token) const -> IntId;
			
 
				 
			
 
				-  // Returns the value of an `RealLiteral()` token.
			
 
				+  // Returns the value of a `RealLiteral` token.
			
 
				   auto GetRealLiteral(TokenIndex token) const -> RealId;
			
 
				 
			
 
				-  // Returns the value of a `StringLiteral()` token.
			
 
				+  // Returns the value of a `StringLiteral` token.
			
 
				   auto GetStringLiteralValue(TokenIndex token) const -> StringLiteralValueId;
			
 
				 
			
 
				-  // Returns the size specified in a `*TypeLiteral()` token.
			
 
				+  // Returns the value of a `CharLiteral` token.
			
 
				+  auto GetCharLiteralValue(TokenIndex token) const -> CharLiteralValue;
			
 
				+
			
 
				+  // Returns the size specified in a `*TypeLiteral` token.
			
 
				   auto GetTypeLiteralSize(TokenIndex token) const -> IntId;
			
 
				 
			
 
				   // Returns the closing token matched with the given opening token.