8 月之前 · ad84e71acd
--- a/.gitattributes
+++ b/.gitattributes
@@ -5,7 +5,3 @@
 
				 # This tells Github to detect files having the extension `.def` as `C++` files, which
			
 
				 # ensures that these files get syntax highlighted properly.
			
 
				 *.def linguist-language=C++
			
 
				-
			
 
				-# This tells Git to treat lexer tests as text when producing diffs, even if
			
 
				-# they contain non-printable characters.
			
 
				-toolchain/lex/testdata/*.carbon diff
			
--- a/testing/file_test/test_file.cpp
+++ b/testing/file_test/test_file.cpp
@@ -145,7 +145,8 @@ static auto AutoFillDidOpenParams(llvm::json::Object& params,
 
				   return Success();
			
 
				 }
			
 
				 
			
 
				-// Reformats `[[@LSP:` and similar keyword as an LSP call with headers.
			
 
				+// Reformats `[[@LSP:` and similar keywords as an LSP call with headers. Returns
			
 
				+// the position to start a find for the next keyword.
			
 
				 static auto ReplaceLspKeywordAt(std::string& content, size_t keyword_pos,
			
 
				                                 int& lsp_call_id,
			
 
				                                 llvm::ArrayRef<TestFile::Split> splits)
			
@@ -154,7 +155,7 @@ static auto ReplaceLspKeywordAt(std::string& content, size_t keyword_pos,
 
				       llvm::StringRef(content).substr(keyword_pos);
			
 
				 
			
 
				   auto [keyword, body_start] = content_at_keyword.split(":");
			
 
				-  if (body_start.empty()) {
			
 
				+  if (keyword.size() == content_at_keyword.size()) {
			
 
				     return ErrorBuilder() << "Missing `:` for `"
			
 
				                           << content_at_keyword.take_front(10) << "`";
			
 
				   }
			
@@ -179,12 +180,11 @@ static auto ReplaceLspKeywordAt(std::string& content, size_t keyword_pos,
 
				   }
			
 
				 
			
 
				   static constexpr llvm::StringLiteral LspEnd = "]]";
			
 
				-  auto body_end = body_start.find(LspEnd);
			
 
				-  if (body_end == std::string::npos) {
			
 
				+  auto [body, rest] = body_start.split("]]");
			
 
				+  if (body.size() == body_start.size()) {
			
 
				     return ErrorBuilder() << "Missing `" << LspEnd << "` after `" << keyword
			
 
				                           << "`";
			
 
				   }
			
 
				-  llvm::StringRef body = body_start.take_front(body_end);
			
 
				   auto [method_or_id, extra_content] = body.split(":");
			
 
				 
			
 
				   llvm::json::Value parsed_extra_content = nullptr;
			
@@ -231,12 +231,33 @@ static auto ReplaceLspKeywordAt(std::string& content, size_t keyword_pos,
 
				   auto json_with_header = llvm::formatv("Content-Length: {0}\n\n{1}\n",
			
 
				                                         content_length, buffer.TakeStr())
			
 
				                               .str();
			
 
				-  int keyword_len =
			
 
				-      (body_start.data() + body_end + LspEnd.size()) - keyword.data();
			
 
				+  size_t keyword_len = rest.data() - keyword.data();
			
 
				   content.replace(keyword_pos, keyword_len, json_with_header);
			
 
				   return keyword_pos + json_with_header.size();
			
 
				 }
			
 
				 
			
 
				+// Replaces `[[@0xAB]]` with the raw byte with value 0xAB. Returns the position
				+// to start a find for the next keyword.
				+//
				+// Assumes `content` at `keyword_pos` begins with `[[@0x`; the prefix is skipped
				+// without being re-checked. Returns an error if the closing `]]` is missing or
				+// if the text between the prefix and `]]` is not a hexadecimal value that fits
				+// in a single byte.
				+static auto ReplaceRawByteKeywordAt(std::string& content, size_t keyword_pos)
				+    -> ErrorOr<size_t> {
				+  static constexpr llvm::StringLiteral RawBytePrefix = "[[@0x";
				+  static constexpr llvm::StringLiteral RawByteEnd = "]]";
				+
				+  llvm::StringRef content_at_keyword =
				+      llvm::StringRef(content).substr(keyword_pos);
				+  auto [keyword, rest] = content_at_keyword.split(RawByteEnd);
				+  // `split` returns the whole input as the first element when the separator
				+  // is absent, so equal sizes mean there was no closing `]]`.
				+  if (keyword.size() == content_at_keyword.size()) {
				+    return ErrorBuilder() << "Missing `" << RawByteEnd << "` after `"
				+                          << keyword.take_front(10) << "`";
				+  }
				+
				+  // Parse the hex digits after the prefix. `getAsInteger` returns true on
				+  // failure, which covers empty input, non-hex characters, and values that
				+  // overflow `unsigned char`.
				+  unsigned char byte_value;
				+  if (keyword.substr(RawBytePrefix.size()).getAsInteger(16, byte_value)) {
				+    return ErrorBuilder() << "Invalid raw byte specifier `"
				+                          << keyword.take_front(10) << "`";
				+  }
				+
				+  // Swap the whole `[[@0x..]]` span (keyword plus terminator) for one byte.
				+  content.replace(keyword_pos, keyword.size() + RawByteEnd.size(), 1,
				+                  byte_value);
				+  // Resume searching immediately after the inserted byte.
				+  return keyword_pos + 1;
				+}
			
 
				+
			
 
				 // Replaces the keyword at the given position. Returns the position to start a
			
 
				 // find for the next keyword.
			
 
				 static auto ReplaceContentKeywordAt(std::string& content, size_t keyword_pos,
			
@@ -263,14 +284,18 @@ static auto ReplaceContentKeywordAt(std::string& content, size_t keyword_pos,
 
				     return ReplaceLspKeywordAt(content, keyword_pos, lsp_call_id, splits);
			
 
				   }
			
 
				 
			
 
				+  if (keyword.starts_with("[[@0x")) {
			
 
				+    return ReplaceRawByteKeywordAt(content, keyword_pos);
			
 
				+  }
			
 
				+
			
 
				   return ErrorBuilder() << "Unexpected use of `[[@` at `"
			
 
				                         << keyword.substr(0, 5) << "`";
			
 
				 }
			
 
				 
			
 
				 // Replaces the content keywords.
			
 
				 //
			
 
				-// TEST_NAME is the only content keyword at present, but we do validate that
			
 
				-// other names are reserved.
			
 
				+// This handles content keywords such as [[@TEST_NAME]] and [[@LSP*]]. Unknown
			
 
				+// content keywords are diagnosed.
			
 
				 static auto ReplaceContentKeywords(llvm::StringRef filename,
			
 
				                                    std::string& content,
			
 
				                                    llvm::ArrayRef<TestFile::Split> splits)
			
--- a/testing/file_test/testdata/replace_content.carbon
+++ b/testing/file_test/testdata/replace_content.carbon
@@ -11,3 +11,6 @@
 
				 
			
 
				 library "[[@TEST_NAME]]";
			
 
				 // CHECK:STDOUT: replace_content.carbon:[[@LINE-1]]: library "replace_content";
			
 
				+
			
 
				+var x: str = "[[@0x48]][[@0x65]][[@0x6C]][[@0x6C]][[@0x6F]]";
			
 
				+// CHECK:STDOUT: replace_content.carbon:[[@LINE-1]]: var x: str = "Hello";
			
--- a/toolchain/diagnostics/diagnostic.cpp
+++ b/toolchain/diagnostics/diagnostic.cpp
@@ -7,6 +7,8 @@
 
				 #include <algorithm>
			
 
				 #include <cstdint>
			
 
				 
			
 
				+#include "llvm/ADT/Sequence.h"
			
 
				+
			
 
				 namespace Carbon::Diagnostics {
			
 
				 
			
 
				 auto Loc::FormatLocation(llvm::raw_ostream& out) const -> void {
			
@@ -38,22 +40,51 @@ auto Loc::FormatSnippet(llvm::raw_ostream& out, int indent) const -> void {
 
				   if (column_number == -1) {
			
 
				     return;
			
 
				   }
			
 
				-
			
 
				   // column_number is 1-based.
			
 
				-  int32_t column = column_number - 1;
			
 
				+  const int caret_byte_offset = column_number - 1;
			
 
				 
			
 
				   out.indent(indent);
			
 
				-  out << line << "\n";
			
 
				 
			
 
				-  out.indent(indent + column);
			
 
				+  int column = 0;
			
 
				+  int caret_column = 0;
			
 
				+  int underline_end_column = 0;
			
 
				+
			
 
				+  int byte_offset = 0;
			
 
				+  for (char c : line) {
			
 
				+    // TODO: Handle tab characters.
			
 
				+    // TODO: Print Unicode characters directly, and use
			
 
				+    // llvm::sys::unicode::getColumnWidth to determine their width.
			
 
				+    if (std::isprint(static_cast<unsigned char>(c))) {
			
 
				+      out << c;
			
 
				+      ++column;
			
 
				+    } else {
			
 
				+      // TODO: Consider using ANSI colors to distinguish this from the program
			
 
				+      // text.
			
 
				+      int pos = out.tell();
			
 
				+      out << '<';
			
 
				+      llvm::write_hex(out, static_cast<unsigned char>(c),
			
 
				+                      llvm::HexPrintStyle::Upper, 2);
			
 
				+      out << '>';
			
 
				+      column += out.tell() - pos;
			
 
				+    }
			
 
				+
			
 
				+    ++byte_offset;
			
 
				+    if (byte_offset <= caret_byte_offset) {
			
 
				+      caret_column = column;
			
 
				+    }
			
 
				+    if (byte_offset <= caret_byte_offset + length) {
			
 
				+      underline_end_column = column;
			
 
				+    }
			
 
				+  }
			
 
				+
			
 
				+  out << "\n";
			
 
				+
			
 
				+  out.indent(indent + caret_column);
			
 
				   out << "^";
			
 
				-  // We want to ensure that we don't underline past the end of the line in
			
 
				-  // case of a multiline token.
			
 
				-  // TODO: Revisit this once we can reference multiple ranges on multiple
			
 
				-  // lines in a single diagnostic message.
			
 
				-  int underline_length =
			
 
				-      std::min(length, static_cast<int32_t>(line.size()) - column);
			
 
				-  for (int i = 1; i < underline_length; ++i) {
			
 
				+  // TODO: Revisit this once we can reference multiple ranges in a single
			
 
				+  // diagnostic message.
			
 
				+  for (auto _ :
			
 
				+       llvm::seq(std::max(underline_end_column - caret_column - 1, 0))) {
			
 
				     out << '~';
			
 
				   }
			
 
				   out << '\n';
			
--- a/toolchain/lex/testdata/char_literals.carbon
+++ b/toolchain/lex/testdata/char_literals.carbon
@@ -105,8 +105,8 @@
 
				 
			
 
				 // This literal contains a raw tab character.
			
 
				 // CHECK:STDERR: fail_invalid.carbon:[[@LINE+4]]:2: error: whitespace other than plain space must be expressed with an escape sequence in a string literal [InvalidHorizontalWhitespaceInString]
			
 
				-// CHECK:STDERR: '{{\t}}'
			
 
				-// CHECK:STDERR:  ^
			
 
				+// CHECK:STDERR: '<09>'
			
 
				+// CHECK:STDERR:  ^~~~
			
 
				 // CHECK:STDERR:
			
 
				 '	'
			
 
				 // CHECK:STDOUT:   - { index: 11, kind: "CharLiteral", line: {{ *}}[[@LINE-1]], column:   1, indent: 1, spelling: "'\t'", has_leading_space: true }
			
--- a/toolchain/lex/testdata/fail_bad_raw_identifier.carbon
+++ b/toolchain/lex/testdata/fail_bad_raw_identifier.carbon
@@ -34,7 +34,7 @@ r#3
 
				 
			
 
				 // Non ascii start to identifier.
			
 
				 // CHECK:STDERR: fail_bad_raw_identifier.carbon:[[@LINE+4]]:2: error: encountered unrecognized characters while parsing [UnrecognizedCharacters]
			
 
				-// CHECK:STDERR: r#á
			
 
				+// CHECK:STDERR: r#<C3><A1>
			
 
				 // CHECK:STDERR:  ^
			
 
				 // CHECK:STDERR:
			
 
				 r#á
			
--- a/toolchain/lex/testdata/fail_char_literals_bad_encoding.carbon
+++ b/toolchain/lex/testdata/fail_char_literals_bad_encoding.carbon
--- a/toolchain/lex/testdata/string_literals.carbon
+++ b/toolchain/lex/testdata/string_literals.carbon
@@ -69,8 +69,8 @@
 
				 // CHECK:STDOUT:   tokens:
			
 
				 
			
 
				 // CHECK:STDERR: fail_literal_tab_in_string.carbon:[[@LINE+4]]:2: error: whitespace other than plain space must be expressed with an escape sequence in a string literal [InvalidHorizontalWhitespaceInString]
			
 
				-// CHECK:STDERR: "{{\t}}"
			
 
				-// CHECK:STDERR:  ^
			
 
				+// CHECK:STDERR: "<09>"
			
 
				+// CHECK:STDERR:  ^~~~
			
 
				 // CHECK:STDERR:
			
 
				 "	"
			
 
				 // CHECK:STDOUT:   - { index: 1, kind: "StringLiteral", line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: "\"\t\"", value: "\t", has_leading_space: true }