Bläddra i källkod

Add location information to diagnostics. (#385)

Add simple mocking and testing of diagnostic emission to ensure this
works.
Richard Smith 5 år sedan
förälder
incheckning
d1757d9979

+ 21 - 0
diagnostics/BUILD

@@ -13,11 +13,32 @@ cc_library(
     deps = ["@llvm-project//llvm:Support"],
 )
 
+cc_library(
+    name = "null_diagnostics",
+    hdrs = ["null_diagnostics.h"],
+    deps = [
+        ":diagnostic_emitter",
+        "@llvm-project//llvm:Support",
+    ],
+)
+
+cc_library(
+    name = "mocks",
+    testonly = 1,
+    hdrs = ["mocks.h"],
+    deps = [
+        ":diagnostic_emitter",
+        "@llvm-project//llvm:Support",
+        "@llvm-project//llvm:gmock",
+    ],
+)
+
 cc_test(
     name = "diagnostic_emitter_test",
     srcs = ["diagnostic_emitter_test.cpp"],
     deps = [
         ":diagnostic_emitter",
+        ":mocks",
         "@llvm-project//llvm:Support",
         "@llvm-project//llvm:gmock",
         "@llvm-project//llvm:gtest",

+ 78 - 22
diagnostics/diagnostic_emitter.h

@@ -23,63 +23,119 @@ namespace Carbon {
 // TODO: turn this into a much more reasonable API when we add some actual
 // uses of it.
 struct Diagnostic {
+  struct Location {
+    // Name of the file or buffer that this diagnostic refers to.
+    std::string file_name;
+    // 1-based line number.
+    int32_t line_number;
+    // 1-based column number.
+    int32_t column_number;
+  };
+
+  Location location;
   llvm::StringRef short_name;
   std::string message;
 };
 
+// Receives diagnostics as they are emitted.
+class DiagnosticConsumer {
+ public:
+  virtual ~DiagnosticConsumer() = default;
+
+  // Handle a diagnostic.
+  virtual auto HandleDiagnostic(const Diagnostic& diagnostic) -> void = 0;
+};
+
+// An interface that can translate some representation of a location into a
+// diagnostic location.
+//
+// TODO: Revisit this once the diagnostics machinery is more complete and see
+// if we can turn it into a `std::function`.
+template <typename LocationT>
+class DiagnosticLocationTranslator {
+ public:
+  virtual ~DiagnosticLocationTranslator() = default;
+
+  [[nodiscard]] virtual auto GetLocation(LocationT loc)
+      -> Diagnostic::Location = 0;
+};
+
 // Manages the creation of reports, the testing if diagnostics are enabled, and
 // the collection of reports.
+//
+// This class is parameterized by a location type, allowing different
+// diagnostic clients to provide location information in whatever form is most
+// convenient for them, such as a position within a buffer when lexing, a token
+// when parsing, or a parse tree node when type-checking, and to allow unit
+// tests to be decoupled from any concrete location representation.
+template <typename LocationT>
 class DiagnosticEmitter {
  public:
-  using Callback = std::function<void(const Diagnostic&)>;
-
-  explicit DiagnosticEmitter(Callback callback)
-      : callback_(std::move(callback)) {}
+  // The `translator` and `consumer` are required to outlive the diagnostic
+  // emitter.
+  explicit DiagnosticEmitter(
+      DiagnosticLocationTranslator<LocationT>& translator,
+      DiagnosticConsumer& consumer)
+      : translator_(&translator), consumer_(&consumer) {}
   ~DiagnosticEmitter() = default;
 
   // Emits an error unconditionally.
   template <typename DiagnosticT>
-  auto EmitError(DiagnosticT diag) -> void {
-    callback_({.short_name = DiagnosticT::ShortName, .message = diag.Format()});
+  auto EmitError(LocationT location, DiagnosticT diag) -> void {
+    // TODO: Encode the diagnostic kind in the Diagnostic object rather than
+    // hardcoding an "error: " prefix.
+    consumer_->HandleDiagnostic({.location = translator_->GetLocation(location),
+                                 .short_name = DiagnosticT::ShortName,
+                                 .message = "error: " + diag.Format()});
   }
 
   // Emits a stateless error unconditionally.
   template <typename DiagnosticT>
-  auto EmitError() -> std::enable_if_t<std::is_empty_v<DiagnosticT>> {
-    EmitError<DiagnosticT>({});
+  auto EmitError(LocationT location)
+      -> std::enable_if_t<std::is_empty_v<DiagnosticT>> {
+    EmitError<DiagnosticT>(location, {});
   }
 
   // Emits a warning if `F` returns true.  `F` may or may not be called if the
   // warning is disabled.
   template <typename DiagnosticT>
-  auto EmitWarningIf(llvm::function_ref<bool(DiagnosticT&)> f) -> void {
-    // TODO(kfm): check if this warning is enabled
+  auto EmitWarningIf(LocationT location,
+                     llvm::function_ref<bool(DiagnosticT&)> f) -> void {
+    // TODO(kfm): check if this warning is enabled at `location`.
     DiagnosticT diag;
     if (f(diag)) {
-      callback_(
-          {.short_name = DiagnosticT::ShortName, .message = diag.Format()});
+      // TODO: Encode the diagnostic kind in the Diagnostic object rather than
+      // hardcoding a "warning: " prefix.
+      consumer_->HandleDiagnostic(
+          {.location = translator_->GetLocation(location),
+           .short_name = DiagnosticT::ShortName,
+           .message = "warning: " + diag.Format()});
     }
   }
 
  private:
-  Callback callback_;
+  DiagnosticLocationTranslator<LocationT>* translator_;
+  DiagnosticConsumer* consumer_;
 };
 
-inline auto ConsoleDiagnosticEmitter() -> DiagnosticEmitter& {
-  static auto* emitter = new DiagnosticEmitter(
-      [](const Diagnostic& d) { llvm::errs() << d.message << "\n"; });
-  return *emitter;
-}
+inline auto ConsoleDiagnosticConsumer() -> DiagnosticConsumer& {
+  struct Consumer : DiagnosticConsumer {
+    auto HandleDiagnostic(const Diagnostic& d) -> void override {
+      if (!d.location.file_name.empty()) {
+        llvm::errs() << d.location.file_name << ":" << d.location.line_number
+                     << ":" << d.location.column_number << ": ";
+      }
 
-inline auto NullDiagnosticEmitter() -> DiagnosticEmitter& {
-  static auto* emitter = new DiagnosticEmitter([](const Diagnostic&) {});
-  return *emitter;
+      llvm::errs() << d.message << "\n";
+    }
+  };
+  static auto* consumer = new Consumer;
+  return *consumer;
 }
 
 // CRTP base class for diagnostics with no substitutions.
 template <typename Derived>
 struct SimpleDiagnostic {
-  struct Substitutions {};
   static auto Format() -> std::string { return Derived::Message.str(); }
 };
 

+ 38 - 20
diagnostics/diagnostic_emitter_test.cpp

@@ -4,6 +4,7 @@
 
 #include "diagnostics/diagnostic_emitter.h"
 
+#include "diagnostics/mocks.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 #include "llvm/ADT/StringRef.h"
@@ -12,6 +13,9 @@
 namespace Carbon {
 namespace {
 
+using Testing::DiagnosticAt;
+using Testing::DiagnosticMessage;
+using Testing::DiagnosticShortName;
 using ::testing::ElementsAre;
 using ::testing::Eq;
 
@@ -32,42 +36,56 @@ struct FakeDiagnostic {
   }
 };
 
-TEST(DiagTest, EmitErrors) {
-  std::vector<std::string> reported;
-
-  DiagnosticEmitter emitter([&](const Diagnostic& diagnostic) {
-    EXPECT_THAT(diagnostic.short_name, Eq("fake-diagnostic"));
-    reported.push_back(diagnostic.message);
-  });
-
-  emitter.EmitError<FakeDiagnostic>({.message = "M1"});
-  emitter.EmitError<FakeDiagnostic>({.message = "M2"});
+struct FakeDiagnosticLocationTranslator : DiagnosticLocationTranslator<int> {
+  auto GetLocation(int n) -> Diagnostic::Location override {
+    return {.file_name = "test", .line_number = 1, .column_number = n};
+  }
+};
 
-  EXPECT_THAT(reported, ElementsAre("M1", "M2"));
+TEST(DiagTest, EmitErrors) {
+  FakeDiagnosticLocationTranslator translator;
+  Testing::MockDiagnosticConsumer consumer;
+  DiagnosticEmitter<int> emitter(translator, consumer);
+
+  EXPECT_CALL(consumer, HandleDiagnostic(AllOf(
+                            DiagnosticAt(1, 1), DiagnosticMessage("error: M1"),
+                            DiagnosticShortName("fake-diagnostic"))));
+  EXPECT_CALL(consumer, HandleDiagnostic(AllOf(
+                            DiagnosticAt(1, 2), DiagnosticMessage("error: M2"),
+                            DiagnosticShortName("fake-diagnostic"))));
+
+  emitter.EmitError<FakeDiagnostic>(1, {.message = "M1"});
+  emitter.EmitError<FakeDiagnostic>(2, {.message = "M2"});
 }
 
 TEST(DiagTest, EmitWarnings) {
   std::vector<std::string> reported;
 
-  DiagnosticEmitter emitter([&](const Diagnostic& diagnostic) {
-    EXPECT_THAT(diagnostic.short_name, Eq("fake-diagnostic"));
-    reported.push_back(diagnostic.message);
-  });
+  FakeDiagnosticLocationTranslator translator;
+  Testing::MockDiagnosticConsumer consumer;
+  DiagnosticEmitter<int> emitter(translator, consumer);
 
-  emitter.EmitWarningIf<FakeDiagnostic>([](FakeDiagnostic& diagnostic) {
+  EXPECT_CALL(consumer,
+              HandleDiagnostic(AllOf(DiagnosticAt(1, 3),
+                                     DiagnosticMessage("warning: M1"),
+                                     DiagnosticShortName("fake-diagnostic"))));
+  EXPECT_CALL(consumer,
+              HandleDiagnostic(AllOf(DiagnosticAt(1, 5),
+                                     DiagnosticMessage("warning: M3"),
+                                     DiagnosticShortName("fake-diagnostic"))));
+
+  emitter.EmitWarningIf<FakeDiagnostic>(3, [](FakeDiagnostic& diagnostic) {
     diagnostic.message = "M1";
     return true;
   });
-  emitter.EmitWarningIf<FakeDiagnostic>([](FakeDiagnostic& diagnostic) {
+  emitter.EmitWarningIf<FakeDiagnostic>(4, [](FakeDiagnostic& diagnostic) {
     diagnostic.message = "M2";
     return false;
   });
-  emitter.EmitWarningIf<FakeDiagnostic>([](FakeDiagnostic& diagnostic) {
+  emitter.EmitWarningIf<FakeDiagnostic>(5, [](FakeDiagnostic& diagnostic) {
     diagnostic.message = "M3";
     return true;
   });
-
-  EXPECT_THAT(reported, ElementsAre("M1", "M3"));
 }
 
 }  // namespace

+ 53 - 0
diagnostics/mocks.h

@@ -0,0 +1,53 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef DIAGNOSTICS_MOCKS_H_
+#define DIAGNOSTICS_MOCKS_H_
+
+#include "diagnostics/diagnostic_emitter.h"
+#include "gmock/gmock.h"
+
+namespace Carbon {
+namespace Testing {
+
+class MockDiagnosticConsumer : public DiagnosticConsumer {
+ public:
+  // TODO: Use `MOCK_METHOD` once it's available.
+  MOCK_METHOD1(HandleDiagnostic, void(const Diagnostic& diagnostic));
+};
+
+// Matcher `DiagnosticAt` matches the location of a diagnostic.
+MATCHER_P2(DiagnosticAt, line, column, "") {
+  const Diagnostic& diag = arg;
+  const Diagnostic::Location& loc = diag.location;
+  if (loc.line_number != line) {
+    *result_listener << "\nExpected diagnostic on line " << line
+                     << " but diagnostic is on line " << loc.line_number << ".";
+    return false;
+  }
+  if (loc.column_number != column) {
+    *result_listener << "\nExpected diagnostic on column " << column
+                     << " but diagnostic is on column " << loc.column_number
+                     << ".";
+    return false;
+  }
+  return true;
+}
+
+template <typename Matcher>
+auto DiagnosticMessage(Matcher&& inner_matcher) -> auto {
+  return testing::Field(&Diagnostic::message,
+                        std::forward<Matcher&&>(inner_matcher));
+}
+
+template <typename Matcher>
+auto DiagnosticShortName(Matcher&& inner_matcher) -> auto {
+  return testing::Field(&Diagnostic::short_name,
+                        std::forward<Matcher&&>(inner_matcher));
+}
+
+}  // namespace Testing
+}  // namespace Carbon
+
+#endif  // DIAGNOSTICS_MOCKS_H_

+ 39 - 0
diagnostics/null_diagnostics.h

@@ -0,0 +1,39 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef DIAGNOSTICS_NULL_DIAGNOSTICS_H_
+#define DIAGNOSTICS_NULL_DIAGNOSTICS_H_
+
+#include "diagnostics/diagnostic_emitter.h"
+
+namespace Carbon {
+
+template <typename LocationT>
+inline auto NullDiagnosticLocationTranslator()
+    -> DiagnosticLocationTranslator<LocationT>& {
+  struct Translator : DiagnosticLocationTranslator<LocationT> {
+    auto GetLocation(LocationT) -> Diagnostic::Location override { return {}; }
+  };
+  static Translator* translator = new Translator;
+  return *translator;
+}
+
+inline auto NullDiagnosticConsumer() -> DiagnosticConsumer& {
+  struct Consumer : DiagnosticConsumer {
+    auto HandleDiagnostic(const Diagnostic& d) -> void override {}
+  };
+  static auto* consumer = new Consumer;
+  return *consumer;
+}
+
+template <typename LocationT>
+inline auto NullDiagnosticEmitter() -> DiagnosticEmitter<LocationT>& {
+  static auto* emitter = new DiagnosticEmitter<LocationT>(
+      NullDiagnosticLocationTranslator<LocationT>(), NullDiagnosticConsumer());
+  return *emitter;
+}
+
+}  // namespace Carbon
+
+#endif  // DIAGNOSTICS_NULL_DIAGNOSTICS_H_

+ 16 - 0
lexer/BUILD

@@ -32,6 +32,16 @@ cc_library(
     deps = ["@llvm-project//llvm:Support"],
 )
 
+cc_library(
+    name = "test_helpers",
+    testonly = 1,
+    hdrs = ["test_helpers.h"],
+    deps = [
+        "//diagnostics:diagnostic_emitter",
+        "@llvm-project//llvm:Support",
+    ],
+)
+
 cc_library(
     name = "numeric_literal",
     srcs = ["numeric_literal.cpp"],
@@ -48,6 +58,7 @@ cc_test(
     srcs = ["numeric_literal_test.cpp"],
     deps = [
         ":numeric_literal",
+        ":test_helpers",
         "//diagnostics:diagnostic_emitter",
         "@llvm-project//llvm:Support",
         "@llvm-project//llvm:gmock",
@@ -63,6 +74,7 @@ cc_fuzz_test(
     deps = [
         ":numeric_literal",
         "//diagnostics:diagnostic_emitter",
+        "//diagnostics:null_diagnostics",
         "@llvm-project//llvm:Support",
     ],
 )
@@ -83,6 +95,7 @@ cc_test(
     srcs = ["string_literal_test.cpp"],
     deps = [
         ":string_literal",
+        ":test_helpers",
         "//diagnostics:diagnostic_emitter",
         "@llvm-project//llvm:Support",
         "@llvm-project//llvm:gmock",
@@ -98,6 +111,7 @@ cc_fuzz_test(
     deps = [
         ":string_literal",
         "//diagnostics:diagnostic_emitter",
+        "//diagnostics:null_diagnostics",
         "@llvm-project//llvm:Support",
     ],
 )
@@ -135,6 +149,7 @@ cc_test(
         ":tokenized_buffer",
         ":tokenized_buffer_test_helpers",
         "//diagnostics:diagnostic_emitter",
+        "//diagnostics:mocks",
         "@llvm-project//llvm:Support",
         "@llvm-project//llvm:gmock",
         "@llvm-project//llvm:gtest",
@@ -149,6 +164,7 @@ cc_fuzz_test(
     deps = [
         ":tokenized_buffer",
         "//diagnostics:diagnostic_emitter",
+        "//diagnostics:null_diagnostics",
         "@llvm-project//llvm:Support",
     ],
 )

+ 11 - 8
lexer/numeric_literal.cpp

@@ -145,7 +145,7 @@ auto LexedNumericLiteral::Lex(llvm::StringRef source_text)
   return result;
 }
 
-LexedNumericLiteral::Parser::Parser(DiagnosticEmitter& emitter,
+LexedNumericLiteral::Parser::Parser(DiagnosticEmitter<const char*>& emitter,
                                     LexedNumericLiteral literal)
     : emitter(emitter), literal(literal) {
   int_part = literal.text.substr(0, literal.radix_point);
@@ -279,19 +279,20 @@ auto LexedNumericLiteral::Parser::CheckDigitSequence(
       // next to another digit separator, or at the end.
       if (!allow_digit_separators || i == 0 || text[i - 1] == '_' ||
           i + 1 == n) {
-        emitter.EmitError<InvalidDigitSeparator>();
+        emitter.EmitError<InvalidDigitSeparator>(text.begin() + i);
         recovered_from_error = true;
       }
       ++num_digit_separators;
       continue;
     }
 
-    emitter.EmitError<InvalidDigit>({.digit = c, .radix = radix});
+    emitter.EmitError<InvalidDigit>(text.begin() + i,
+                                    {.digit = c, .radix = radix});
     return {.ok = false};
   }
 
   if (num_digit_separators == static_cast<int>(text.size())) {
-    emitter.EmitError<EmptyDigitSequence>();
+    emitter.EmitError<EmptyDigitSequence>(text.begin());
     return {.ok = false};
   }
 
@@ -319,8 +320,8 @@ auto LexedNumericLiteral::Parser::CheckDigitSeparatorPlacement(
   assert((radix == 10 || radix == 16) &&
          "unexpected radix for digit separator checks");
 
-  auto diagnose_irregular_digit_separators = [&] {
-    emitter.EmitError<IrregularDigitSeparators>({.radix = radix});
+  auto diagnose_irregular_digit_separators = [&]() {
+    emitter.EmitError<IrregularDigitSeparators>(text.begin(), {.radix = radix});
     recovered_from_error = true;
   };
 
@@ -348,7 +349,7 @@ auto LexedNumericLiteral::Parser::CheckDigitSeparatorPlacement(
 // Check that we don't have a '0' prefix on a non-zero decimal integer.
 auto LexedNumericLiteral::Parser::CheckLeadingZero() -> bool {
   if (radix == 10 && int_part.startswith("0") && int_part != "0") {
-    emitter.EmitError<UnknownBaseSpecifier>();
+    emitter.EmitError<UnknownBaseSpecifier>(int_part.begin());
     return false;
   }
   return true;
@@ -369,7 +370,8 @@ auto LexedNumericLiteral::Parser::CheckFractionalPart() -> bool {
   }
 
   if (radix == 2) {
-    emitter.EmitError<BinaryRealLiteral>();
+    emitter.EmitError<BinaryRealLiteral>(literal.text.begin() +
+                                         literal.radix_point);
     recovered_from_error = true;
     // Carry on and parse the binary real literal anyway.
   }
@@ -391,6 +393,7 @@ auto LexedNumericLiteral::Parser::CheckExponentPart() -> bool {
   char expected_exponent_kind = (radix == 10 ? 'e' : 'p');
   if (literal.text[literal.exponent] != expected_exponent_kind) {
     emitter.EmitError<WrongRealLiteralExponent>(
+        literal.text.begin() + literal.exponent,
         {.expected = expected_exponent_kind});
     return false;
   }

+ 2 - 2
lexer/numeric_literal.h

@@ -51,7 +51,7 @@ class LexedNumericLiteral {
 // either diagnosing or extracting its meaning.
 class LexedNumericLiteral::Parser {
  public:
-  Parser(DiagnosticEmitter& emitter, LexedNumericLiteral literal);
+  Parser(DiagnosticEmitter<const char*>& emitter, LexedNumericLiteral literal);
 
   auto IsInteger() -> bool {
     return literal.radix_point == static_cast<int>(literal.text.size());
@@ -98,7 +98,7 @@ class LexedNumericLiteral::Parser {
   auto CheckExponentPart() -> bool;
 
  private:
-  DiagnosticEmitter& emitter;
+  DiagnosticEmitter<const char*>& emitter;
   LexedNumericLiteral literal;
 
   // The radix of the literal: 2, 10, or 16, for a prefix of '0b', no prefix,

+ 3 - 1
lexer/numeric_literal_fuzzer.cpp

@@ -6,6 +6,7 @@
 #include <cstring>
 
 #include "diagnostics/diagnostic_emitter.h"
+#include "diagnostics/null_diagnostics.h"
 #include "lexer/numeric_literal.h"
 #include "llvm/ADT/StringRef.h"
 
@@ -21,7 +22,8 @@ extern "C" int LLVMFuzzerTestOneInput(const unsigned char* data,
     return 0;
   }
 
-  LexedNumericLiteral::Parser parser(NullDiagnosticEmitter(), *token);
+  LexedNumericLiteral::Parser parser(NullDiagnosticEmitter<const char*>(),
+                                     *token);
   if (parser.Check() == LexedNumericLiteral::Parser::UnrecoverableError) {
     // Lexically OK, but token is meaningless.
     return 0;

+ 12 - 1
lexer/numeric_literal_test.cpp

@@ -5,15 +5,22 @@
 #include "lexer/numeric_literal.h"
 
 #include <iterator>
+#include <memory>
+#include <vector>
 
 #include "diagnostics/diagnostic_emitter.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
+#include "lexer/test_helpers.h"
 
 namespace Carbon {
 namespace {
 
 struct NumericLiteralTest : ::testing::Test {
+  std::vector<std::unique_ptr<Testing::SingleTokenDiagnosticTranslator>>
+      translators;
+  std::vector<std::unique_ptr<DiagnosticEmitter<const char*>>> emitters;
+
   auto Lex(llvm::StringRef text) -> LexedNumericLiteral {
     llvm::Optional<LexedNumericLiteral> result = LexedNumericLiteral::Lex(text);
     assert(result);
@@ -22,7 +29,11 @@ struct NumericLiteralTest : ::testing::Test {
   }
 
   auto Parse(llvm::StringRef text) -> LexedNumericLiteral::Parser {
-    return LexedNumericLiteral::Parser(ConsoleDiagnosticEmitter(), Lex(text));
+    translators.push_back(
+        std::make_unique<Testing::SingleTokenDiagnosticTranslator>(text));
+    emitters.push_back(std::make_unique<DiagnosticEmitter<const char*>>(
+        *translators.back(), ConsoleDiagnosticConsumer()));
+    return LexedNumericLiteral::Parser(*emitters.back(), Lex(text));
   }
 };
 

+ 19 - 17
lexer/string_literal.cpp

@@ -13,6 +13,8 @@
 
 namespace Carbon {
 
+using LexerDiagnosticEmitter = DiagnosticEmitter<const char*>;
+
 struct ContentBeforeStringTerminator
     : SimpleDiagnostic<ContentBeforeStringTerminator> {
   static constexpr llvm::StringLiteral ShortName = "syntax-invalid-string";
@@ -177,7 +179,7 @@ struct Indent {
 // Check the literal is indented properly, if it's a multi-line litera.
 // Find the leading whitespace that should be removed from each line of a
 // multi-line string literal.
-static auto CheckIndent(DiagnosticEmitter& emitter, llvm::StringRef text,
+static auto CheckIndent(LexerDiagnosticEmitter& emitter, llvm::StringRef text,
                         llvm::StringRef content) -> Indent {
   // Find the leading horizontal whitespace on the final line of this literal.
   // Note that for an empty literal, this might not be inside the content.
@@ -187,7 +189,7 @@ static auto CheckIndent(DiagnosticEmitter& emitter, llvm::StringRef text,
   // The last line is not permitted to contain any content after its
   // indentation.
   if (indent.end() != content.end()) {
-    emitter.EmitError<ContentBeforeStringTerminator>();
+    emitter.EmitError<ContentBeforeStringTerminator>(indent.end());
     has_errors = true;
   }
 
@@ -195,17 +197,17 @@ static auto CheckIndent(DiagnosticEmitter& emitter, llvm::StringRef text,
 }
 
 // Expand a `\u{HHHHHH}` escape sequence into a sequence of UTF-8 code units.
-static auto ExpandUnicodeEscapeSequence(DiagnosticEmitter& emitter,
+static auto ExpandUnicodeEscapeSequence(LexerDiagnosticEmitter& emitter,
                                         llvm::StringRef digits,
                                         std::string& result) -> bool {
   unsigned code_point;
   if (digits.getAsInteger(16, code_point) || code_point > 0x10FFFF) {
-    emitter.EmitError<UnicodeEscapeTooLarge>();
+    emitter.EmitError<UnicodeEscapeTooLarge>(digits.begin());
     return false;
   }
 
   if (code_point >= 0xD800 && code_point < 0xE000) {
-    emitter.EmitError<UnicodeEscapeSurrogate>();
+    emitter.EmitError<UnicodeEscapeSurrogate>(digits.begin());
     return false;
   }
 
@@ -229,7 +231,7 @@ static auto ExpandUnicodeEscapeSequence(DiagnosticEmitter& emitter,
 // `result` string. `content` is the string content, starting from the first
 // character after the escape sequence introducer (for example, the `n` in
 // `\n`), and will be updated to remove the leading escape sequence.
-static auto ExpandAndConsumeEscapeSequence(DiagnosticEmitter& emitter,
+static auto ExpandAndConsumeEscapeSequence(LexerDiagnosticEmitter& emitter,
                                            llvm::StringRef& content,
                                            std::string& result) -> bool {
   assert(!content.empty() && "should have escaped closing delimiter");
@@ -258,7 +260,7 @@ static auto ExpandAndConsumeEscapeSequence(DiagnosticEmitter& emitter,
     case '0':
       result += '\0';
       if (!content.empty() && IsDecimalDigit(content.front())) {
-        emitter.EmitError<DecimalEscapeSequence>();
+        emitter.EmitError<DecimalEscapeSequence>(content.begin());
         return false;
       }
       return true;
@@ -270,7 +272,7 @@ static auto ExpandAndConsumeEscapeSequence(DiagnosticEmitter& emitter,
         content = content.drop_front(2);
         return true;
       }
-      emitter.EmitError<HexadecimalEscapeMissingDigits>();
+      emitter.EmitError<HexadecimalEscapeMissingDigits>(content.begin());
       break;
     case 'u': {
       llvm::StringRef remaining = content;
@@ -285,11 +287,12 @@ static auto ExpandAndConsumeEscapeSequence(DiagnosticEmitter& emitter,
           return true;
         }
       }
-      emitter.EmitError<UnicodeEscapeMissingBracedDigits>();
+      emitter.EmitError<UnicodeEscapeMissingBracedDigits>(content.begin());
       break;
     }
     default:
-      emitter.EmitError<UnknownEscapeSequence>({.first = first});
+      emitter.EmitError<UnknownEscapeSequence>(content.begin() - 1,
+                                               {.first = first});
       break;
   }
 
@@ -301,11 +304,9 @@ static auto ExpandAndConsumeEscapeSequence(DiagnosticEmitter& emitter,
 }
 
 // Expand any escape sequences in the given string literal.
-static auto ExpandEscapeSequencesAndRemoveIndent(DiagnosticEmitter& emitter,
-                                                 llvm::StringRef contents,
-                                                 int hash_level,
-                                                 llvm::StringRef indent)
-    -> LexedStringLiteral::ExpandedValue {
+static auto ExpandEscapeSequencesAndRemoveIndent(
+    LexerDiagnosticEmitter& emitter, llvm::StringRef contents, int hash_level,
+    llvm::StringRef indent) -> LexedStringLiteral::ExpandedValue {
   std::string result;
   result.reserve(contents.size());
   bool has_errors = false;
@@ -319,9 +320,10 @@ static auto ExpandEscapeSequencesAndRemoveIndent(DiagnosticEmitter& emitter,
     // whitespace) is required to start with the string's indent. For error
     // recovery, remove all leading whitespace if the indent doesn't match.
     if (!contents.consume_front(indent)) {
+      const char* line_start = contents.begin();
       contents = contents.drop_while(IsHorizontalWhitespace);
       if (!contents.startswith("\n")) {
-        emitter.EmitError<MismatchedIndentInString>();
+        emitter.EmitError<MismatchedIndentInString>(line_start);
         has_errors = true;
       }
     }
@@ -369,7 +371,7 @@ static auto ExpandEscapeSequencesAndRemoveIndent(DiagnosticEmitter& emitter,
   }
 }
 
-auto LexedStringLiteral::ComputeValue(DiagnosticEmitter& emitter) const
+auto LexedStringLiteral::ComputeValue(LexerDiagnosticEmitter& emitter) const
     -> ExpandedValue {
   auto indent = multi_line ? CheckIndent(emitter, text, content) : Indent();
   auto result = ExpandEscapeSequencesAndRemoveIndent(emitter, content,

+ 2 - 1
lexer/string_literal.h

@@ -31,7 +31,8 @@ class LexedStringLiteral {
 
   // Expand any escape sequences in the given string literal and compute the
   // resulting value.
-  auto ComputeValue(DiagnosticEmitter& emitter) const -> ExpandedValue;
+  auto ComputeValue(DiagnosticEmitter<const char*>& emitter) const
+      -> ExpandedValue;
 
  private:
   LexedStringLiteral(llvm::StringRef text, llvm::StringRef content,

+ 3 - 1
lexer/string_literal_fuzzer.cpp

@@ -6,6 +6,7 @@
 #include <cstring>
 
 #include "diagnostics/diagnostic_emitter.h"
+#include "diagnostics/null_diagnostics.h"
 #include "lexer/string_literal.h"
 #include "llvm/ADT/StringRef.h"
 
@@ -26,7 +27,8 @@ extern "C" int LLVMFuzzerTestOneInput(const unsigned char* data,
     __builtin_trap();
   }
 
-  volatile auto value = token->ComputeValue(NullDiagnosticEmitter());
+  volatile auto value =
+      token->ComputeValue(NullDiagnosticEmitter<const char*>());
   (void)value;
 
   return 0;

+ 5 - 1
lexer/string_literal_test.cpp

@@ -7,6 +7,7 @@
 #include "diagnostics/diagnostic_emitter.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
+#include "lexer/test_helpers.h"
 
 namespace Carbon {
 namespace {
@@ -21,7 +22,10 @@ struct StringLiteralTest : ::testing::Test {
 
   auto Parse(llvm::StringRef text) -> LexedStringLiteral::ExpandedValue {
     LexedStringLiteral token = Lex(text);
-    return token.ComputeValue(ConsoleDiagnosticEmitter());
+    Testing::SingleTokenDiagnosticTranslator translator(text);
+    DiagnosticEmitter<const char*> emitter(translator,
+                                           ConsoleDiagnosticConsumer());
+    return token.ComputeValue(emitter);
   }
 };
 

+ 60 - 0
lexer/test_helpers.h

@@ -0,0 +1,60 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef LEXER_TEST_HELPERS_H_
+#define LEXER_TEST_HELPERS_H_
+
+#include <array>
+#include <string>
+
+#include "diagnostics/diagnostic_emitter.h"
+#include "gmock/gmock.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/FormatVariadic.h"
+
+namespace Carbon {
+namespace Testing {
+
+// A diagnostic translator for tests that lex a single token. Produces
+// locations such as "`12.5`:1:3" to refer to the third character in the token.
+class SingleTokenDiagnosticTranslator
+    : public DiagnosticLocationTranslator<const char*> {
+ public:
+  // Form a translator for a given token. The string provided here must refer
+  // to the same character array that we are going to lex.
+  SingleTokenDiagnosticTranslator(llvm::StringRef token) : token(token) {}
+
+  auto GetLocation(const char* pos) -> Diagnostic::Location override {
+    assert(llvm::is_sorted(std::array{token.begin(), pos, token.end()}) &&
+           "invalid diagnostic location");
+    llvm::StringRef prefix = token.take_front(pos - token.begin());
+    auto [before_last_newline, this_line] = prefix.rsplit('\n');
+    if (before_last_newline.size() == prefix.size()) {
+      // On first line.
+      return {.file_name = SynthesizeFilename(),
+              .line_number = 1,
+              .column_number = static_cast<int32_t>(pos - token.begin() + 1)};
+    } else {
+      // On second or subsequent lines. Note that the line number here is 2
+      // more than the number of newlines because `rsplit` removed one newline
+      // and `line_number` is 1-based.
+      return {.file_name = SynthesizeFilename(),
+              .line_number =
+                  static_cast<int32_t>(before_last_newline.count('\n') + 2),
+              .column_number = static_cast<int32_t>(this_line.size() + 1)};
+    }
+  }
+
+ private:
+  auto SynthesizeFilename() const -> std::string {
+    return llvm::formatv("`{0}`", token);
+  }
+
+  llvm::StringRef token;
+};
+
+}  // namespace Testing
+}  // namespace Carbon
+
+#endif  // LEXER_TEST_HELPERS_H_

+ 78 - 13
lexer/tokenized_buffer.cpp

@@ -5,6 +5,7 @@
 #include "lexer/tokenized_buffer.h"
 
 #include <algorithm>
+#include <array>
 #include <cmath>
 #include <iterator>
 #include <string>
@@ -12,6 +13,7 @@
 #include "lexer/character_set.h"
 #include "lexer/numeric_literal.h"
 #include "lexer/string_literal.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Twine.h"
@@ -62,7 +64,12 @@ struct UnrecognizedCharacters : SimpleDiagnostic<UnrecognizedCharacters> {
 // tokenized buffer with the lexed tokens.
 class TokenizedBuffer::Lexer {
   TokenizedBuffer& buffer;
-  DiagnosticEmitter& emitter;
+
+  SourceBufferLocationTranslator translator;
+  LexerDiagnosticEmitter emitter;
+
+  TokenLocationTranslator token_translator;
+  TokenDiagnosticEmitter token_emitter;
 
   Line current_line;
   LineInfo* current_line_info;
@@ -73,9 +80,12 @@ class TokenizedBuffer::Lexer {
   llvm::SmallVector<Token, 8> open_groups;
 
  public:
-  Lexer(TokenizedBuffer& buffer, DiagnosticEmitter& emitter)
+  Lexer(TokenizedBuffer& buffer, DiagnosticConsumer& consumer)
       : buffer(buffer),
-        emitter(emitter),
+        translator(buffer),
+        emitter(translator, consumer),
+        token_translator(buffer),
+        token_emitter(token_translator, consumer),
         current_line(buffer.AddLine({0, 0, 0})),
         current_line_info(&buffer.GetLineInfo(current_line)) {}
 
@@ -121,12 +131,13 @@ class TokenizedBuffer::Lexer {
       if (source_text.startswith("//")) {
         // Any comment must be the only non-whitespace on the line.
         if (set_indent) {
-          emitter.EmitError<TrailingComment>();
+          emitter.EmitError<TrailingComment>(source_text.begin());
           buffer.has_errors = true;
         }
         // The introducer '//' must be followed by whitespace or EOF.
         if (source_text.size() > 2 && !IsSpace(source_text[2])) {
-          emitter.EmitError<NoWhitespaceAfterCommentIntroducer>();
+          emitter.EmitError<NoWhitespaceAfterCommentIntroducer>(
+              source_text.begin() + 2);
           buffer.has_errors = true;
         }
         while (!source_text.empty() && source_text.front() != '\n') {
@@ -300,6 +311,7 @@ class TokenizedBuffer::Lexer {
 
     CloseInvalidOpenGroups(kind);
 
+    const char* location = source_text.begin();
     Token token = buffer.AddToken(
         {.kind = kind, .token_line = current_line, .column = current_column});
     current_column += kind.GetFixedSpelling().size();
@@ -325,7 +337,7 @@ class TokenizedBuffer::Lexer {
       closing_token_info.error_length = kind.GetFixedSpelling().size();
       buffer.has_errors = true;
 
-      emitter.EmitError<UnmatchedClosing>();
+      emitter.EmitError<UnmatchedClosing>(location);
       // Note that this still returns true as we do consume a symbol.
       return token;
     }
@@ -354,7 +366,7 @@ class TokenizedBuffer::Lexer {
 
       open_groups.pop_back();
       buffer.has_errors = true;
-      emitter.EmitError<MismatchedClosing>();
+      token_emitter.EmitError<MismatchedClosing>(opening_token);
 
       // TODO: do a smarter backwards scan for where to put the closing
       // token.
@@ -444,10 +456,7 @@ class TokenizedBuffer::Lexer {
          .token_line = current_line,
          .column = current_column,
          .error_length = static_cast<int32_t>(error_text.size())});
-    // TODO: #19 - Need to convert to the diagnostics library.
-    llvm::errs() << "ERROR: Line " << buffer.GetLineNumber(token) << ", Column "
-                 << buffer.GetColumnNumber(token)
-                 << ": Unrecognized characters!\n";
+    emitter.EmitError<UnrecognizedCharacters>(error_text.begin());
 
     current_column += error_text.size();
     source_text = source_text.drop_front(error_text.size());
@@ -456,10 +465,10 @@ class TokenizedBuffer::Lexer {
   }
 };
 
-auto TokenizedBuffer::Lex(SourceBuffer& source, DiagnosticEmitter& emitter)
+auto TokenizedBuffer::Lex(SourceBuffer& source, DiagnosticConsumer& consumer)
     -> TokenizedBuffer {
   TokenizedBuffer buffer(source);
-  Lexer lexer(buffer, emitter);
+  Lexer lexer(buffer, consumer);
 
   llvm::StringRef source_text = source.Text();
   while (lexer.SkipWhitespace(source_text)) {
@@ -735,4 +744,60 @@ auto TokenizedBuffer::AddToken(TokenInfo info) -> Token {
   return Token(static_cast<int>(token_infos.size()) - 1);
 }
 
+auto TokenizedBuffer::SourceBufferLocationTranslator::GetLocation(
+    const char* loc) -> Diagnostic::Location {
+  assert(llvm::is_sorted(std::array{buffer_->source->Text().begin(), loc,
+                                    buffer_->source->Text().end()}) &&
+         "location not within buffer");
+  int64_t offset = loc - buffer_->source->Text().begin();
+
+  // Find the first line starting after the given location. Note that we can't
+  // inspect `line.length` here because it is not necessarily correct for the
+  // final line.
+  auto line_it = std::partition_point(
+      buffer_->line_infos.begin(), buffer_->line_infos.end(),
+      [offset](const LineInfo& line) { return line.start <= offset; });
+  bool incomplete_line_info = line_it == buffer_->line_infos.end();
+
+  // Step back one line to find the line containing the given position.
+  assert(line_it != buffer_->line_infos.begin() &&
+         "location precedes the start of the first line");
+  --line_it;
+  int line_number = line_it - buffer_->line_infos.begin();
+  int column_number = offset - line_it->start;
+
+  // We might still be lexing the last line. If so, check to see if there are
+  // any newline characters between the start of this line and the given
+  // location.
+  if (incomplete_line_info) {
+    column_number = 0;
+    for (int64_t i = line_it->start; i != offset; ++i) {
+      if (buffer_->source->Text()[i] == '\n') {
+        ++line_number;
+        column_number = 0;
+      } else {
+        ++column_number;
+      }
+    }
+  }
+
+  return {.file_name = buffer_->source->Filename().str(),
+          .line_number = line_number + 1,
+          .column_number = column_number + 1};
+}
+
+auto TokenizedBuffer::TokenLocationTranslator::GetLocation(Token token)
+    -> Diagnostic::Location {
+  // Map the token location into a position within the source buffer.
+  auto& token_info = buffer_->GetTokenInfo(token);
+  auto& line_info = buffer_->GetLineInfo(token_info.token_line);
+  const char* token_start =
+      buffer_->source->Text().begin() + line_info.start + token_info.column;
+
+  // Find the corresponding file location.
+  // TODO: Should we somehow indicate in the diagnostic location if this token
+  // is a recovery token that doesn't correspond to the original source?
+  return SourceBufferLocationTranslator(*buffer_).GetLocation(token_start);
+}
+
 }  // namespace Carbon

+ 95 - 45
lexer/tokenized_buffer.h

@@ -21,6 +21,61 @@
 
 namespace Carbon {
 
+class TokenizedBuffer;
+
+namespace Internal {
+
+// A lightweight handle to a lexed token in a `TokenizedBuffer`.
+//
+// This type's preferred name is `TokenizedBuffer::Token` and is only defined
+// outside the class to break a dependency cycle.
+//
+// `Token` objects are designed to be passed by value, not reference or
+// pointer. They are also designed to be small and efficient to store in data
+// structures.
+//
+// `Token` objects from the same `TokenizedBuffer` can be compared with each
+// other, both for being the same token within the buffer, and to establish
+// relative position within the token stream that has been lexed out of the
+// buffer. `Token` objects from different `TokenizedBuffer`s cannot be
+// meaningfully compared.
+//
+// All other APIs to query a `Token` are on the `TokenizedBuffer`.
+class TokenizedBufferToken {
+ public:
+  using Token = TokenizedBufferToken;
+
+  TokenizedBufferToken() = default;
+
+  friend auto operator==(Token lhs, Token rhs) -> bool {
+    return lhs.index == rhs.index;
+  }
+  friend auto operator!=(Token lhs, Token rhs) -> bool {
+    return lhs.index != rhs.index;
+  }
+  friend auto operator<(Token lhs, Token rhs) -> bool {
+    return lhs.index < rhs.index;
+  }
+  friend auto operator<=(Token lhs, Token rhs) -> bool {
+    return lhs.index <= rhs.index;
+  }
+  friend auto operator>(Token lhs, Token rhs) -> bool {
+    return lhs.index > rhs.index;
+  }
+  friend auto operator>=(Token lhs, Token rhs) -> bool {
+    return lhs.index >= rhs.index;
+  }
+
+ private:
+  friend TokenizedBuffer;
+
+  explicit TokenizedBufferToken(int index) : index(index) {}
+
+  int32_t index;
+};
+
+}  // namespace Internal
+
 // A buffer of tokenized Carbon source code.
 //
 // This is constructed by lexing the source code text into a series of tokens.
@@ -32,47 +87,7 @@ namespace Carbon {
 class TokenizedBuffer {
  public:
   // A lightweight handle to a lexed token in a `TokenizedBuffer`.
-  //
-  // `Token` objects are designed to be passed by value, not reference or
-  // pointer. They are also designed to be small and efficient to store in data
-  // structures.
-  //
-  // `Token` objects from the same `TokenizedBuffer` can be compared with each
-  // other, both for being the same token within the buffer, and to establish
-  // relative position within the token stream that has been lexed out of the
-  // buffer.
-  //
-  // All other APIs to query a `Token` are on the `TokenizedBuffer`.
-  class Token {
-   public:
-    Token() = default;
-
-    friend auto operator==(Token lhs, Token rhs) -> bool {
-      return lhs.index == rhs.index;
-    }
-    friend auto operator!=(Token lhs, Token rhs) -> bool {
-      return lhs.index != rhs.index;
-    }
-    friend auto operator<(Token lhs, Token rhs) -> bool {
-      return lhs.index < rhs.index;
-    }
-    friend auto operator<=(Token lhs, Token rhs) -> bool {
-      return lhs.index <= rhs.index;
-    }
-    friend auto operator>(Token lhs, Token rhs) -> bool {
-      return lhs.index > rhs.index;
-    }
-    friend auto operator>=(Token lhs, Token rhs) -> bool {
-      return lhs.index >= rhs.index;
-    }
-
-   private:
-    friend class TokenizedBuffer;
-
-    explicit Token(int index) : index(index) {}
-
-    int32_t index;
-  };
+  using Token = Internal::TokenizedBufferToken;
 
   // A lightweight handle to a lexed line in a `TokenizedBuffer`.
   //
@@ -221,14 +236,26 @@ class TokenizedBuffer {
           is_decimal(is_decimal) {}
   };
 
+  // A diagnostic location translator that maps token locations into source
+  // buffer locations.
+  class TokenLocationTranslator
+      : public DiagnosticLocationTranslator<Internal::TokenizedBufferToken> {
+   public:
+    explicit TokenLocationTranslator(TokenizedBuffer& buffer)
+        : buffer_(&buffer) {}
+
+    // Map the given token into a diagnostic location.
+    auto GetLocation(Token token) -> Diagnostic::Location override;
+
+   private:
+    TokenizedBuffer* buffer_;
+  };
+
   // Lexes a buffer of source code into a tokenized buffer.
   //
   // The provided source buffer must outlive any returned `TokenizedBuffer`
   // which will refer into the source.
-  //
-  // FIXME: Need to pass in some diagnostic machinery to report the details of
-  // the error! Right now it prints to stderr.
-  static auto Lex(SourceBuffer& source, DiagnosticEmitter& emitter)
+  static auto Lex(SourceBuffer& source, DiagnosticConsumer& consumer)
       -> TokenizedBuffer;
 
   // Returns true if the buffer has errors that are detectable at lexing time.
@@ -320,6 +347,22 @@ class TokenizedBuffer {
   class Lexer;
   friend Lexer;
 
+  // A diagnostic location translator that maps positions within the source
+  // buffer into diagnostic locations.
+  class SourceBufferLocationTranslator
+      : public DiagnosticLocationTranslator<const char*> {
+   public:
+    explicit SourceBufferLocationTranslator(TokenizedBuffer& buffer)
+        : buffer_(&buffer) {}
+
+    // Map the given position within the source buffer into a diagnostic
+    // location.
+    auto GetLocation(const char* pos) -> Diagnostic::Location override;
+
+   private:
+    TokenizedBuffer* buffer_;
+  };
+
   // Specifies minimum widths to use when printing a token's fields via
   // `printToken`.
   struct PrintWidths {
@@ -412,6 +455,13 @@ class TokenizedBuffer {
   bool has_errors = false;
 };
 
+// A diagnostic emitter that uses positions within a source buffer's text as
+// its source of location information.
+using LexerDiagnosticEmitter = DiagnosticEmitter<const char*>;
+
+// A diagnostic emitter that uses tokens as its source of location information.
+using TokenDiagnosticEmitter = DiagnosticEmitter<TokenizedBuffer::Token>;
+
 }  // namespace Carbon
 
 #endif  // LEXER_TOKENIZED_BUFFER_H_

+ 2 - 4
lexer/tokenized_buffer_fuzzer.cpp

@@ -6,6 +6,7 @@
 #include <cstring>
 
 #include "diagnostics/diagnostic_emitter.h"
+#include "diagnostics/null_diagnostics.h"
 #include "lexer/tokenized_buffer.h"
 #include "llvm/ADT/StringRef.h"
 
@@ -37,10 +38,7 @@ extern "C" int LLVMFuzzerTestOneInput(const unsigned char* data,
   auto source = SourceBuffer::CreateFromText(
       llvm::StringRef(reinterpret_cast<const char*>(data), size), filename);
 
-  // Use a real diagnostic emitter to get lazy codepaths to execute.
-  DiagnosticEmitter emitter = NullDiagnosticEmitter();
-
-  auto buffer = TokenizedBuffer::Lex(source, emitter);
+  auto buffer = TokenizedBuffer::Lex(source, NullDiagnosticConsumer());
   if (buffer.HasErrors()) {
     return 0;
   }

+ 49 - 4
lexer/tokenized_buffer_test.cpp

@@ -7,6 +7,7 @@
 #include <iterator>
 
 #include "diagnostics/diagnostic_emitter.h"
+#include "diagnostics/mocks.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 #include "lexer/tokenized_buffer_test_helpers.h"
@@ -22,10 +23,13 @@
 namespace Carbon {
 namespace {
 
+using ::Carbon::Testing::DiagnosticAt;
+using ::Carbon::Testing::DiagnosticMessage;
 using ::Carbon::Testing::ExpectedToken;
 using ::Carbon::Testing::HasTokens;
 using ::Carbon::Testing::IsKeyValueScalars;
 using ::testing::Eq;
+using ::testing::HasSubstr;
 using ::testing::NotNull;
 using ::testing::StrEq;
 
@@ -37,10 +41,10 @@ struct LexerTest : ::testing::Test {
     return source_storage.back();
   }
 
-  auto Lex(llvm::Twine text) -> TokenizedBuffer {
-    // TODO: build a full mock for this.
-    return TokenizedBuffer::Lex(GetSourceBuffer(text),
-                                ConsoleDiagnosticEmitter());
+  auto Lex(llvm::Twine text,
+           DiagnosticConsumer& consumer = ConsoleDiagnosticConsumer())
+      -> TokenizedBuffer {
+    return TokenizedBuffer::Lex(GetSourceBuffer(text), consumer);
   }
 };
 
@@ -750,6 +754,47 @@ TEST_F(LexerTest, InvalidStringLiterals) {
   }
 }
 
+TEST_F(LexerTest, Diagnostics) {
+  llvm::StringLiteral testcase = R"(
+    // Hello!
+    var String x; // trailing comment
+    //no space after comment
+    "hello\bworld\xab"
+    0x123abc
+    #"
+  )";
+
+  Testing::MockDiagnosticConsumer consumer;
+  EXPECT_CALL(consumer, HandleDiagnostic(AllOf(
+                            DiagnosticAt(3, 19),
+                            DiagnosticMessage(HasSubstr("Trailing comment")))));
+  EXPECT_CALL(consumer,
+              HandleDiagnostic(AllOf(
+                  DiagnosticAt(4, 7),
+                  DiagnosticMessage(HasSubstr("Whitespace is required")))));
+  EXPECT_CALL(
+      consumer,
+      HandleDiagnostic(AllOf(
+          DiagnosticAt(5, 12),
+          DiagnosticMessage(HasSubstr("Unrecognized escape sequence `b`")))));
+  EXPECT_CALL(
+      consumer,
+      HandleDiagnostic(AllOf(
+          DiagnosticAt(5, 20),
+          DiagnosticMessage(HasSubstr("two uppercase hexadecimal digits")))));
+  EXPECT_CALL(
+      consumer,
+      HandleDiagnostic(AllOf(
+          DiagnosticAt(6, 10),
+          DiagnosticMessage(HasSubstr("Invalid digit 'a' in hexadecimal")))));
+  EXPECT_CALL(consumer,
+              HandleDiagnostic(AllOf(
+                  DiagnosticAt(7, 5),
+                  DiagnosticMessage(HasSubstr("unrecognized character")))));
+
+  Lex(testcase, consumer);
+}
+
 auto GetAndDropLine(llvm::StringRef& text) -> std::string {
   auto newline_offset = text.find_first_of('\n');
   llvm::StringRef line = text.slice(0, newline_offset);

+ 1 - 0
parser/BUILD

@@ -80,6 +80,7 @@ cc_fuzz_test(
     deps = [
         ":parse_tree",
         "//diagnostics:diagnostic_emitter",
+        "//diagnostics:null_diagnostics",
         "//lexer:tokenized_buffer",
         "@llvm-project//llvm:Support",
     ],

+ 4 - 1
parser/parse_tree.cpp

@@ -19,8 +19,11 @@
 
 namespace Carbon {
 
-auto ParseTree::Parse(TokenizedBuffer& tokens, DiagnosticEmitter& emitter)
+auto ParseTree::Parse(TokenizedBuffer& tokens, DiagnosticConsumer& consumer)
     -> ParseTree {
+  TokenizedBuffer::TokenLocationTranslator translator(tokens);
+  TokenDiagnosticEmitter emitter(translator, consumer);
+
   // Delegate to the parser.
   return Parser::Parse(tokens, emitter);
 }

+ 1 - 1
parser/parse_tree.h

@@ -47,7 +47,7 @@ class ParseTree {
   // Parses the token buffer into a `ParseTree`.
   //
   // This is the factory function which is used to build parse trees.
-  static auto Parse(TokenizedBuffer& tokens, DiagnosticEmitter& emitter)
+  static auto Parse(TokenizedBuffer& tokens, DiagnosticConsumer& consumer)
       -> ParseTree;
 
   // Tests whether there are any errors in the parse tree.

+ 3 - 5
parser/parse_tree_fuzzer.cpp

@@ -7,6 +7,7 @@
 #include <cstring>
 
 #include "diagnostics/diagnostic_emitter.h"
+#include "diagnostics/null_diagnostics.h"
 #include "lexer/tokenized_buffer.h"
 #include "llvm/ADT/StringRef.h"
 #include "parser/parse_tree.h"
@@ -39,18 +40,15 @@ extern "C" int LLVMFuzzerTestOneInput(const unsigned char* data,
   auto source = SourceBuffer::CreateFromText(
       llvm::StringRef(reinterpret_cast<const char*>(data), size), filename);
 
-  // Use a real diagnostic emitter to get lazy codepaths to execute.
-  DiagnosticEmitter emitter = NullDiagnosticEmitter();
-
   // Lex the input.
-  auto tokens = TokenizedBuffer::Lex(source, emitter);
+  auto tokens = TokenizedBuffer::Lex(source, NullDiagnosticConsumer());
   if (tokens.HasErrors()) {
     return 0;
   }
 
   // Now parse it into a tree. Note that parsing will (when asserts are enabled)
   // walk the entire tree to verify it so we don't have to do that here.
-  ParseTree tree = ParseTree::Parse(tokens, emitter);
+  ParseTree tree = ParseTree::Parse(tokens, NullDiagnosticConsumer());
   if (tree.HasErrors()) {
     return 0;
   }

+ 26 - 25
parser/parse_tree_test.cpp

@@ -30,7 +30,7 @@ using ::testing::StrEq;
 struct ParseTreeTest : ::testing::Test {
   std::forward_list<SourceBuffer> source_storage;
   std::forward_list<TokenizedBuffer> token_storage;
-  DiagnosticEmitter emitter = NullDiagnosticEmitter();
+  DiagnosticConsumer& consumer = ConsoleDiagnosticConsumer();
 
   auto GetSourceBuffer(llvm::Twine t) -> SourceBuffer& {
     source_storage.push_front(SourceBuffer::CreateFromText(t.str()));
@@ -38,21 +38,22 @@ struct ParseTreeTest : ::testing::Test {
   }
 
   auto GetTokenizedBuffer(llvm::Twine t) -> TokenizedBuffer& {
-    token_storage.push_front(TokenizedBuffer::Lex(GetSourceBuffer(t), emitter));
+    token_storage.push_front(
+        TokenizedBuffer::Lex(GetSourceBuffer(t), consumer));
     return token_storage.front();
   }
 };
 
 TEST_F(ParseTreeTest, Empty) {
   TokenizedBuffer tokens = GetTokenizedBuffer("");
-  ParseTree tree = ParseTree::Parse(tokens, emitter);
+  ParseTree tree = ParseTree::Parse(tokens, consumer);
   EXPECT_FALSE(tree.HasErrors());
   EXPECT_THAT(tree.Postorder().begin(), Eq(tree.Postorder().end()));
 }
 
 TEST_F(ParseTreeTest, EmptyDeclaration) {
   TokenizedBuffer tokens = GetTokenizedBuffer(";");
-  ParseTree tree = ParseTree::Parse(tokens, emitter);
+  ParseTree tree = ParseTree::Parse(tokens, consumer);
   EXPECT_FALSE(tree.HasErrors());
   auto it = tree.Postorder().begin();
   auto end = tree.Postorder().end();
@@ -76,7 +77,7 @@ TEST_F(ParseTreeTest, EmptyDeclaration) {
 
 TEST_F(ParseTreeTest, BasicFunctionDeclaration) {
   TokenizedBuffer tokens = GetTokenizedBuffer("fn F();");
-  ParseTree tree = ParseTree::Parse(tokens, emitter);
+  ParseTree tree = ParseTree::Parse(tokens, consumer);
   EXPECT_FALSE(tree.HasErrors());
   EXPECT_THAT(
       tree, MatchParseTreeNodes(
@@ -92,14 +93,14 @@ TEST_F(ParseTreeTest, BasicFunctionDeclaration) {
 
 TEST_F(ParseTreeTest, NoDeclarationIntroducerOrSemi) {
   TokenizedBuffer tokens = GetTokenizedBuffer("foo bar baz");
-  ParseTree tree = ParseTree::Parse(tokens, emitter);
+  ParseTree tree = ParseTree::Parse(tokens, consumer);
   EXPECT_TRUE(tree.HasErrors());
   EXPECT_THAT(tree.Postorder().begin(), Eq(tree.Postorder().end()));
 }
 
 TEST_F(ParseTreeTest, NoDeclarationIntroducerWithSemi) {
   TokenizedBuffer tokens = GetTokenizedBuffer("foo;");
-  ParseTree tree = ParseTree::Parse(tokens, emitter);
+  ParseTree tree = ParseTree::Parse(tokens, consumer);
   EXPECT_TRUE(tree.HasErrors());
   EXPECT_THAT(tree,
               MatchParseTreeNodes({{.kind = ParseNodeKind::EmptyDeclaration(),
@@ -109,7 +110,7 @@ TEST_F(ParseTreeTest, NoDeclarationIntroducerWithSemi) {
 
 TEST_F(ParseTreeTest, JustFunctionIntroducerAndSemi) {
   TokenizedBuffer tokens = GetTokenizedBuffer("fn;");
-  ParseTree tree = ParseTree::Parse(tokens, emitter);
+  ParseTree tree = ParseTree::Parse(tokens, consumer);
   EXPECT_TRUE(tree.HasErrors());
   EXPECT_THAT(tree, MatchParseTreeNodes(
                         {{.kind = ParseNodeKind::FunctionDeclaration(),
@@ -119,7 +120,7 @@ TEST_F(ParseTreeTest, JustFunctionIntroducerAndSemi) {
 
 TEST_F(ParseTreeTest, RepeatedFunctionIntroducerAndSemi) {
   TokenizedBuffer tokens = GetTokenizedBuffer("fn fn;");
-  ParseTree tree = ParseTree::Parse(tokens, emitter);
+  ParseTree tree = ParseTree::Parse(tokens, consumer);
   EXPECT_TRUE(tree.HasErrors());
   EXPECT_THAT(tree, MatchParseTreeNodes(
                         {{.kind = ParseNodeKind::FunctionDeclaration(),
@@ -129,7 +130,7 @@ TEST_F(ParseTreeTest, RepeatedFunctionIntroducerAndSemi) {
 
 TEST_F(ParseTreeTest, FunctionDeclarationWithNoSignatureOrSemi) {
   TokenizedBuffer tokens = GetTokenizedBuffer("fn foo");
-  ParseTree tree = ParseTree::Parse(tokens, emitter);
+  ParseTree tree = ParseTree::Parse(tokens, consumer);
   EXPECT_TRUE(tree.HasErrors());
   EXPECT_THAT(tree,
               MatchParseTreeNodes(
@@ -141,7 +142,7 @@ TEST_F(ParseTreeTest, FunctionDeclarationWithNoSignatureOrSemi) {
 TEST_F(ParseTreeTest,
        FunctionDeclarationWithIdentifierInsteadOfSignatureAndSemi) {
   TokenizedBuffer tokens = GetTokenizedBuffer("fn foo bar;");
-  ParseTree tree = ParseTree::Parse(tokens, emitter);
+  ParseTree tree = ParseTree::Parse(tokens, consumer);
   EXPECT_TRUE(tree.HasErrors());
   EXPECT_THAT(tree, MatchParseTreeNodes(
                         {{.kind = ParseNodeKind::FunctionDeclaration(),
@@ -152,7 +153,7 @@ TEST_F(ParseTreeTest,
 
 TEST_F(ParseTreeTest, FunctionDeclarationWithSingleIdentifierParameterList) {
   TokenizedBuffer tokens = GetTokenizedBuffer("fn foo(bar);");
-  ParseTree tree = ParseTree::Parse(tokens, emitter);
+  ParseTree tree = ParseTree::Parse(tokens, consumer);
   // Note: this might become valid depending on the parameter syntax, this test
   // shouldn't be taken as a sign it should remain invalid.
   EXPECT_TRUE(tree.HasErrors());
@@ -170,7 +171,7 @@ TEST_F(ParseTreeTest, FunctionDeclarationWithSingleIdentifierParameterList) {
 
 TEST_F(ParseTreeTest, FunctionDeclarationWithoutName) {
   TokenizedBuffer tokens = GetTokenizedBuffer("fn ();");
-  ParseTree tree = ParseTree::Parse(tokens, emitter);
+  ParseTree tree = ParseTree::Parse(tokens, consumer);
   EXPECT_TRUE(tree.HasErrors());
   EXPECT_THAT(tree, MatchParseTreeNodes(
                         {{.kind = ParseNodeKind::FunctionDeclaration(),
@@ -182,7 +183,7 @@ TEST_F(ParseTreeTest,
        FunctionDeclarationWithoutNameAndManyTokensToSkipInGroupedSymbols) {
   TokenizedBuffer tokens = GetTokenizedBuffer(
       "fn (a tokens c d e f g h i j k l m n o p q r s t u v w x y z);");
-  ParseTree tree = ParseTree::Parse(tokens, emitter);
+  ParseTree tree = ParseTree::Parse(tokens, consumer);
   EXPECT_TRUE(tree.HasErrors());
   EXPECT_THAT(tree, MatchParseTreeNodes(
                         {{.kind = ParseNodeKind::FunctionDeclaration(),
@@ -194,7 +195,7 @@ TEST_F(ParseTreeTest, FunctionDeclarationSkipToNewlineWithoutSemi) {
   TokenizedBuffer tokens = GetTokenizedBuffer(
       "fn ()\n"
       "fn F();");
-  ParseTree tree = ParseTree::Parse(tokens, emitter);
+  ParseTree tree = ParseTree::Parse(tokens, consumer);
   EXPECT_TRUE(tree.HasErrors());
   EXPECT_THAT(
       tree,
@@ -213,7 +214,7 @@ TEST_F(ParseTreeTest, FunctionDeclarationSkipIndentedNewlineWithSemi) {
       "    y,\n"
       "    z);\n"
       "fn F();");
-  ParseTree tree = ParseTree::Parse(tokens, emitter);
+  ParseTree tree = ParseTree::Parse(tokens, consumer);
   EXPECT_TRUE(tree.HasErrors());
   EXPECT_THAT(
       tree,
@@ -234,7 +235,7 @@ TEST_F(ParseTreeTest, FunctionDeclarationSkipIndentedNewlineWithoutSemi) {
       "    y,\n"
       "    z)\n"
       "fn F();");
-  ParseTree tree = ParseTree::Parse(tokens, emitter);
+  ParseTree tree = ParseTree::Parse(tokens, consumer);
   EXPECT_TRUE(tree.HasErrors());
   EXPECT_THAT(
       tree,
@@ -253,7 +254,7 @@ TEST_F(ParseTreeTest, FunctionDeclarationSkipIndentedNewlineUntilOutdent) {
       "      y,\n"
       "      z)\n"
       "fn F();");
-  ParseTree tree = ParseTree::Parse(tokens, emitter);
+  ParseTree tree = ParseTree::Parse(tokens, consumer);
   EXPECT_TRUE(tree.HasErrors());
   EXPECT_THAT(
       tree,
@@ -275,7 +276,7 @@ TEST_F(ParseTreeTest, FunctionDeclarationSkipWithoutSemiToCurly) {
   TokenizedBuffer tokens = GetTokenizedBuffer(
       "struct X { fn () }\n"
       "fn F();");
-  ParseTree tree = ParseTree::Parse(tokens, emitter);
+  ParseTree tree = ParseTree::Parse(tokens, consumer);
   EXPECT_TRUE(tree.HasErrors());
 }
 
@@ -283,7 +284,7 @@ TEST_F(ParseTreeTest, BasicFunctionDefinition) {
   TokenizedBuffer tokens = GetTokenizedBuffer(
       "fn F() {\n"
       "}");
-  ParseTree tree = ParseTree::Parse(tokens, emitter);
+  ParseTree tree = ParseTree::Parse(tokens, consumer);
   EXPECT_FALSE(tree.HasErrors());
   EXPECT_THAT(
       tree, MatchParseTreeNodes(
@@ -304,7 +305,7 @@ TEST_F(ParseTreeTest, FunctionDefinitionWithNestedBlocks) {
       "    {{}}\n"
       "  }\n"
       "}");
-  ParseTree tree = ParseTree::Parse(tokens, emitter);
+  ParseTree tree = ParseTree::Parse(tokens, consumer);
   EXPECT_FALSE(tree.HasErrors());
   EXPECT_THAT(
       tree,
@@ -332,7 +333,7 @@ TEST_F(ParseTreeTest, FunctionDefinitionWithIdenifierInStatements) {
       "fn F() {\n"
       "  bar\n"
       "}");
-  ParseTree tree = ParseTree::Parse(tokens, emitter);
+  ParseTree tree = ParseTree::Parse(tokens, consumer);
   // Note: this might become valid depending on the expression syntax. This test
   // shouldn't be taken as a sign it should remain invalid.
   EXPECT_TRUE(tree.HasErrors());
@@ -353,7 +354,7 @@ TEST_F(ParseTreeTest, FunctionDefinitionWithIdenifierInNestedBlock) {
       "fn F() {\n"
       "  {bar}\n"
       "}");
-  ParseTree tree = ParseTree::Parse(tokens, emitter);
+  ParseTree tree = ParseTree::Parse(tokens, consumer);
   // Note: this might become valid depending on the expression syntax. This test
   // shouldn't be taken as a sign it should remain invalid.
   EXPECT_TRUE(tree.HasErrors());
@@ -387,7 +388,7 @@ auto GetAndDropLine(llvm::StringRef& s) -> std::string {
 
 TEST_F(ParseTreeTest, Printing) {
   TokenizedBuffer tokens = GetTokenizedBuffer("fn F();");
-  ParseTree tree = ParseTree::Parse(tokens, emitter);
+  ParseTree tree = ParseTree::Parse(tokens, consumer);
   EXPECT_FALSE(tree.HasErrors());
   std::string print_storage;
   llvm::raw_string_ostream print_stream(print_storage);
@@ -413,7 +414,7 @@ TEST_F(ParseTreeTest, Printing) {
 
 TEST_F(ParseTreeTest, PrintingAsYAML) {
   TokenizedBuffer tokens = GetTokenizedBuffer("fn F();");
-  ParseTree tree = ParseTree::Parse(tokens, emitter);
+  ParseTree tree = ParseTree::Parse(tokens, consumer);
   EXPECT_FALSE(tree.HasErrors());
   std::string print_output;
   llvm::raw_string_ostream print_stream(print_output);

+ 1 - 1
parser/parser_impl.cpp

@@ -16,7 +16,7 @@
 namespace Carbon {
 
 auto ParseTree::Parser::Parse(TokenizedBuffer& tokens,
-                              DiagnosticEmitter& /*unused*/) -> ParseTree {
+                              TokenDiagnosticEmitter& /*unused*/) -> ParseTree {
   ParseTree tree(tokens);
 
   // We expect to have a 1:1 correspondence between tokens and tree nodes, so

+ 1 - 1
parser/parser_impl.h

@@ -19,7 +19,7 @@ class ParseTree::Parser {
   // Parses the tokens into a parse tree, emitting any errors encountered.
   //
   // This is the entry point to the parser implementation.
-  static auto Parse(TokenizedBuffer& tokens, DiagnosticEmitter& de)
+  static auto Parse(TokenizedBuffer& tokens, TokenDiagnosticEmitter& de)
       -> ParseTree;
 
  private: