Kaynağa Gözat

Carbon fuzzing 1/3: added a proto for representing Carbon AST + ast_to_proto library (#1128)

* initial fuzzer proto

* visibility change

* use newer protocol buffer version which has the defs.bzl bug fixed

* Adjusted fix_cc_deps to work with protobuf external repo

* explicitly load rules_cc to avoid a frozenset bug in the version loaded by protobuf

* use explicit deps, use llvm's zlib

* restored cxx settings

* deps change

* Cleaned up WORKSPACE and changed the test to read carbon sources from testdata

* updated comment

* adapted to new ErrorOr return value

* proto buffer 3.19.2 -> 3.19.4

* changed comment

* Apply suggestions from code review

Co-authored-by: Jon Meow <jperkins@google.com>

* Apply suggestions from code review

Co-authored-by: Jon Meow <jperkins@google.com>

* Update executable_semantics/fuzzing/BUILD

Co-authored-by: Jon Meow <jperkins@google.com>

* addressed review comments

* updated Unimplemented error message

* Addressed review comments

* more review comments

* switched to loading protobuf via rules_proto()

* Ignore protobuf headers in fix_cc_deps.py until the script supports alias rules

* renamed repeated proto fields to be plural

* added @zlib to check_non_test_cc_deps

* Update common/fuzzing/BUILD

Co-authored-by: Jon Meow <jperkins@google.com>

* review comments

* set is_omitted_expression for return value

Co-authored-by: Jon Meow <jperkins@google.com>
pk19604014 4 yıl önce
ebeveyn
işleme
5ae6d22415

+ 1 - 1
.pre-commit-config.yaml

@@ -97,7 +97,7 @@ repos:
             Exceptions. See /LICENSE for license information.
             SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
           - --custom_format
-          - '\.(carbon|ypp)$'
+          - '\.(carbon|proto|ypp)$'
           - ''
           - '// '
           - ''

+ 32 - 1
WORKSPACE

@@ -143,6 +143,7 @@ load("@llvm-raw//utils/bazel:configure.bzl", "llvm_configure")
 
 llvm_configure(
     name = "llvm-project",
+    repo_mapping = {"@llvm_zlib": "@zlib"},
     targets = [
         "AArch64",
         "X86",
@@ -157,7 +158,7 @@ llvm_terminfo_system(name = "llvm_terminfo")
 load("@llvm-raw//utils/bazel:zlib.bzl", "llvm_zlib_system")
 
 # We require successful detection and use of a system zlib library.
-llvm_zlib_system(name = "llvm_zlib")
+llvm_zlib_system(name = "zlib")
 
 ###############################################################################
 # Flex/Bison rules
@@ -217,6 +218,36 @@ load("@rules_bison//bison:bison.bzl", "bison_register_toolchains")
 # fix them anyways.
 bison_register_toolchains(extra_copts = ["-w"])
 
+###############################################################################
+# Protocol buffers - for structured fuzzer testing.
+###############################################################################
+
+# TODO: `rules_proto` pulls in a version of `rules_cc` with a frozenset bug.
+rules_cc_version = "0.0.1"
+
+http_archive(
+    name = "rules_cc",
+    sha256 = "4dccbfd22c0def164c8f47458bd50e0c7148f3d92002cdb459c2a96a68498241",
+    urls = ["https://github.com/bazelbuild/rules_cc/releases/download/%s/rules_cc-%s.tar.gz" % (rules_cc_version, rules_cc_version)],
+)
+
+rules_proto_version = "4.0.0-3.19.2"
+
+http_archive(
+    name = "rules_proto",
+    sha256 = "c22cfcb3f22a0ae2e684801ea8dfed070ba5bed25e73f73580564f250475e72d",
+    strip_prefix = "rules_proto-%s" % rules_proto_version,
+    urls = [
+        "https://github.com/bazelbuild/rules_proto/archive/refs/tags/%s.tar.gz" % rules_proto_version,
+    ],
+)
+
+load("@rules_proto//proto:repositories.bzl", "rules_proto_dependencies", "rules_proto_toolchains")
+
+rules_proto_dependencies()
+
+rules_proto_toolchains()
+
 ###############################################################################
 # Example conversion repositories
 ###############################################################################

+ 1 - 1
bazel/check_deps/check_non_test_cc_deps.py

@@ -56,7 +56,7 @@ for dep in deps:
 
         # The rest of LLVM, LLD, and Clang themselves are safe to depend on.
         continue
-    if repo in ("@llvm_terminfo", "@llvm_zlib"):
+    if repo in ("@llvm_terminfo", "@llvm_zlib", "@zlib"):
         # These are stubs wrapping system libraries for LLVM. They aren't
         # distributed and so should be fine.
         continue

+ 18 - 0
common/fuzzing/BUILD

@@ -0,0 +1,18 @@
+# Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+# Exceptions. See /LICENSE for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+load("@rules_cc//cc:defs.bzl", "cc_proto_library")
+load("@rules_proto//proto:defs.bzl", "proto_library")
+
+package(default_visibility = ["//visibility:public"])
+
+proto_library(
+    name = "carbon_proto",
+    srcs = ["carbon.proto"],
+)
+
+cc_proto_library(
+    name = "carbon_cc_proto",
+    deps = [":carbon_proto"],
+)

+ 341 - 0
common/fuzzing/carbon.proto

@@ -0,0 +1,341 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+syntax = "proto2";
+
+package Carbon.Fuzzing;
+
+message LibraryName {
+  optional string package_name = 1;
+  optional string path = 2;
+}
+
+// Expressions.
+
+message CallExpression {
+  optional Expression function = 1;
+  optional Expression argument = 2;
+}
+
+message FunctionTypeLiteral {
+  optional Expression parameter = 1;
+  optional Expression return_type = 2;
+}
+
+message FieldAccessExpression {
+  optional string field = 1;
+  optional Expression aggregate = 2;
+}
+
+message IndexExpression {
+  optional Expression aggregate = 1;
+  optional Expression offset = 2;
+}
+
+message PrimitiveOperatorExpression {
+  enum Operator {
+    UnknownOperator = 0;
+    Add = 1;
+    AddressOf = 2;
+    And = 3;
+    Deref = 4;
+    Eq = 5;
+    Mul = 6;
+    Neg = 7;
+    Not = 8;
+    Or = 9;
+    Sub = 10;
+    Ptr = 11;
+  }
+  optional Operator op = 1;
+  repeated Expression arguments = 2;
+}
+
+message TupleLiteralExpression {
+  repeated Expression fields = 1;
+}
+
+message FieldInitializer {
+  optional string name = 1;
+  optional Expression expression = 2;
+}
+
+message StructLiteralExpression {
+  repeated FieldInitializer fields = 1;
+}
+
+message StructTypeLiteralExpression {
+  repeated FieldInitializer fields = 1;
+}
+
+message IdentifierExpression {
+  optional string name = 1;
+}
+
+message IntrinsicExpression {
+  enum Intrinsic {
+    UnknownIntrinsic = 0;
+    Print = 1;
+  }
+  optional Intrinsic intrinsic = 1;
+  optional TupleLiteralExpression argument = 2;
+}
+
+message IfExpression {
+  optional Expression condition = 1;
+  optional Expression then_expression = 2;
+  optional Expression else_expression = 3;
+}
+
+message BoolTypeLiteral {}
+
+message BoolLiteral {
+  optional bool value = 1;
+}
+
+message IntTypeLiteral {}
+message ContinuationTypeLiteral {}
+
+message IntLiteral {
+  optional int64 value = 1;
+}
+
+message StringLiteral {
+  optional string value = 1;
+}
+
+message StringTypeLiteral {}
+message TypeTypeLiteral {}
+
+message UnimplementedExpression {}
+
+message Expression {
+  oneof kind {
+    CallExpression call = 1;
+    FunctionTypeLiteral function_type = 2;
+    FieldAccessExpression field_access = 3;
+    IndexExpression index = 4;
+    PrimitiveOperatorExpression primitive_operator = 5;
+    TupleLiteralExpression tuple_literal = 6;
+    StructLiteralExpression struct_literal = 7;
+    StructTypeLiteralExpression struct_type_literal = 8;
+    IdentifierExpression identifier = 9;
+    IntrinsicExpression intrinsic = 10;
+    IfExpression if_expression = 11;
+    BoolTypeLiteral bool_type_literal = 12;
+    BoolLiteral bool_literal = 13;
+    IntTypeLiteral int_type_literal = 14;
+    ContinuationTypeLiteral continuation_type_literal = 15;
+    IntLiteral int_literal = 16;
+    StringLiteral string_literal = 17;
+    StringTypeLiteral string_type_literal = 18;
+    TypeTypeLiteral type_type_literal = 19;
+    UnimplementedExpression unimplemented_expression = 20;
+  }
+}
+
+// Patterns.
+
+message BindingPattern {
+  optional string name = 1;
+  optional Pattern type = 2;
+}
+
+message TuplePattern {
+  repeated Pattern fields = 1;
+}
+
+message AlternativePattern {
+  optional Expression choice_type = 1;
+  optional string alternative_name = 2;
+  optional TuplePattern arguments = 3;
+}
+
+message ExpressionPattern {
+  optional Expression expression = 1;
+}
+
+message AutoPattern {}
+
+message VarPattern {
+  optional Pattern pattern = 1;
+}
+
+message Pattern {
+  oneof kind {
+    BindingPattern binding_pattern = 1;
+    TuplePattern tuple_pattern = 2;
+    AlternativePattern alternative_pattern = 3;
+    ExpressionPattern expression_pattern = 4;
+    AutoPattern auto_pattern = 5;
+    VarPattern var_pattern = 6;
+  }
+}
+
+// Statements.
+
+message ExpressionStatement {
+  optional Expression expression = 1;
+}
+
+message AssignStatement {
+  optional Expression lhs = 1;
+  optional Expression rhs = 2;
+}
+
+message VariableDefinitionStatement {
+  optional Pattern pattern = 1;
+  optional Expression init = 2;
+}
+
+message IfStatement {
+  optional Expression condition = 1;
+  optional BlockStatement then_block = 2;
+  optional BlockStatement else_block = 3;
+}
+
+message ReturnStatement {
+  optional Expression expression = 1;  // Can be omitted.
+  optional bool is_omitted_expression = 2;
+}
+
+message BlockStatement {
+  repeated Statement statements = 1;
+}
+
+message WhileStatement {
+  optional Expression condition = 1;
+  optional BlockStatement body = 2;
+}
+
+message MatchClause {
+  optional Pattern pattern = 1;
+  optional Statement statement = 2;
+}
+
+message MatchStatement {
+  optional Expression expression = 1;
+  repeated MatchClause clauses = 2;
+}
+
+message ContinuationStatement {
+  optional string name = 1;
+  optional BlockStatement body = 2;
+}
+
+message RunStatement {
+  optional Expression argument = 1;
+}
+
+message AwaitStatement {}
+
+message BreakStatement {}
+message ContinueStatement {}
+
+message Statement {
+  oneof kind {
+    ExpressionStatement expression_statement = 1;
+    AssignStatement assign = 2;
+    VariableDefinitionStatement variable_definition = 3;
+    IfStatement if_statement = 4;
+    ReturnStatement return_statement = 5;
+    BlockStatement block = 6;
+    WhileStatement while_statement = 7;
+    MatchStatement match = 8;
+    ContinuationStatement continuation = 9;
+    RunStatement run = 10;
+    AwaitStatement await = 11;
+    BreakStatement break_statement = 12;
+    ContinueStatement continue_statement = 13;
+  }
+}
+
+// Declarations.
+
+message ReturnTerm {
+  enum ReturnKind {
+    UnknownReturnKind = 0;
+    Omitted = 1;
+    Auto = 2;
+    Expression = 3;
+  }
+  optional ReturnKind kind = 1;
+  optional Expression type = 2;
+}
+
+message GenericBinding {
+  optional string name = 1;
+  optional Expression type = 2;
+}
+
+message FunctionDeclaration {
+  optional string name = 1;
+  repeated GenericBinding deduced_parameters = 2;
+  optional BindingPattern me_pattern = 3;
+  optional TuplePattern param_pattern = 4;
+  optional ReturnTerm return_term = 5;
+  optional BlockStatement body = 6;
+}
+
+message ClassDeclaration {
+  optional string name = 1;
+  repeated Declaration members = 2;
+}
+
+message AlternativeSignature {
+  optional string name = 1;
+  optional Expression signature = 2;
+}
+
+message ChoiceDeclaration {
+  optional string name = 1;
+  repeated AlternativeSignature alternatives = 2;
+}
+
+message VariableDeclaration {
+  optional BindingPattern binding = 1;
+  optional Expression initializer = 2;
+}
+
+message InterfaceDeclaration {
+  optional string name = 1;
+  repeated Declaration members = 2;
+  optional GenericBinding self = 3;
+}
+
+message ImplDeclaration {
+  enum ImplKind {
+    UnknownImplKind = 0;
+    InternalImpl = 1;
+    ExternalImpl = 2;
+  }
+
+  optional ImplKind kind = 1;
+  optional Expression impl_type = 2;
+  optional Expression interface = 3;
+  repeated Declaration members = 4;
+}
+
+message Declaration {
+  oneof kind {
+    FunctionDeclaration function = 1;
+    ClassDeclaration class_declaration = 2;
+    ChoiceDeclaration choice = 3;
+    VariableDeclaration variable = 4;
+    InterfaceDeclaration interface = 5;
+    ImplDeclaration impl = 6;
+  }
+}
+
+message CompilationUnit {
+  optional LibraryName package_statement = 1;
+  optional bool is_api = 2;
+  repeated Declaration declarations = 3;
+  // TODO: Add support for imports if they are useful in fuzzing.
+}
+
+// Top-level fuzzer input.
+message Carbon {
+  optional CompilationUnit compilation_unit = 1;
+}

+ 38 - 0
executable_semantics/fuzzing/BUILD

@@ -0,0 +1,38 @@
+# Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+# Exceptions. See /LICENSE for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+cc_library(
+    name = "ast_to_proto_lib",
+    srcs = ["ast_to_proto.cpp"],
+    hdrs = ["ast_to_proto.h"],
+    deps = [
+        "//common/fuzzing:carbon_cc_proto",
+        "//executable_semantics/ast",
+        "//executable_semantics/ast:declaration",
+        "//executable_semantics/ast:expression",
+        "//executable_semantics/ast:generic_binding",
+        "@llvm-project//llvm:Support",
+    ],
+)
+
+cc_test(
+    name = "ast_to_proto_test",
+    srcs = ["ast_to_proto_test.cpp"],
+    args = [
+        "$(location //executable_semantics:data/prelude.carbon)",
+        "$(locations //executable_semantics/testdata:carbon_files)",
+    ],
+    data = [
+        "//executable_semantics:data/prelude.carbon",
+        "//executable_semantics/testdata:carbon_files",
+    ],
+    deps = [
+        ":ast_to_proto_lib",
+        "//common/fuzzing:carbon_cc_proto",
+        "//executable_semantics/syntax",
+        "@com_google_googletest//:gtest",
+        "@com_google_protobuf//:protobuf_headers",
+        "@llvm-project//llvm:Support",
+    ],
+)

+ 541 - 0
executable_semantics/fuzzing/ast_to_proto.cpp

@@ -0,0 +1,541 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "executable_semantics/fuzzing/ast_to_proto.h"
+
+#include <optional>
+
+#include "executable_semantics/ast/declaration.h"
+#include "executable_semantics/ast/expression.h"
+#include "executable_semantics/ast/generic_binding.h"
+#include "llvm/Support/Casting.h"
+
+namespace Carbon {
+
+using ::llvm::cast;
+
+static auto ExpressionToProto(const Expression& expression)
+    -> Fuzzing::Expression;
+static auto PatternToProto(const Pattern& pattern) -> Fuzzing::Pattern;
+static auto StatementToProto(const Statement& statement) -> Fuzzing::Statement;
+static auto DeclarationToProto(const Declaration& declaration)
+    -> Fuzzing::Declaration;
+
+// Builds the proto form of `library_name`. The package name is always copied;
+// the path is only copied when it is non-empty, so the proto field stays unset
+// otherwise.
+static auto LibraryNameToProto(const LibraryName& library_name)
+    -> Fuzzing::LibraryName {
+  Fuzzing::LibraryName result;
+  result.set_package_name(library_name.package);
+  const bool has_path = !library_name.path.empty();
+  if (has_path) {
+    result.set_path(library_name.path);
+  }
+  return result;
+}
+
+// Maps an AST `Operator` onto the enumerator of the same name in
+// `Fuzzing::PrimitiveOperatorExpression::Operator`.
+//
+// Returns `UnknownOperator` if `op` does not match any handled enumerator.
+static auto OperatorToProtoEnum(const Operator op)
+    -> Fuzzing::PrimitiveOperatorExpression::Operator {
+  switch (op) {
+    case Operator::AddressOf:
+      return Fuzzing::PrimitiveOperatorExpression::AddressOf;
+    case Operator::Deref:
+      return Fuzzing::PrimitiveOperatorExpression::Deref;
+    case Operator::Neg:
+      return Fuzzing::PrimitiveOperatorExpression::Neg;
+    case Operator::Not:
+      return Fuzzing::PrimitiveOperatorExpression::Not;
+    case Operator::Ptr:
+      return Fuzzing::PrimitiveOperatorExpression::Ptr;
+    case Operator::Add:
+      return Fuzzing::PrimitiveOperatorExpression::Add;
+    case Operator::And:
+      return Fuzzing::PrimitiveOperatorExpression::And;
+    case Operator::Eq:
+      return Fuzzing::PrimitiveOperatorExpression::Eq;
+    case Operator::Mul:
+      return Fuzzing::PrimitiveOperatorExpression::Mul;
+    case Operator::Or:
+      return Fuzzing::PrimitiveOperatorExpression::Or;
+    case Operator::Sub:
+      return Fuzzing::PrimitiveOperatorExpression::Sub;
+  }
+  // Only reachable when `op` holds a value outside the enumerators handled
+  // above. Returning the proto's `UnknownOperator` sentinel avoids flowing off
+  // the end of a value-returning function, which is undefined behavior. The
+  // switch intentionally has no `default:` label so that compilers can still
+  // warn about newly added `Operator` enumerators that are not handled here.
+  return Fuzzing::PrimitiveOperatorExpression::UnknownOperator;
+}
+
+// Converts a single field initializer: copies its name and the proto form of
+// its expression.
+static auto FieldInitializerToProto(const FieldInitializer& field)
+    -> Fuzzing::FieldInitializer {
+  Fuzzing::FieldInitializer result;
+  result.set_name(field.name());
+  auto& value_proto = *result.mutable_expression();
+  value_proto = ExpressionToProto(field.expression());
+  return result;
+}
+
+// Converts a tuple literal by converting each of its fields, in order, into
+// the repeated `fields` entry of the proto.
+static auto TupleLiteralExpressionToProto(const TupleLiteral& tuple_literal)
+    -> Fuzzing::TupleLiteralExpression {
+  Fuzzing::TupleLiteralExpression result;
+  for (Nonnull<const Expression*> element : tuple_literal.fields()) {
+    auto& slot = *result.add_fields();
+    slot = ExpressionToProto(*element);
+  }
+  return result;
+}
+
+// Translates `expression` into a `Fuzzing::Expression`, filling in the proto
+// sub-message that matches `expression.kind()`. Child expressions are
+// translated recursively.
+static auto ExpressionToProto(const Expression& expression)
+    -> Fuzzing::Expression {
+  Fuzzing::Expression result;
+  switch (expression.kind()) {
+    case ExpressionKind::CallExpression: {
+      const auto& src = cast<CallExpression>(expression);
+      auto& dest = *result.mutable_call();
+      *dest.mutable_function() = ExpressionToProto(src.function());
+      *dest.mutable_argument() = ExpressionToProto(src.argument());
+      break;
+    }
+
+    case ExpressionKind::FunctionTypeLiteral: {
+      const auto& src = cast<FunctionTypeLiteral>(expression);
+      auto& dest = *result.mutable_function_type();
+      *dest.mutable_parameter() = ExpressionToProto(src.parameter());
+      *dest.mutable_return_type() = ExpressionToProto(src.return_type());
+      break;
+    }
+
+    case ExpressionKind::FieldAccessExpression: {
+      const auto& src = cast<FieldAccessExpression>(expression);
+      auto& dest = *result.mutable_field_access();
+      dest.set_field(src.field());
+      *dest.mutable_aggregate() = ExpressionToProto(src.aggregate());
+      break;
+    }
+
+    case ExpressionKind::IndexExpression: {
+      const auto& src = cast<IndexExpression>(expression);
+      auto& dest = *result.mutable_index();
+      *dest.mutable_aggregate() = ExpressionToProto(src.aggregate());
+      *dest.mutable_offset() = ExpressionToProto(src.offset());
+      break;
+    }
+
+    case ExpressionKind::PrimitiveOperatorExpression: {
+      const auto& src = cast<PrimitiveOperatorExpression>(expression);
+      auto& dest = *result.mutable_primitive_operator();
+      dest.set_op(OperatorToProtoEnum(src.op()));
+      for (Nonnull<const Expression*> operand : src.arguments()) {
+        *dest.add_arguments() = ExpressionToProto(*operand);
+      }
+      break;
+    }
+
+    case ExpressionKind::TupleLiteral: {
+      const auto& src = cast<TupleLiteral>(expression);
+      *result.mutable_tuple_literal() = TupleLiteralExpressionToProto(src);
+      break;
+    }
+
+    case ExpressionKind::StructLiteral: {
+      const auto& src = cast<StructLiteral>(expression);
+      auto& dest = *result.mutable_struct_literal();
+      for (const FieldInitializer& initializer : src.fields()) {
+        *dest.add_fields() = FieldInitializerToProto(initializer);
+      }
+      break;
+    }
+
+    case ExpressionKind::StructTypeLiteral: {
+      const auto& src = cast<StructTypeLiteral>(expression);
+      auto& dest = *result.mutable_struct_type_literal();
+      for (const FieldInitializer& initializer : src.fields()) {
+        *dest.add_fields() = FieldInitializerToProto(initializer);
+      }
+      break;
+    }
+
+    case ExpressionKind::IdentifierExpression: {
+      const auto& src = cast<IdentifierExpression>(expression);
+      result.mutable_identifier()->set_name(src.name());
+      break;
+    }
+
+    case ExpressionKind::IntrinsicExpression: {
+      const auto& src = cast<IntrinsicExpression>(expression);
+      auto& dest = *result.mutable_intrinsic();
+      switch (src.intrinsic()) {
+        case IntrinsicExpression::Intrinsic::Print:
+          dest.set_intrinsic(Fuzzing::IntrinsicExpression::Print);
+          break;
+      }
+      *dest.mutable_argument() = TupleLiteralExpressionToProto(src.args());
+      break;
+    }
+
+    case ExpressionKind::IfExpression: {
+      const auto& src = cast<IfExpression>(expression);
+      auto& dest = *result.mutable_if_expression();
+      // Each operand is only emitted when it is present on the AST node.
+      if (src.condition()) {
+        *dest.mutable_condition() = ExpressionToProto(*src.condition());
+      }
+      if (src.then_expression()) {
+        *dest.mutable_then_expression() =
+            ExpressionToProto(*src.then_expression());
+      }
+      if (src.else_expression()) {
+        *dest.mutable_else_expression() =
+            ExpressionToProto(*src.else_expression());
+      }
+      break;
+    }
+
+    // For the payload-free literals below, calling the `mutable_*` accessor
+    // is enough: it selects the corresponding member and there is nothing
+    // further to set.
+    case ExpressionKind::BoolTypeLiteral:
+      result.mutable_bool_type_literal();
+      break;
+
+    case ExpressionKind::BoolLiteral: {
+      auto& dest = *result.mutable_bool_literal();
+      dest.set_value(cast<BoolLiteral>(expression).value());
+      break;
+    }
+
+    case ExpressionKind::IntTypeLiteral:
+      result.mutable_int_type_literal();
+      break;
+
+    case ExpressionKind::IntLiteral: {
+      auto& dest = *result.mutable_int_literal();
+      dest.set_value(cast<IntLiteral>(expression).value());
+      break;
+    }
+
+    case ExpressionKind::StringLiteral: {
+      auto& dest = *result.mutable_string_literal();
+      dest.set_value(cast<StringLiteral>(expression).value());
+      break;
+    }
+
+    case ExpressionKind::StringTypeLiteral:
+      result.mutable_string_type_literal();
+      break;
+
+    case ExpressionKind::ContinuationTypeLiteral:
+      result.mutable_continuation_type_literal();
+      break;
+
+    case ExpressionKind::TypeTypeLiteral:
+      result.mutable_type_type_literal();
+      break;
+
+    case ExpressionKind::UnimplementedExpression:
+      result.mutable_unimplemented_expression();
+      break;
+  }
+  return result;
+}
+
+// Converts a binding pattern: copies its name and converts its type pattern.
+static auto BindingPatternToProto(const BindingPattern& pattern)
+    -> Fuzzing::BindingPattern {
+  Fuzzing::BindingPattern result;
+  result.set_name(pattern.name());
+  auto& type_proto = *result.mutable_type();
+  type_proto = PatternToProto(pattern.type());
+  return result;
+}
+
+// Converts a tuple pattern by converting each of its field patterns, in
+// order, into the repeated `fields` entry of the proto.
+static auto TuplePatternToProto(const TuplePattern& tuple_pattern)
+    -> Fuzzing::TuplePattern {
+  Fuzzing::TuplePattern result;
+  for (Nonnull<const Pattern*> element : tuple_pattern.fields()) {
+    auto& slot = *result.add_fields();
+    slot = PatternToProto(*element);
+  }
+  return result;
+}
+
+// Translates `pattern` into a `Fuzzing::Pattern`, filling in the proto
+// sub-message that matches `pattern.kind()`. Nested patterns and expressions
+// are translated recursively.
+static auto PatternToProto(const Pattern& pattern) -> Fuzzing::Pattern {
+  Fuzzing::Pattern result;
+  switch (pattern.kind()) {
+    case PatternKind::BindingPattern:
+      *result.mutable_binding_pattern() =
+          BindingPatternToProto(cast<BindingPattern>(pattern));
+      break;
+
+    case PatternKind::TuplePattern: {
+      const auto& src = cast<TuplePattern>(pattern);
+      *result.mutable_tuple_pattern() = TuplePatternToProto(src);
+      break;
+    }
+
+    case PatternKind::AlternativePattern: {
+      const auto& src = cast<AlternativePattern>(pattern);
+      auto& dest = *result.mutable_alternative_pattern();
+      dest.set_alternative_name(src.alternative_name());
+      *dest.mutable_choice_type() = ExpressionToProto(src.choice_type());
+      *dest.mutable_arguments() = TuplePatternToProto(src.arguments());
+      break;
+    }
+
+    case PatternKind::ExpressionPattern: {
+      auto& dest = *result.mutable_expression_pattern();
+      *dest.mutable_expression() =
+          ExpressionToProto(cast<ExpressionPattern>(pattern).expression());
+      break;
+    }
+
+    case PatternKind::AutoPattern:
+      // No payload; selecting the member is all that is needed.
+      result.mutable_auto_pattern();
+      break;
+
+    case PatternKind::VarPattern: {
+      auto& dest = *result.mutable_var_pattern();
+      *dest.mutable_pattern() =
+          PatternToProto(cast<VarPattern>(pattern).pattern());
+      break;
+    }
+  }
+  return result;
+}
+
+// Converts a block by converting each contained statement, in order, into the
+// repeated `statements` entry of the proto.
+static auto BlockStatementToProto(const Block& block)
+    -> Fuzzing::BlockStatement {
+  Fuzzing::BlockStatement result;
+  for (Nonnull<const Statement*> child : block.statements()) {
+    auto& slot = *result.add_statements();
+    slot = StatementToProto(*child);
+  }
+  return result;
+}
+
+// Translates `statement` into a `Fuzzing::Statement`, filling in the proto
+// sub-message that matches `statement.kind()`. Nested statements, patterns
+// and expressions are translated recursively.
+static auto StatementToProto(const Statement& statement) -> Fuzzing::Statement {
+  Fuzzing::Statement result;
+  switch (statement.kind()) {
+    case StatementKind::ExpressionStatement: {
+      auto& dest = *result.mutable_expression_statement();
+      *dest.mutable_expression() =
+          ExpressionToProto(cast<ExpressionStatement>(statement).expression());
+      break;
+    }
+
+    case StatementKind::Assign: {
+      const auto& src = cast<Assign>(statement);
+      auto& dest = *result.mutable_assign();
+      *dest.mutable_lhs() = ExpressionToProto(src.lhs());
+      *dest.mutable_rhs() = ExpressionToProto(src.rhs());
+      break;
+    }
+
+    case StatementKind::VariableDefinition: {
+      const auto& src = cast<VariableDefinition>(statement);
+      auto& dest = *result.mutable_variable_definition();
+      *dest.mutable_pattern() = PatternToProto(src.pattern());
+      *dest.mutable_init() = ExpressionToProto(src.init());
+      break;
+    }
+
+    case StatementKind::If: {
+      const auto& src = cast<If>(statement);
+      auto& dest = *result.mutable_if_statement();
+      *dest.mutable_condition() = ExpressionToProto(src.condition());
+      *dest.mutable_then_block() = BlockStatementToProto(src.then_block());
+      // The else block is optional; leave the proto field unset without one.
+      if (src.else_block().has_value()) {
+        *dest.mutable_else_block() = BlockStatementToProto(**src.else_block());
+      }
+      break;
+    }
+
+    case StatementKind::Return: {
+      const auto& src = cast<Return>(statement);
+      auto& dest = *result.mutable_return_statement();
+      // Exactly one of `is_omitted_expression` and `expression` is populated.
+      if (src.is_omitted_expression()) {
+        dest.set_is_omitted_expression(true);
+      } else {
+        *dest.mutable_expression() = ExpressionToProto(src.expression());
+      }
+      break;
+    }
+
+    case StatementKind::Block: {
+      const auto& src = cast<Block>(statement);
+      *result.mutable_block() = BlockStatementToProto(src);
+      break;
+    }
+
+    case StatementKind::While: {
+      const auto& src = cast<While>(statement);
+      auto& dest = *result.mutable_while_statement();
+      *dest.mutable_condition() = ExpressionToProto(src.condition());
+      *dest.mutable_body() = BlockStatementToProto(src.body());
+      break;
+    }
+
+    case StatementKind::Match: {
+      const auto& src = cast<Match>(statement);
+      auto& dest = *result.mutable_match();
+      *dest.mutable_expression() = ExpressionToProto(src.expression());
+      for (const Match::Clause& clause : src.clauses()) {
+        auto& clause_dest = *dest.add_clauses();
+        *clause_dest.mutable_pattern() = PatternToProto(clause.pattern());
+        *clause_dest.mutable_statement() = StatementToProto(clause.statement());
+      }
+      break;
+    }
+
+    case StatementKind::Continuation: {
+      const auto& src = cast<Continuation>(statement);
+      auto& dest = *result.mutable_continuation();
+      dest.set_name(src.name());
+      *dest.mutable_body() = BlockStatementToProto(src.body());
+      break;
+    }
+
+    case StatementKind::Run: {
+      auto& dest = *result.mutable_run();
+      *dest.mutable_argument() =
+          ExpressionToProto(cast<Run>(statement).argument());
+      break;
+    }
+
+    // The remaining statement kinds carry no payload: calling the `mutable_*`
+    // accessor selects the member and leaves it default-initialized.
+    case StatementKind::Await:
+      result.mutable_await();
+      break;
+
+    case StatementKind::Break:
+      result.mutable_break_statement();
+      break;
+
+    case StatementKind::Continue:
+      result.mutable_continue_statement();
+      break;
+  }
+  return result;
+}
+
+// Converts a function's return term. The proto `kind` records whether the
+// term is omitted, `auto`, or an explicit type expression; the `type` field is
+// only populated in the last case.
+static auto ReturnTermToProto(const ReturnTerm& return_term)
+    -> Fuzzing::ReturnTerm {
+  Fuzzing::ReturnTerm result;
+  if (return_term.is_omitted()) {
+    result.set_kind(Fuzzing::ReturnTerm::Omitted);
+    return result;
+  }
+  if (return_term.is_auto()) {
+    result.set_kind(Fuzzing::ReturnTerm::Auto);
+    return result;
+  }
+  result.set_kind(Fuzzing::ReturnTerm::Expression);
+  auto& type_proto = *result.mutable_type();
+  type_proto = ExpressionToProto(**return_term.type_expression());
+  return result;
+}
+
+// Converts a generic binding: copies its name and converts its type
+// expression.
+static auto GenericBindingToProto(const GenericBinding& binding)
+    -> Fuzzing::GenericBinding {
+  Fuzzing::GenericBinding result;
+  result.set_name(binding.name());
+  auto& type_proto = *result.mutable_type();
+  type_proto = ExpressionToProto(binding.type());
+  return result;
+}
+
+// Translates `declaration` into a `Fuzzing::Declaration`, filling in the
+// proto sub-message that matches `declaration.kind()`. Member declarations,
+// patterns, statements and expressions are translated recursively.
+static auto DeclarationToProto(const Declaration& declaration)
+    -> Fuzzing::Declaration {
+  Fuzzing::Declaration result;
+  switch (declaration.kind()) {
+    case DeclarationKind::FunctionDeclaration: {
+      const auto& src = cast<FunctionDeclaration>(declaration);
+      auto& dest = *result.mutable_function();
+      dest.set_name(src.name());
+      for (Nonnull<const GenericBinding*> deduced : src.deduced_parameters()) {
+        *dest.add_deduced_parameters() = GenericBindingToProto(*deduced);
+      }
+      // `me_pattern` is only emitted for methods.
+      if (src.is_method()) {
+        *dest.mutable_me_pattern() = BindingPatternToProto(src.me_pattern());
+      }
+      *dest.mutable_param_pattern() = TuplePatternToProto(src.param_pattern());
+      *dest.mutable_return_term() = ReturnTermToProto(src.return_term());
+      // The body is optional; leave the proto field unset without one.
+      if (src.body().has_value()) {
+        *dest.mutable_body() = BlockStatementToProto(**src.body());
+      }
+      break;
+    }
+
+    case DeclarationKind::ClassDeclaration: {
+      const auto& src = cast<ClassDeclaration>(declaration);
+      auto& dest = *result.mutable_class_declaration();
+      dest.set_name(src.name());
+      for (Nonnull<const Declaration*> member : src.members()) {
+        *dest.add_members() = DeclarationToProto(*member);
+      }
+      break;
+    }
+
+    case DeclarationKind::ChoiceDeclaration: {
+      const auto& src = cast<ChoiceDeclaration>(declaration);
+      auto& dest = *result.mutable_choice();
+      dest.set_name(src.name());
+      for (Nonnull<const AlternativeSignature*> alternative :
+           src.alternatives()) {
+        auto& alternative_dest = *dest.add_alternatives();
+        alternative_dest.set_name(alternative->name());
+        *alternative_dest.mutable_signature() =
+            ExpressionToProto(alternative->signature());
+      }
+      break;
+    }
+
+    case DeclarationKind::VariableDeclaration: {
+      const auto& src = cast<VariableDeclaration>(declaration);
+      auto& dest = *result.mutable_variable();
+      *dest.mutable_binding() = BindingPatternToProto(src.binding());
+      // The initializer is optional; leave the proto field unset without one.
+      if (src.has_initializer()) {
+        *dest.mutable_initializer() = ExpressionToProto(src.initializer());
+      }
+      break;
+    }
+
+    case DeclarationKind::InterfaceDeclaration: {
+      const auto& src = cast<InterfaceDeclaration>(declaration);
+      auto& dest = *result.mutable_interface();
+      dest.set_name(src.name());
+      for (const auto& member : src.members()) {
+        *dest.add_members() = DeclarationToProto(*member);
+      }
+      *dest.mutable_self() = GenericBindingToProto(*src.self());
+      break;
+    }
+
+    case DeclarationKind::ImplDeclaration: {
+      const auto& src = cast<ImplDeclaration>(declaration);
+      auto& dest = *result.mutable_impl();
+      switch (src.kind()) {
+        case ImplKind::InternalImpl:
+          dest.set_kind(Fuzzing::ImplDeclaration::InternalImpl);
+          break;
+        case ImplKind::ExternalImpl:
+          dest.set_kind(Fuzzing::ImplDeclaration::ExternalImpl);
+          break;
+      }
+      *dest.mutable_impl_type() = ExpressionToProto(*src.impl_type());
+      *dest.mutable_interface() = ExpressionToProto(src.interface());
+      for (const auto& member : src.members()) {
+        *dest.add_members() = DeclarationToProto(*member);
+      }
+      break;
+    }
+  }
+  return result;
+}
+
+// Builds a protobuf representation of `ast`: its package statement, its
+// `is_api` flag, and each of its declarations in order.
+//
+// The signature uses the trailing-return form so that it matches the
+// declaration in ast_to_proto.h and the other functions in this file.
+auto AstToProto(const AST& ast) -> Fuzzing::CompilationUnit {
+  Fuzzing::CompilationUnit compilation_unit;
+  *compilation_unit.mutable_package_statement() =
+      LibraryNameToProto(ast.package);
+  compilation_unit.set_is_api(ast.is_api);
+  for (Nonnull<const Declaration*> declaration : ast.declarations) {
+    *compilation_unit.add_declarations() = DeclarationToProto(*declaration);
+  }
+  return compilation_unit;
+}
+
+}  // namespace Carbon

+ 18 - 0
executable_semantics/fuzzing/ast_to_proto.h

@@ -0,0 +1,18 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef THIRD_PARTY_CARBON_LANG_EXECUTABLE_SEMANTICS_FUZZING_AST_TO_PROTO_H_
+#define THIRD_PARTY_CARBON_LANG_EXECUTABLE_SEMANTICS_FUZZING_AST_TO_PROTO_H_
+
+#include "common/fuzzing/carbon.pb.h"
+#include "executable_semantics/ast/ast.h"
+
+namespace Carbon {
+
+// Builds a protobuf representation of `ast`.
+//
+// `ast` is only read; the resulting `Fuzzing::CompilationUnit` is returned by
+// value.
+auto AstToProto(const AST& ast) -> Fuzzing::CompilationUnit;
+
+}  // namespace Carbon
+
+#endif  // THIRD_PARTY_CARBON_LANG_EXECUTABLE_SEMANTICS_FUZZING_AST_TO_PROTO_H_

+ 130 - 0
executable_semantics/fuzzing/ast_to_proto_test.cpp

@@ -0,0 +1,130 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "executable_semantics/fuzzing/ast_to_proto.h"
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+#include <filesystem>
+#include <numeric>
+#include <set>
+#include <variant>
+
+#include "executable_semantics/syntax/parse.h"
+#include "google/protobuf/descriptor.h"
+#include "llvm/Support/Error.h"
+
+namespace Carbon::Testing {
+namespace {
+
+using ::google::protobuf::Descriptor;
+using ::google::protobuf::FieldDescriptor;
+using ::google::protobuf::Message;
+using ::google::protobuf::Reflection;
+
+// Input files to convert in the test below. Null until `main` assigns it from
+// the command-line arguments that remain after gtest has parsed its flags.
+static std::vector<llvm::StringRef>* carbon_files = nullptr;
+
+// Returns the qualified name of `field`: the full name of the message
+// `descriptor`, a dot, and then the field's own name.
+auto FieldName(const Descriptor& descriptor, const FieldDescriptor& field)
+    -> std::string {
+  std::string qualified_name = descriptor.full_name();
+  qualified_name += ".";
+  qualified_name += field.name();
+  return qualified_name;
+}
+
+// Records `descriptor` and everything reachable from it: message full names
+// are added to `all_messages` and qualified field names to `all_fields`.
+// A message type that is already in `all_messages` is not descended into
+// again, which keeps the recursion finite for self-referencing messages.
+auto CollectAllFields(const Descriptor& descriptor,
+                      std::set<std::string>& all_messages,
+                      std::set<std::string>& all_fields) -> void {
+  all_messages.insert(descriptor.full_name());
+  const int num_fields = descriptor.field_count();
+  for (int index = 0; index < num_fields; ++index) {
+    const FieldDescriptor& field = *descriptor.field(index);
+    all_fields.insert(FieldName(descriptor, field));
+    if (field.cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) {
+      continue;
+    }
+    const Descriptor& nested = *field.message_type();
+    if (all_messages.count(nested.full_name()) == 0) {
+      CollectAllFields(nested, all_messages, all_fields);
+    }
+  }
+}
+
+// Walks a populated `message` and adds to `used_fields` the qualified name of
+// every field that is set: singular fields for which `HasField` is true and
+// repeated fields holding at least one element. Message-typed fields that are
+// set are traversed recursively, element by element for repeated ones.
+auto CollectUsedFields(const Message& message,
+                       std::set<std::string>& used_fields) -> void {
+  const Descriptor* descriptor = message.GetDescriptor();
+  const Reflection* reflection = message.GetReflection();
+  for (int i = 0; i < descriptor->field_count(); ++i) {
+    const FieldDescriptor* field = descriptor->field(i);
+    const bool is_message =
+        field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE;
+    if (field->is_repeated()) {
+      const int element_count = reflection->FieldSize(message, field);
+      if (element_count == 0) {
+        continue;
+      }
+      used_fields.insert(FieldName(*descriptor, *field));
+      if (is_message) {
+        // A distinct index name avoids shadowing the field index `i`.
+        for (int j = 0; j < element_count; ++j) {
+          CollectUsedFields(reflection->GetRepeatedMessage(message, field, j),
+                            used_fields);
+        }
+      }
+    } else if (reflection->HasField(message, field)) {
+      used_fields.insert(FieldName(*descriptor, *field));
+      if (is_message) {
+        CollectUsedFields(reflection->GetMessage(message, field), used_fields);
+      }
+    }
+  }
+}
+
+// Returns the qualified names of all fields reachable from the type of
+// `message` that are not set anywhere inside this particular `message`.
+auto GetUnusedFields(const Message& message) -> std::set<std::string> {
+  std::set<std::string> known_messages;
+  std::set<std::string> declared_fields;
+  CollectAllFields(*message.GetDescriptor(), known_messages, declared_fields);
+
+  std::set<std::string> populated_fields;
+  CollectUsedFields(message, populated_fields);
+
+  // Keep each declared field that never showed up as populated.
+  std::set<std::string> never_set;
+  for (const std::string& name : declared_fields) {
+    if (populated_fields.count(name) == 0) {
+      never_set.insert(name);
+    }
+  }
+  return never_set;
+}
+
+// A 'smoke' test: after converting every Carbon test source that parses
+// successfully to its proto representation and merging the results, each field
+// declared in `carbon.proto` is expected to be set at least once. Sources that
+// fail to parse are skipped.
+TEST(AstToProtoTest, SetsAllProtoFields) {
+  Carbon::Fuzzing::CompilationUnit merged_proto;
+  for (const llvm::StringRef f : *carbon_files) {
+    Carbon::Arena arena;
+    const ErrorOr<AST> ast = Carbon::Parse(&arena, f, /*trace=*/false);
+    if (!ast.ok()) {
+      continue;
+    }
+    merged_proto.MergeFrom(AstToProto(*ast));
+  }
+
+  const std::set<std::string> unused_fields = GetUnusedFields(merged_proto);
+
+  // One unused field per line, each preceded by a newline.
+  std::string unused_listing;
+  for (const std::string& name : unused_fields) {
+    unused_listing += '\n';
+    unused_listing += name;
+  }
+  EXPECT_EQ(unused_fields.size(), 0) << "Unused fields" << unused_listing;
+}
+
+}  // namespace
+}  // namespace Carbon::Testing
+
+// Test entry point: lets gtest consume its own flags, stores the remaining
+// arguments as the list of input files, and then runs all tests.
+int main(int argc, char** argv) {
+  ::testing::InitGoogleTest(&argc, argv);
+  // gtest should remove flags, leaving just input files after argv[0].
+  char** const files_begin = argv + 1;
+  char** const files_end = argv + argc;
+  // The vector is heap-allocated and not deleted here, so it is still alive
+  // while RUN_ALL_TESTS() executes.
+  Carbon::Testing::carbon_files =
+      new std::vector<llvm::StringRef>(files_begin, files_end);
+  return RUN_ALL_TESTS();
+}

+ 2 - 1
executable_semantics/interpreter/resolve_names.cpp

@@ -161,7 +161,8 @@ static auto ResolveNames(Expression& expression,
     case ExpressionKind::TypeTypeLiteral:
       break;
     case ExpressionKind::UnimplementedExpression:
-      FATAL() << "Unimplemented";
+      return FATAL_COMPILATION_ERROR(expression.source_loc())
+             << "Unimplemented";
   }
   return Success();
 }

+ 4 - 1
executable_semantics/syntax/BUILD

@@ -4,7 +4,10 @@
 
 load("@mypy_integration//:mypy.bzl", "mypy_test")
 
-package(default_visibility = ["//executable_semantics:__pkg__"])
+package(default_visibility = [
+    "//executable_semantics:__pkg__",
+    "//executable_semantics/fuzzing:__pkg__",
+])
 
 cc_library(
     name = "bison_wrap",

+ 6 - 0
executable_semantics/testdata/BUILD

@@ -13,3 +13,9 @@ glob_lit_tests(
     driver = "lit.cfg.py",
     test_file_exts = ["carbon"],
 )
+
+# Every `.carbon` file under this package (recursively), exported with public
+# visibility so that targets in other packages can depend on these sources.
+filegroup(
+    name = "carbon_files",
+    srcs = glob(["**/*.carbon"]),
+    visibility = ["//visibility:public"],
+)

+ 16 - 0
executable_semantics/testdata/basic_syntax/fail_unimplemented_example.carbon

@@ -0,0 +1,16 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// RUN: %{not} %{executable_semantics} %s 2>&1 | \
+// RUN:   %{FileCheck} --match-full-lines --allow-unused-prefixes=false %s
+// RUN: %{not} %{executable_semantics} --trace %s 2>&1 | \
+// RUN:   %{FileCheck} --match-full-lines --allow-unused-prefixes %s
+// AUTOUPDATE: %{executable_semantics} %s
+// CHECK: COMPILATION ERROR: {{.*}}/executable_semantics/testdata/basic_syntax/fail_unimplemented_example.carbon:15: Unimplemented
+
+package ExecutableSemanticsTest api;
+
+fn Main() -> i32 {
+  return 1 __unimplemented_example_infix 2;
+}

+ 23 - 5
scripts/fix_cc_deps.py

@@ -25,9 +25,20 @@ import scripts_utils  # type: ignore
 # Maps external repository names to a method translating bazel labels to file
 # paths for that repository.
 EXTERNAL_REPOS: Dict[str, Callable[[str], str]] = {
-    "@llvm-project": lambda x: re.sub("^(.*:(lib|include))/", "", x)
+    # @llvm-project//llvm:include/llvm/Support/Error.h ->
+    #   llvm/Support/Error.h
+    "@llvm-project": lambda x: re.sub("^(.*:(lib|include))/", "", x),
+    # @com_google_protobuf//:src/google/protobuf/descriptor.h ->
+    #   google/protobuf/descriptor.h
+    "@com_google_protobuf": lambda x: re.sub("^(.*:src)/", "", x),
 }
 
+# TODO: proto rules are aspect-based and their generated files don't show up in
+# `bazel query` output.
+# Try using `bazel cquery --output=starlark` to print `target.files`.
+# For protobuf, need to add support for `alias` rule kind.
+IGNORE_HEADER_REGEX = re.compile("^(.*\\.pb\\.h)|(.*google/protobuf/.*)$")
+
 
 class Rule(NamedTuple):
     # For cc_* rules:
@@ -157,10 +168,17 @@ def get_missing_deps(
                 if header in rule_files:
                     continue
                 if header not in header_to_rule_map:
-                    exit(
-                        f"Missing rule for #include '{header}' in "
-                        f"'{source_file}'"
-                    )
+                    if IGNORE_HEADER_REGEX.match(header):
+                        print(
+                            f"Ignored missing #include '{header}' in "
+                            f"'{source_file}'"
+                        )
+                        continue
+                    else:
+                        exit(
+                            f"Missing rule for #include '{header}' in "
+                            f"'{source_file}'"
+                        )
                 dep_choices = header_to_rule_map[header]
                 if not dep_choices.intersection(rule.deps):
                     if len(dep_choices) > 1: