// Part of the Carbon Language project, under the Apache License v2.0 with LLVM // Exceptions. See /LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // ----------------------------------------------------------------------------- // Bison Configuration // ----------------------------------------------------------------------------- %require "3.2" %language "c++" // We don't need a separate header for Bison locations. %define api.location.file none // Use a type-safe C++ variant for semantic values %define api.value.type variant // Have Bison generate the functions ‘make_TEXT’ and ‘make_NUMBER’, but also // ‘make_YYEOF’, for the end of input. %define api.token.constructor // Generate the parser as `::Carbon::Parser`. %define api.namespace { Carbon } %define api.parser.class { Parser } // Make parse error messages more detailed %define parse.error verbose // Enable support for parser debugging %define parse.trace true // Generate location structs. %locations // Parameters to the parser and lexer. // // Parameters to the parser are stored therein as protected data members, and // thus available to its methods. // "inout" parameters passed to both the parser and the lexer. %param {Nonnull arena} %param {yyscan_t yyscanner} %param {ParseAndLexContext& context} // "out" parameter passed to the parser, where the AST is written. %parse-param {std::optional* ast} // No shift-reduce conflicts are expected. // See README.md#precedence-and-associativity for a description of how // operator precedence is expressed. %expect 0 // ----------------------------------------------------------------------------- %code top { #include #include #include #include #include #include "common/check.h" #include "executable_semantics/syntax/parse_and_lex_context.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/raw_ostream.h" } // %code top %code requires { #include #include "executable_semantics/ast/ast.h" #include "executable_semantics/ast/declaration.h" #include "executable_semantics/ast/expression.h" #include "executable_semantics/ast/paren_contents.h" #include "executable_semantics/ast/pattern.h" #include "executable_semantics/ast/value_category.h" #include "executable_semantics/common/arena.h" #include "executable_semantics/common/nonnull.h" #include "executable_semantics/syntax/bison_wrap.h" namespace Carbon { class ParseAndLexContext; } // namespace Carbon typedef void* yyscan_t; } // %code requires %code { void Carbon::Parser::error(const location_type&, const std::string& message) { context.RecordSyntaxError(message); } } // %code %token integer_literal %token identifier %token intrinsic_identifier %token sized_type_literal %token string_literal %type designator %type impl_kind %type > package_directive %type import_directive %type > import_directives %type optional_library_path %type api_or_impl %type > declaration %type > function_declaration %type >> declaration_list %type > statement %type > if_statement %type >> optional_else %type , bool>> return_expression %type > nonempty_block %type > block %type >> statement_list %type > primary_expression %type > postfix_expression %type > ref_deref_expression %type > type_expression %type > fn_type_expression %type > minus_expression %type > multiplicative_operand %type > multiplicative_lhs %type > multiplicative_expression %type > additive_operand %type > additive_lhs %type > additive_expression %type > unimpl_expression %type > value_expression %type > comparison_operand %type > comparison_expression %type > not_expression %type > predicate_expression %type > and_or_operand %type > and_lhs %type > and_expression %type > or_lhs %type > or_expression %type > statement_expression %type > if_expression %type > expression %type > generic_binding %type >> deduced_params %type >> deduced_param_list %type > pattern %type > non_expression_pattern %type > return_term %type > paren_expression %type > struct_literal %type > struct_literal_contents %type > struct_type_literal %type > struct_type_literal_contents %type > tuple %type binding_lhs %type >> receiver %type > variable_declaration %type > paren_expression_base %type > paren_expression_contents %type > paren_pattern %type > tuple_pattern %type > maybe_empty_tuple_pattern %type > paren_pattern_base %type > paren_pattern_contents %type > alternative %type >> alternative_list %type >> alternative_list_contents %type > clause %type > clause_list %token // Most tokens have their spelling defined in lexer.lpp. // table-begin AMPERSAND AND API ARROW AS AUTO AWAIT BOOL BREAK CASE CHOICE CLASS COLON COLON_BANG COMMA CONTINUATION CONTINUATION_TYPE CONTINUE DEFAULT DOUBLE_ARROW ELSE EQUAL EQUAL_EQUAL EXTERNAL FALSE FN FN_TYPE IF IMPL IMPORT INTERFACE LEFT_CURLY_BRACE LEFT_PARENTHESIS LEFT_SQUARE_BRACKET LET LIBRARY MATCH MINUS NOT OR PACKAGE PERIOD PLUS RETURN RIGHT_CURLY_BRACE RIGHT_PARENTHESIS RIGHT_SQUARE_BRACKET RUN SEMICOLON SLASH STRING THEN TRUE TYPE UNDERSCORE UNIMPL_EXAMPLE VAR WHILE // table-end // Used to track EOF. END_OF_FILE 0 // Only used for precedence. FNARROW "-> in return type" // The lexer determines the arity and fixity of each `*` based on whitespace // and adjacent tokens. UNARY_STAR indicates that the operator is unary but // could be either prefix or postfix. UNARY_STAR "unary *" PREFIX_STAR "prefix *" POSTFIX_STAR "postfix *" BINARY_STAR "binary *" ; %start input %% input: package_directive import_directives declaration_list { *ast = AST({.package = $1.first, .is_api = $1.second, .imports = std::move($2), .declarations = std::move($3)}); } ; package_directive: PACKAGE identifier optional_library_path api_or_impl SEMICOLON { $$ = {LibraryName({.package = $2, .path = $3}), $4}; } ; import_directive: IMPORT identifier optional_library_path SEMICOLON { $$ = LibraryName({.package = $2, .path = $3}); } ; import_directives: // Empty { $$ = std::vector(); } | import_directives import_directive { $$ = std::move($1); $$.push_back($2); } ; optional_library_path: // Empty { $$ = ""; } | LIBRARY string_literal { $$ = $2; } ; api_or_impl: API { $$ = true; } | IMPL { $$ = false; } ; primary_expression: identifier { $$ = arena->New(context.source_loc(), $1); } | integer_literal { $$ = arena->New(context.source_loc(), $1); } | string_literal { $$ = arena->New(context.source_loc(), $1); } | TRUE { $$ = arena->New(context.source_loc(), true); } | FALSE { $$ = arena->New(context.source_loc(), false); } | sized_type_literal { int val; CHECK(llvm::to_integer(llvm::StringRef($1).substr(1), val)); CHECK($1[0] == 'i' && val == 32) << "Only i32 is supported for now: " << $1; $$ = arena->New(context.source_loc()); } | STRING { $$ = arena->New(context.source_loc()); } | BOOL { $$ = arena->New(context.source_loc()); } | TYPE { $$ = arena->New(context.source_loc()); } | CONTINUATION_TYPE { $$ = arena->New(context.source_loc()); } | paren_expression { $$ = $1; } | struct_literal { $$ = $1; } | struct_type_literal { $$ = $1; } ; postfix_expression: primary_expression | postfix_expression designator { $$ = arena->New(context.source_loc(), $1, $2); } | postfix_expression LEFT_SQUARE_BRACKET expression RIGHT_SQUARE_BRACKET { $$ = arena->New(context.source_loc(), $1, $3); } | intrinsic_identifier tuple { $$ = arena->New($1, $2, context.source_loc()); } | postfix_expression tuple { $$ = arena->New(context.source_loc(), $1, $2); } | postfix_expression POSTFIX_STAR { $$ = arena->New( context.source_loc(), Operator::Ptr, std::vector>({$1})); } | postfix_expression UNARY_STAR { $$ = arena->New( context.source_loc(), Operator::Ptr, std::vector>({$1})); } ; ref_deref_expression: postfix_expression | PREFIX_STAR ref_deref_expression { $$ = arena->New( context.source_loc(), Operator::Deref, std::vector>({$2})); } | UNARY_STAR ref_deref_expression { $$ = arena->New( context.source_loc(), Operator::Deref, std::vector>({$2})); } | AMPERSAND ref_deref_expression { $$ = arena->New( context.source_loc(), Operator::AddressOf, std::vector>({$2})); } ; fn_type_expression: FN_TYPE tuple ARROW type_expression { $$ = arena->New(context.source_loc(), $2, $4); } ; type_expression: ref_deref_expression | fn_type_expression ; minus_expression: // ref_deref_expression excluded due to precedence diamond. MINUS ref_deref_expression { $$ = arena->New( context.source_loc(), Operator::Neg, std::vector>({$2})); } ; multiplicative_operand: ref_deref_expression | minus_expression ; multiplicative_lhs: ref_deref_expression | multiplicative_expression ; multiplicative_expression: minus_expression | multiplicative_lhs BINARY_STAR multiplicative_operand { $$ = arena->New( context.source_loc(), Operator::Mul, std::vector>({$1, $3})); } ; additive_operand: ref_deref_expression | multiplicative_expression ; additive_lhs: ref_deref_expression | additive_expression ; additive_expression: multiplicative_expression | additive_lhs PLUS additive_operand { $$ = arena->New( context.source_loc(), Operator::Add, std::vector>({$1, $3})); } | additive_lhs MINUS additive_operand { $$ = arena->New( context.source_loc(), Operator::Sub, std::vector>({$1, $3})); } ; unimpl_expression: // ref_deref_expression excluded due to precedence diamond. ref_deref_expression UNIMPL_EXAMPLE ref_deref_expression { $$ = arena->New(context.source_loc(), "ExampleInfix", $1, $3); } ; value_expression: // ref_deref_expression excluded due to precedence diamond. additive_expression | fn_type_expression | unimpl_expression ; comparison_operand: ref_deref_expression | value_expression ; comparison_expression: value_expression | comparison_operand EQUAL_EQUAL comparison_operand { $$ = arena->New( context.source_loc(), Operator::Eq, std::vector>({$1, $3})); } ; not_expression: NOT ref_deref_expression { $$ = arena->New( context.source_loc(), Operator::Not, std::vector>({$2})); } ; predicate_expression: // ref_deref_expression excluded due to precedence diamond. not_expression | comparison_expression ; and_or_operand: ref_deref_expression | predicate_expression ; and_lhs: and_or_operand | and_expression ; and_expression: // predicate_expression excluded due to precedence diamond. and_lhs AND and_or_operand { $$ = arena->New( context.source_loc(), Operator::And, std::vector>({$1, $3})); } ; or_lhs: and_or_operand | or_expression ; or_expression: // predicate_expression excluded due to precedence diamond. or_lhs OR and_or_operand { $$ = arena->New( context.source_loc(), Operator::Or, std::vector>({$1, $3})); } ; statement_expression: ref_deref_expression | predicate_expression | and_expression | or_expression ; if_expression: statement_expression | IF expression THEN if_expression ELSE if_expression { $$ = arena->New(context.source_loc(), $2, $4, $6); } ; expression: if_expression ; designator: PERIOD identifier { $$ = $2; } ; paren_expression: paren_expression_base { $$ = ExpressionFromParenContents(arena, context.source_loc(), $1); } ; tuple: paren_expression_base { $$ = TupleExpressionFromParenContents(arena, context.source_loc(), $1); } ; paren_expression_base: LEFT_PARENTHESIS RIGHT_PARENTHESIS { $$ = {.elements = {}, .has_trailing_comma = false}; } | LEFT_PARENTHESIS paren_expression_contents RIGHT_PARENTHESIS { $$ = $2; } | LEFT_PARENTHESIS paren_expression_contents COMMA RIGHT_PARENTHESIS { $$ = $2; $$.has_trailing_comma = true; } ; paren_expression_contents: expression { $$ = {.elements = {$1}, .has_trailing_comma = false}; } | paren_expression_contents COMMA expression { $$ = $1; $$.elements.push_back($3); } ; struct_literal: LEFT_CURLY_BRACE struct_literal_contents RIGHT_CURLY_BRACE { $$ = arena->New(context.source_loc(), $2); } | LEFT_CURLY_BRACE struct_literal_contents COMMA RIGHT_CURLY_BRACE { $$ = arena->New(context.source_loc(), $2); } ; struct_literal_contents: designator EQUAL expression { $$ = {FieldInitializer($1, $3)}; } | struct_literal_contents COMMA designator EQUAL expression { $$ = $1; $$.push_back(FieldInitializer($3, $5)); } ; struct_type_literal: LEFT_CURLY_BRACE RIGHT_CURLY_BRACE { $$ = arena->New(context.source_loc()); } | LEFT_CURLY_BRACE struct_type_literal_contents RIGHT_CURLY_BRACE { $$ = arena->New(context.source_loc(), $2); } | LEFT_CURLY_BRACE struct_type_literal_contents COMMA RIGHT_CURLY_BRACE { $$ = arena->New(context.source_loc(), $2); } ; struct_type_literal_contents: designator COLON expression { $$ = {FieldInitializer($1, $3)}; } | struct_type_literal_contents COMMA designator COLON expression { $$ = $1; $$.push_back(FieldInitializer($3, $5)); } ; // In many cases, using `pattern` recursively will result in ambiguities. // When that happens, it's necessary to factor out two separate productions, // one for when the sub-pattern is an expression, and one for when it is not. // To facilitate this, non-terminals besides `pattern` whose names contain // `pattern` are structured to be disjoint from `expression`, unless otherwise // specified. pattern: non_expression_pattern { $$ = $1; } | expression { $$ = arena->New($1); } ; non_expression_pattern: AUTO { $$ = arena->New(context.source_loc()); } | binding_lhs COLON pattern { $$ = arena->New(context.source_loc(), $1, $3, std::nullopt); } | paren_pattern { $$ = $1; } | postfix_expression tuple_pattern { ErrorOr> alternative_pattern = AlternativePattern::Create(arena, context.source_loc(), $1, $2); if (alternative_pattern.ok()) { $$ = *alternative_pattern; } else { context.RecordSyntaxError(alternative_pattern.error().message()); YYERROR; } } | VAR non_expression_pattern { $$ = arena->New(context.source_loc(), $2); } ; binding_lhs: identifier { $$ = $1; } | UNDERSCORE { $$ = AnonymousName; } ; paren_pattern: paren_pattern_base { $$ = PatternFromParenContents(arena, context.source_loc(), $1); } ; paren_pattern_base: LEFT_PARENTHESIS paren_pattern_contents RIGHT_PARENTHESIS { $$ = $2; } | LEFT_PARENTHESIS paren_pattern_contents COMMA RIGHT_PARENTHESIS { $$ = $2; $$.has_trailing_comma = true; } ; // paren_pattern is analogous to paren_expression, but in order to avoid // ambiguities, it must be disjoint from paren_expression, meaning it must // contain at least one non_expression_pattern. The structure of this rule // is very different from the corresponding expression rule because is has to // enforce that requirement. paren_pattern_contents: non_expression_pattern { $$ = {.elements = {$1}, .has_trailing_comma = false}; } | paren_expression_contents COMMA non_expression_pattern { $$ = ParenExpressionToParenPattern(arena, $1); $$.elements.push_back($3); } | paren_pattern_contents COMMA expression { $$ = $1; $$.elements.push_back(arena->New($3)); } | paren_pattern_contents COMMA non_expression_pattern { $$ = $1; $$.elements.push_back($3); } ; tuple_pattern: paren_pattern_base { $$ = TuplePatternFromParenContents(arena, context.source_loc(), $1); } ; // Unlike most `pattern` nonterminals, this one overlaps with `expression`, // so it should be used only when prior context (such as an introducer) // rules out the possibility of an `expression` at this point. maybe_empty_tuple_pattern: LEFT_PARENTHESIS RIGHT_PARENTHESIS { $$ = arena->New(context.source_loc(), std::vector>()); } | tuple_pattern { $$ = $1; } ; clause: CASE pattern DOUBLE_ARROW statement { $$ = Match::Clause($2, $4); } | DEFAULT DOUBLE_ARROW statement { $$ = Match::Clause(arena->New( context.source_loc(), std::string(AnonymousName), arena->New(context.source_loc()), ValueCategory::Let), $3); } ; clause_list: // Empty { $$ = {}; } | clause_list clause { $$ = $1; $$.push_back($2); } ; statement: statement_expression EQUAL expression SEMICOLON { $$ = arena->New(context.source_loc(), $1, $3); } | VAR pattern EQUAL expression SEMICOLON { $$ = arena->New(context.source_loc(), $2, $4, ValueCategory::Var); } | LET pattern EQUAL expression SEMICOLON { $$ = arena->New(context.source_loc(), $2, $4, ValueCategory::Let); } | statement_expression SEMICOLON { $$ = arena->New(context.source_loc(), $1); } | if_statement { $$ = $1; } | WHILE LEFT_PARENTHESIS expression RIGHT_PARENTHESIS block { $$ = arena->New(context.source_loc(), $3, $5); } | BREAK SEMICOLON { $$ = arena->New(context.source_loc()); } | CONTINUE SEMICOLON { $$ = arena->New(context.source_loc()); } | RETURN return_expression SEMICOLON { auto [return_exp, is_omitted_exp] = $2; $$ = arena->New(context.source_loc(), return_exp, is_omitted_exp); } // We disallow empty blocks in places where an arbitrary statement can occur // in order to avoid ambiguity with the empty struct literal `{}`. We can // allow non-empty blocks because a non-empty struct literal always starts with // a designator, and a block never does, so one token of lookahead suffices // to disambiguate. As of this writing, the "official" resolution of this // ambiguity is an open question (see // https://github.com/carbon-language/carbon-lang/blob/trunk/docs/design/classes.md#literals) | nonempty_block { $$ = $1; } | MATCH LEFT_PARENTHESIS expression RIGHT_PARENTHESIS LEFT_CURLY_BRACE clause_list RIGHT_CURLY_BRACE { $$ = arena->New(context.source_loc(), $3, $6); } | CONTINUATION identifier block { $$ = arena->New(context.source_loc(), $2, $3); } | RUN expression SEMICOLON { $$ = arena->New(context.source_loc(), $2); } | AWAIT SEMICOLON { $$ = arena->New(context.source_loc()); } ; if_statement: IF LEFT_PARENTHESIS expression RIGHT_PARENTHESIS block optional_else { $$ = arena->New(context.source_loc(), $3, $5, $6); } ; optional_else: // Empty { $$ = std::nullopt; } | ELSE if_statement { $$ = arena->New(context.source_loc(), std::vector>({$2})); } | ELSE block { $$ = $2; } ; return_expression: // Empty { $$ = {arena->New(context.source_loc()), true}; } | expression { $$ = {$1, false}; } ; statement_list: // Empty { $$ = {}; } | statement_list statement { $$ = std::move($1); $$.push_back($2); } ; block: LEFT_CURLY_BRACE statement_list RIGHT_CURLY_BRACE { $$ = arena->New(context.source_loc(), std::move($2)); } ; nonempty_block: LEFT_CURLY_BRACE statement_list statement RIGHT_CURLY_BRACE { $2.push_back($3); $$ = arena->New(context.source_loc(), std::move($2)); } ; return_term: // Empty { $$ = ReturnTerm::Omitted(context.source_loc()); } | ARROW AUTO { $$ = ReturnTerm::Auto(context.source_loc()); } | ARROW expression { $$ = ReturnTerm::Explicit($2); } ; generic_binding: identifier COLON_BANG expression { $$ = arena->New(context.source_loc(), std::move($1), $3); } ; deduced_param_list: // Empty { $$ = std::vector>(); } | generic_binding { $$ = std::vector>(); $$.push_back($1); } | generic_binding COMMA deduced_param_list { $$ = $3; $$.push_back($1); } | variable_declaration { $$ = std::vector>(); $$.push_back($1); } | variable_declaration COMMA deduced_param_list { $$ = $3; $$.push_back($1); } ; deduced_params: // Empty { $$ = std::vector>(); } | LEFT_SQUARE_BRACKET deduced_param_list RIGHT_SQUARE_BRACKET { $$ = $2; } ; receiver: // Empty { $$ = std::nullopt; } | LEFT_CURLY_BRACE variable_declaration RIGHT_CURLY_BRACE { $$ = $2; } ; function_declaration: FN identifier deduced_params receiver maybe_empty_tuple_pattern return_term block { ErrorOr fn = FunctionDeclaration::Create( arena, context.source_loc(), $2, $3, $4, $5, $6, $7); if (fn.ok()) { $$ = *fn; } else { context.RecordSyntaxError(fn.error().message()); YYERROR; } } | FN identifier deduced_params receiver maybe_empty_tuple_pattern return_term SEMICOLON { ErrorOr fn = FunctionDeclaration::Create( arena, context.source_loc(), $2, $3, $4, $5, $6, std::nullopt); if (fn.ok()) { $$ = *fn; } else { context.RecordSyntaxError(fn.error().message()); YYERROR; } } ; variable_declaration: identifier COLON pattern { $$ = arena->New(context.source_loc(), $1, $3, std::nullopt); } ; alternative: identifier tuple { $$ = arena->New(context.source_loc(), $1, $2); } | identifier { $$ = arena->New( context.source_loc(), $1, arena->New(context.source_loc())); } ; alternative_list: // Empty { $$ = {}; } | alternative_list_contents { $$ = $1; } | alternative_list_contents COMMA { $$ = $1; } ; alternative_list_contents: alternative { $$ = {std::move($1)}; } | alternative_list_contents COMMA alternative { $$ = $1; $$.push_back(std::move($3)); } ; declaration: function_declaration { $$ = $1; } | CLASS identifier LEFT_CURLY_BRACE declaration_list RIGHT_CURLY_BRACE { $$ = arena->New(context.source_loc(), $2, $4); } | CHOICE identifier LEFT_CURLY_BRACE alternative_list RIGHT_CURLY_BRACE { $$ = arena->New(context.source_loc(), $2, $4); } | VAR variable_declaration SEMICOLON { $$ = arena->New(context.source_loc(), $2, std::nullopt, ValueCategory::Var); } | VAR variable_declaration EQUAL expression SEMICOLON { $$ = arena->New(context.source_loc(), $2, $4, ValueCategory::Var); } | LET variable_declaration EQUAL expression SEMICOLON { $$ = arena->New(context.source_loc(), $2, $4, ValueCategory::Let); } | INTERFACE identifier LEFT_CURLY_BRACE declaration_list RIGHT_CURLY_BRACE { auto ty_ty = arena -> New(context.source_loc()); auto self = arena -> New(context.source_loc(), "Self", ty_ty); $$ = arena->New(context.source_loc(), $2, self, $4); } | impl_kind IMPL expression AS expression LEFT_CURLY_BRACE declaration_list RIGHT_CURLY_BRACE { $$ = arena->New(context.source_loc(), $1, $3, $5, $7); } ; impl_kind: // Internal { $$ = Carbon::ImplKind::InternalImpl; } | EXTERNAL { $$ = Carbon::ImplKind::ExternalImpl; } ; declaration_list: // Empty { $$ = {}; } | declaration_list declaration { $$ = $1; $$.push_back(Nonnull($2)); } ; %%