// Part of the Carbon Language project, under the Apache License v2.0 with LLVM // Exceptions. See /LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // ----------------------------------------------------------------------------- // Bison Configuration // ----------------------------------------------------------------------------- %require "3.2" %language "c++" // We don't need a separate header for Bison locations. %define api.location.file none // Use a type-safe C++ variant for semantic values %define api.value.type variant // Have Bison generate the functions ‘make_TEXT’ and ‘make_NUMBER’, but also // ‘make_YYEOF’, for the end of input. %define api.token.constructor // Generate the parser as `::Carbon::Parser`. %define api.namespace { Carbon } %define api.parser.class { Parser } // Make parse error messages more detailed %define parse.error verbose // Enable support for parser debugging %define parse.trace true // Generate location structs. %locations // Parameters to the parser and lexer. // // Parameters to the parser are stored therein as protected data members, and // thus available to its methods. // "inout" parameters passed to both the parser and the lexer. %param {Nonnull arena} %param {yyscan_t yyscanner} %param {ParseAndLexContext& context} // "out" parameter passed to the parser, where the AST is written. %parse-param {std::optional* ast} // No shift-reduce conflicts are expected. %expect 0 // ----------------------------------------------------------------------------- %code top { #include #include #include #include #include #include "common/check.h" #include "executable_semantics/syntax/parse_and_lex_context.h" #include "llvm/ADT/StringExtras.h" } // %code top %code requires { #include #include "executable_semantics/ast/ast.h" #include "executable_semantics/ast/declaration.h" #include "executable_semantics/ast/expression.h" #include "executable_semantics/ast/paren_contents.h" #include "executable_semantics/ast/pattern.h" #include "executable_semantics/common/arena.h" #include "executable_semantics/common/nonnull.h" #include "executable_semantics/syntax/bison_wrap.h" namespace Carbon { class ParseAndLexContext; } // namespace Carbon typedef void* yyscan_t; } // %code requires %code { void Carbon::Parser::error(const location_type&, const std::string& message) { context.PrintDiagnostic(message); } } // %code %token integer_literal %token identifier %token intrinsic_identifier %token sized_type_literal %token string_literal %type designator %type > package_directive %type import_directive %type > import_directives %type optional_library_path %type api_or_impl %type > declaration %type > function_declaration %type >> declaration_list %type > statement %type > if_statement %type >> optional_else %type , bool>> return_expression %type > nonempty_block %type > block %type >> statement_list %type > expression %type > generic_binding %type >> deduced_params %type >> deduced_param_list %type > pattern %type > non_expression_pattern %type > return_term %type > paren_expression %type > struct_literal %type > struct_literal_contents %type > struct_type_literal %type > struct_type_literal_contents %type > tuple %type binding_lhs %type > variable_declaration %type > member %type >> member_list %type > paren_expression_base %type > paren_expression_contents %type > paren_pattern %type > tuple_pattern %type > maybe_empty_tuple_pattern %type > paren_pattern_base %type > paren_pattern_contents %type > alternative %type >> alternative_list %type >> alternative_list_contents %type > clause %type > clause_list %token // Most tokens have their spelling defined in lexer.lpp. // table-begin AND API ARROW AUTO AWAIT BOOL BREAK CASE CHOICE CLASS COLON COLON_BANG COMMA CONTINUATION CONTINUATION_TYPE CONTINUE DEFAULT DOUBLE_ARROW ELSE EQUAL EQUAL_EQUAL FALSE FN FN_TYPE IF IMPL IMPORT LEFT_CURLY_BRACE LEFT_PARENTHESIS LEFT_SQUARE_BRACKET LIBRARY MATCH MINUS NOT OR PACKAGE PERIOD PLUS RETURN RIGHT_CURLY_BRACE RIGHT_PARENTHESIS RIGHT_SQUARE_BRACKET RUN SEMICOLON SLASH STRING TRUE TYPE UNDERSCORE UNIMPL_EXAMPLE VAR WHILE // table-end // Used to track EOF. END_OF_FILE 0 // Only used for precedence. FNARROW "-> in return type" // The lexer determines the arity and fixity of each `*` based on whitespace // and adjacent tokens. UNARY_STAR indicates that the operator is unary but // could be either prefix or postfix. UNARY_STAR "unary *" PREFIX_STAR "prefix *" POSTFIX_STAR "postfix *" BINARY_STAR "binary *" ; %precedence FNARROW %precedence LEFT_CURLY_BRACE RIGHT_CURLY_BRACE %precedence COLON_BANG COLON COMMA DOUBLE_ARROW %left OR AND %nonassoc EQUAL_EQUAL %left PLUS MINUS %left BINARY_STAR %precedence NOT UNARY_MINUS PREFIX_STAR // We need to give the `UNARY_STAR` token a precedence, rather than overriding // the precedence of the `expression UNARY_STAR` rule below, because bison // compares the precedence of the final token (for a shift) to the precedence // of the other rule (for a reduce) when attempting to resolve a shift-reduce // conflict. See https://stackoverflow.com/a/26188429/1041090. When UNARY_STAR // is the final token of a rule, it must be a postfix usage, so we give it the // same precedence as POSTFIX_STAR. %precedence POSTFIX_STAR UNARY_STAR %left PERIOD ARROW %nonassoc UNIMPL_EXAMPLE %precedence LEFT_PARENTHESIS RIGHT_PARENTHESIS LEFT_SQUARE_BRACKET RIGHT_SQUARE_BRACKET ; %start input %% input: package_directive import_directives declaration_list { *ast = AST({.package = $1.first, .is_api = $1.second, .imports = std::move($2), .declarations = std::move($3)}); } ; package_directive: PACKAGE identifier optional_library_path api_or_impl SEMICOLON { $$ = {LibraryName({.package = $2, .path = $3}), $4}; } ; import_directive: IMPORT identifier optional_library_path SEMICOLON { $$ = LibraryName({.package = $2, .path = $3}); } ; import_directives: // Empty { $$ = std::vector(); } | import_directives import_directive { $$ = std::move($1); $$.push_back($2); } ; optional_library_path: // Empty { $$ = ""; } | LIBRARY string_literal { $$ = $2; } ; api_or_impl: API { $$ = true; } | IMPL { $$ = false; } ; expression: identifier { $$ = arena->New(context.source_loc(), $1); } | expression designator { $$ = arena->New(context.source_loc(), $1, $2); } | expression LEFT_SQUARE_BRACKET expression RIGHT_SQUARE_BRACKET { $$ = arena->New(context.source_loc(), $1, $3); } | integer_literal { $$ = arena->New(context.source_loc(), $1); } | string_literal { $$ = arena->New(context.source_loc(), $1); } | TRUE { $$ = arena->New(context.source_loc(), true); } | FALSE { $$ = arena->New(context.source_loc(), false); } | sized_type_literal { int val; CHECK(llvm::to_integer(llvm::StringRef($1).substr(1), val)); CHECK($1[0] == 'i' && val == 32) << "Only i32 is supported for now: " << $1; $$ = arena->New(context.source_loc()); } | STRING { $$ = arena->New(context.source_loc()); } | BOOL { $$ = arena->New(context.source_loc()); } | TYPE { $$ = arena->New(context.source_loc()); } | CONTINUATION_TYPE { $$ = arena->New(context.source_loc()); } | paren_expression { $$ = $1; } | struct_literal { $$ = $1; } | struct_type_literal { $$ = $1; } | intrinsic_identifier tuple { $$ = arena->New($1, $2, context.source_loc()); } | expression EQUAL_EQUAL expression { $$ = arena->New( context.source_loc(), Operator::Eq, std::vector>({$1, $3})); } | expression PLUS expression { $$ = arena->New( context.source_loc(), Operator::Add, std::vector>({$1, $3})); } | expression MINUS expression { $$ = arena->New( context.source_loc(), Operator::Sub, std::vector>({$1, $3})); } | expression BINARY_STAR expression { $$ = arena->New( context.source_loc(), Operator::Mul, std::vector>({$1, $3})); } | expression AND expression { $$ = arena->New( context.source_loc(), Operator::And, std::vector>({$1, $3})); } | expression OR expression { $$ = arena->New( context.source_loc(), Operator::Or, std::vector>({$1, $3})); } | NOT expression { $$ = arena->New( context.source_loc(), Operator::Not, std::vector>({$2})); } | MINUS expression %prec UNARY_MINUS { $$ = arena->New( context.source_loc(), Operator::Neg, std::vector>({$2})); } | PREFIX_STAR expression { $$ = arena->New( context.source_loc(), Operator::Deref, std::vector>({$2})); } | UNARY_STAR expression %prec PREFIX_STAR { $$ = arena->New( context.source_loc(), Operator::Deref, std::vector>({$2})); } | expression tuple { $$ = arena->New(context.source_loc(), $1, $2); } | expression POSTFIX_STAR { $$ = arena->New( context.source_loc(), Operator::Ptr, std::vector>({$1})); } | expression UNARY_STAR { $$ = arena->New( context.source_loc(), Operator::Ptr, std::vector>({$1})); } | FN_TYPE tuple ARROW expression { $$ = arena->New(context.source_loc(), $2, $4); } | expression UNIMPL_EXAMPLE expression { $$ = arena->New(context.source_loc(), "ExampleInfix", $1, $3); } ; designator: PERIOD identifier { $$ = $2; } ; paren_expression: paren_expression_base { $$ = ExpressionFromParenContents(arena, context.source_loc(), $1); } ; tuple: paren_expression_base { $$ = TupleExpressionFromParenContents(arena, context.source_loc(), $1); } ; paren_expression_base: LEFT_PARENTHESIS RIGHT_PARENTHESIS { $$ = {.elements = {}, .has_trailing_comma = false}; } | LEFT_PARENTHESIS paren_expression_contents RIGHT_PARENTHESIS { $$ = $2; } | LEFT_PARENTHESIS paren_expression_contents COMMA RIGHT_PARENTHESIS { $$ = $2; $$.has_trailing_comma = true; } ; paren_expression_contents: expression { $$ = {.elements = {$1}, .has_trailing_comma = false}; } | paren_expression_contents COMMA expression { $$ = $1; $$.elements.push_back($3); } ; struct_literal: LEFT_CURLY_BRACE struct_literal_contents RIGHT_CURLY_BRACE { $$ = arena->New(context.source_loc(), $2); } | LEFT_CURLY_BRACE struct_literal_contents COMMA RIGHT_CURLY_BRACE { $$ = arena->New(context.source_loc(), $2); } ; struct_literal_contents: designator EQUAL expression { $$ = {FieldInitializer($1, $3)}; } | struct_literal_contents COMMA designator EQUAL expression { $$ = $1; $$.push_back(FieldInitializer($3, $5)); } ; struct_type_literal: LEFT_CURLY_BRACE RIGHT_CURLY_BRACE { $$ = arena->New(context.source_loc()); } | LEFT_CURLY_BRACE struct_type_literal_contents RIGHT_CURLY_BRACE { $$ = arena->New(context.source_loc(), $2); } | LEFT_CURLY_BRACE struct_type_literal_contents COMMA RIGHT_CURLY_BRACE { $$ = arena->New(context.source_loc(), $2); } ; struct_type_literal_contents: designator COLON expression { $$ = {FieldInitializer($1, $3)}; } | struct_type_literal_contents COMMA designator COLON expression { $$ = $1; $$.push_back(FieldInitializer($3, $5)); } ; // In many cases, using `pattern` recursively will result in ambiguities. // When that happens, it's necessary to factor out two separate productions, // one for when the sub-pattern is an expression, and one for when it is not. // To facilitate this, non-terminals besides `pattern` whose names contain // `pattern` are structured to be disjoint from `expression`, unless otherwise // specified. pattern: non_expression_pattern { $$ = $1; } | expression { $$ = arena->New($1); } ; non_expression_pattern: AUTO { $$ = arena->New(context.source_loc()); } | binding_lhs COLON pattern { $$ = arena->New(context.source_loc(), $1, $3); } | paren_pattern { $$ = $1; } | expression tuple_pattern { $$ = arena->New(context.source_loc(), $1, $2); } ; binding_lhs: identifier { $$ = $1; } | UNDERSCORE { $$ = AnonymousName; } ; paren_pattern: paren_pattern_base { $$ = PatternFromParenContents(arena, context.source_loc(), $1); } ; paren_pattern_base: LEFT_PARENTHESIS paren_pattern_contents RIGHT_PARENTHESIS { $$ = $2; } | LEFT_PARENTHESIS paren_pattern_contents COMMA RIGHT_PARENTHESIS { $$ = $2; $$.has_trailing_comma = true; } ; // paren_pattern is analogous to paren_expression, but in order to avoid // ambiguities, it must be disjoint from paren_expression, meaning it must // contain at least one non_expression_pattern. The structure of this rule // is very different from the corresponding expression rule because is has to // enforce that requirement. paren_pattern_contents: non_expression_pattern { $$ = {.elements = {$1}, .has_trailing_comma = false}; } | paren_expression_contents COMMA non_expression_pattern { $$ = ParenExpressionToParenPattern(arena, $1); $$.elements.push_back($3); } | paren_pattern_contents COMMA expression { $$ = $1; $$.elements.push_back(arena->New($3)); } | paren_pattern_contents COMMA non_expression_pattern { $$ = $1; $$.elements.push_back($3); } ; tuple_pattern: paren_pattern_base { $$ = TuplePatternFromParenContents(arena, context.source_loc(), $1); } ; // Unlike most `pattern` nonterminals, this one overlaps with `expression`, // so it should be used only when prior context (such as an introducer) // rules out the possibility of an `expression` at this point. maybe_empty_tuple_pattern: LEFT_PARENTHESIS RIGHT_PARENTHESIS { $$ = arena->New(context.source_loc(), std::vector>()); } | tuple_pattern { $$ = $1; } ; clause: CASE pattern DOUBLE_ARROW statement { $$ = Match::Clause($2, $4); } | DEFAULT DOUBLE_ARROW statement { $$ = Match::Clause(arena->New( context.source_loc(), std::string(AnonymousName), arena->New(context.source_loc())), $3); } ; clause_list: // Empty { $$ = {}; } | clause_list clause { $$ = $1; $$.push_back($2); } ; statement: expression EQUAL expression SEMICOLON { $$ = arena->New(context.source_loc(), $1, $3); } | VAR pattern EQUAL expression SEMICOLON { $$ = arena->New(context.source_loc(), $2, $4); } | expression SEMICOLON { $$ = arena->New(context.source_loc(), $1); } | if_statement { $$ = $1; } | WHILE LEFT_PARENTHESIS expression RIGHT_PARENTHESIS block { $$ = arena->New(context.source_loc(), $3, $5); } | BREAK SEMICOLON { $$ = arena->New(context.source_loc()); } | CONTINUE SEMICOLON { $$ = arena->New(context.source_loc()); } | RETURN return_expression SEMICOLON { auto [return_exp, is_omitted_exp] = $2; $$ = arena->New(context.source_loc(), return_exp, is_omitted_exp); } // We disallow empty blocks in places where an arbitrary statement can occur // in order to avoid ambiguity with the empty struct literal `{}`. We can // allow non-empty blocks because a non-empty struct literal always starts with // a designator, and a block never does, so one token of lookahead suffices // to disambiguate. As of this writing, the "official" resolution of this // ambiguity is an open question (see // https://github.com/carbon-language/carbon-lang/blob/trunk/docs/design/classes.md#literals) | nonempty_block { $$ = $1; } | MATCH LEFT_PARENTHESIS expression RIGHT_PARENTHESIS LEFT_CURLY_BRACE clause_list RIGHT_CURLY_BRACE { $$ = arena->New(context.source_loc(), $3, $6); } | CONTINUATION identifier block { $$ = arena->New(context.source_loc(), $2, $3); } | RUN expression SEMICOLON { $$ = arena->New(context.source_loc(), $2); } | AWAIT SEMICOLON { $$ = arena->New(context.source_loc()); } ; if_statement: IF LEFT_PARENTHESIS expression RIGHT_PARENTHESIS block optional_else { $$ = arena->New(context.source_loc(), $3, $5, $6); } ; optional_else: // Empty { $$ = std::nullopt; } | ELSE if_statement { $$ = arena->New(context.source_loc(), std::vector>({$2})); } | ELSE block { $$ = $2; } ; return_expression: // Empty { $$ = {arena->New(context.source_loc()), true}; } | expression { $$ = {$1, false}; } ; statement_list: // Empty { $$ = {}; } | statement_list statement { $$ = std::move($1); $$.push_back($2); } ; block: LEFT_CURLY_BRACE statement_list RIGHT_CURLY_BRACE { $$ = arena->New(context.source_loc(), std::move($2)); } ; nonempty_block: LEFT_CURLY_BRACE statement_list statement RIGHT_CURLY_BRACE { $2.push_back($3); $$ = arena->New(context.source_loc(), std::move($2)); } ; return_term: // Empty { $$ = ReturnTerm::Omitted(context.source_loc()); } | ARROW AUTO %prec FNARROW { $$ = ReturnTerm::Auto(context.source_loc()); } | ARROW expression %prec FNARROW { $$ = ReturnTerm::Explicit($2); } ; generic_binding: identifier COLON_BANG expression { $$ = arena->New(context.source_loc(), std::move($1), $3); } ; deduced_param_list: // Empty { $$ = std::vector>(); } | generic_binding { $$ = std::vector>(); $$.push_back($1); } | generic_binding COMMA deduced_param_list { $$ = $3; $$.push_back($1); } ; deduced_params: // Empty { $$ = std::vector>(); } | LEFT_SQUARE_BRACKET deduced_param_list RIGHT_SQUARE_BRACKET { $$ = $2; } ; function_declaration: FN identifier deduced_params maybe_empty_tuple_pattern return_term block { $$ = arena->New(context.source_loc(), $2, $3, $4, $5, $6); } | FN identifier deduced_params maybe_empty_tuple_pattern return_term SEMICOLON { $$ = arena->New(context.source_loc(), $2, $3, $4, $5, std::nullopt); } ; variable_declaration: identifier COLON pattern { $$ = arena->New(context.source_loc(), $1, $3); } ; member: VAR variable_declaration SEMICOLON { $$ = arena->New(context.source_loc(), $2); } ; member_list: // Empty { $$ = {}; } | member_list member { $$ = $1; $$.push_back($2); } ; alternative: identifier tuple { $$ = arena->New(context.source_loc(), $1, $2); } | identifier { $$ = arena->New( context.source_loc(), $1, arena->New(context.source_loc())); } ; alternative_list: // Empty { $$ = {}; } | alternative_list_contents { $$ = $1; } | alternative_list_contents COMMA { $$ = $1; } ; alternative_list_contents: alternative { $$ = {std::move($1)}; } | alternative_list_contents COMMA alternative { $$ = $1; $$.push_back(std::move($3)); } ; declaration: function_declaration { $$ = $1; } | CLASS identifier LEFT_CURLY_BRACE member_list RIGHT_CURLY_BRACE { $$ = arena->New(context.source_loc(), $2, $4); } | CHOICE identifier LEFT_CURLY_BRACE alternative_list RIGHT_CURLY_BRACE { $$ = arena->New(context.source_loc(), $2, $4); } | VAR variable_declaration EQUAL expression SEMICOLON { $$ = arena->New(context.source_loc(), $2, $4); } ; declaration_list: // Empty { $$ = {}; } | declaration_list declaration { $$ = $1; $$.push_back(Nonnull($2)); } ; %%