// Part of the Carbon Language project, under the Apache License v2.0 with LLVM // Exceptions. See /LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // ----------------------------------------------------------------------------- // Bison Configuration // ----------------------------------------------------------------------------- %require "3.2" %language "c++" // We don't need a separate header for Bison locations. %define api.location.file none // Use a type-safe C++ variant for semantic values %define api.value.type variant // Have Bison generate the functions ‘make_TEXT’ and ‘make_NUMBER’, but also // ‘make_YYEOF’, for the end of input. %define api.token.constructor // Generate the parser as `::Carbon::Parser`. %define api.namespace { Carbon } %define api.parser.class { Parser } // Make parse error messages more detailed %define parse.error verbose // Enable support for parser debugging %define parse.trace true // Generate location structs. %locations // Parameters to the parser and lexer. // // Parameters to the parser are stored therein as protected data members, and // thus available to its methods. // "out" parameter passed to the parser, where the AST is written. %parse-param {std::optional& parsed_program} // "inout" parameters passed to both the parser and the lexer. %param {yyscan_t yyscanner} %param {ParseAndLexContext& context} // No shift-reduce conflicts are expected. %expect 0 // ----------------------------------------------------------------------------- %code top { #include #include #include #include #include #include #include "common/check.h" #include "executable_semantics/syntax/syntax_helpers.h" #include "executable_semantics/syntax/parse_and_lex_context.h" #include "llvm/ADT/StringExtras.h" } // %code top %code requires { #include #include "executable_semantics/ast/abstract_syntax_tree.h" #include "executable_semantics/ast/declaration.h" #include "executable_semantics/ast/expression.h" #include "executable_semantics/ast/function_definition.h" #include "executable_semantics/ast/pattern.h" #include "executable_semantics/common/arena.h" #include "executable_semantics/common/ptr.h" #include "executable_semantics/ast/paren_contents.h" #include "executable_semantics/syntax/bison_wrap.h" namespace Carbon { class ParseAndLexContext; } // namespace Carbon typedef void* yyscan_t; } // %code requires %code { void Carbon::Parser::error(const location_type&, const std::string& message) { context.PrintDiagnostic(message); } } // %code %token integer_literal %token identifier %token sized_type_literal %token string_literal %type designator %type >> declaration %type >> function_declaration %type >> function_definition %type >> declaration_list %type >> statement %type >> if_statement %type >> optional_else %type , bool>>> return_expression %type >> block %type >> statement_list %type >> expression %type > generic_binding %type > deduced_params %type > deduced_param_list %type >> pattern %type >> non_expression_pattern %type , bool>>> return_type %type >> paren_expression %type >> tuple %type > binding_lhs %type >> variable_declaration %type >> member %type >> member_list %type ::Element>> paren_expression_element %type > paren_expression_base %type > paren_expression_contents %type >> paren_pattern %type >> tuple_pattern %type >> maybe_empty_tuple_pattern %type > paren_pattern_base %type ::Element>> paren_pattern_element %type > paren_pattern_contents %type >>> alternative %type >>> alternative_list %type , Ptr>*> clause %type , Ptr>>*> clause_list %token END_OF_FILE 0 %token AND %token OR %token NOT %token STRING %token BOOL %token TYPE %token FN %token FNTY %token ARROW "->" %token FNARROW "-> in return type" %token VAR %token EQUAL_EQUAL %token IF %token ELSE %token WHILE %token CONTINUATION_TYPE %token CONTINUATION %token RUN %token AWAIT %token BREAK %token CONTINUE %token RETURN %token TRUE %token FALSE %token CLASS %token CHOICE %token MATCH %token CASE %token DBLARROW "=>" %token DEFAULT %token AUTO %token UNDERSCORE %token EQUAL "=" MINUS "-" PLUS "+" // The lexer determines the arity and fixity of each `*` based on whitespace // and adjacent tokens. UNARY_STAR indicates that the operator is unary but // could be either prefix or postfix. UNARY_STAR "unary *" PREFIX_STAR "prefix *" POSTFIX_STAR "postfix *" BINARY_STAR "binary *" SLASH "/" LEFT_PARENTHESIS "(" RIGHT_PARENTHESIS ")" LEFT_CURLY_BRACE "{" RIGHT_CURLY_BRACE "}" LEFT_SQUARE_BRACKET "[" RIGHT_SQUARE_BRACKET "]" PERIOD "." COMMA "," SEMICOLON ";" COLON_BANG ":!" COLON ":" ; %precedence FNARROW %precedence "{" "}" %precedence ":!" ":" "," DBLARROW %left OR AND %nonassoc EQUAL_EQUAL %left "+" "-" %left BINARY_STAR %precedence NOT UNARY_MINUS PREFIX_STAR // We need to give the `UNARY_STAR` token a precedence, rather than overriding // the precedence of the `expression UNARY_STAR` rule below, because bison // compares the precedence of the final token (for a shift) to the precedence // of the other rule (for a reduce) when attempting to resolve a shift-reduce // conflict. See https://stackoverflow.com/a/26188429/1041090. When UNARY_STAR // is the final token of a rule, it must be a postfix usage, so we give it the // same precedence as POSTFIX_STAR. %precedence POSTFIX_STAR UNARY_STAR %left "." ARROW %precedence "(" ")" "[" "]" %start input %% input: declaration_list { parsed_program = $1; } ; expression: identifier { $$ = global_arena->New(context.SourceLoc(), $1); } | expression designator { $$ = global_arena->New(context.SourceLoc(), $1, $2); } | expression "[" expression "]" { $$ = global_arena->New(context.SourceLoc(), $1, $3); } | integer_literal { $$ = global_arena->New(context.SourceLoc(), $1); } | string_literal { $$ = global_arena->New(context.SourceLoc(), $1); } | TRUE { $$ = global_arena->New(context.SourceLoc(), true); } | FALSE { $$ = global_arena->New(context.SourceLoc(), false); } | sized_type_literal { int val; CHECK(llvm::to_integer(llvm::StringRef($1).substr(1), val)); CHECK($1[0] == 'i' && val == 32) << "Only i32 is supported for now: " << $1; $$ = global_arena->New(context.SourceLoc()); } | STRING { $$ = global_arena->New(context.SourceLoc()); } | BOOL { $$ = global_arena->New(context.SourceLoc()); } | TYPE { $$ = global_arena->New(context.SourceLoc()); } | CONTINUATION_TYPE { $$ = global_arena->New(context.SourceLoc()); } | paren_expression { $$ = $1; } | expression EQUAL_EQUAL expression { $$ = global_arena->New( context.SourceLoc(), Operator::Eq, std::vector>({$1, $3})); } | expression "+" expression { $$ = global_arena->New( context.SourceLoc(), Operator::Add, std::vector>({$1, $3})); } | expression "-" expression { $$ = global_arena->New( context.SourceLoc(), Operator::Sub, std::vector>({$1, $3})); } | expression BINARY_STAR expression { $$ = global_arena->New( context.SourceLoc(), Operator::Mul, std::vector>({$1, $3})); } | expression AND expression { $$ = global_arena->New( context.SourceLoc(), Operator::And, std::vector>({$1, $3})); } | expression OR expression { $$ = global_arena->New( context.SourceLoc(), Operator::Or, std::vector>({$1, $3})); } | NOT expression { $$ = global_arena->New( context.SourceLoc(), Operator::Not, std::vector>({$2})); } | "-" expression %prec UNARY_MINUS { $$ = global_arena->New( context.SourceLoc(), Operator::Neg, std::vector>({$2})); } | PREFIX_STAR expression { $$ = global_arena->New( context.SourceLoc(), Operator::Deref, std::vector>({$2})); } | UNARY_STAR expression %prec PREFIX_STAR { $$ = global_arena->New( context.SourceLoc(), Operator::Deref, std::vector>({$2})); } | expression tuple { $$ = global_arena->New(context.SourceLoc(), $1, $2); } | expression POSTFIX_STAR { $$ = global_arena->New( context.SourceLoc(), Operator::Ptr, std::vector>({$1})); } | expression UNARY_STAR { $$ = global_arena->New( context.SourceLoc(), Operator::Ptr, std::vector>({$1})); } | FNTY tuple return_type { auto [return_exp, is_omitted_exp] = $3.Release(); $$ = global_arena->New( context.SourceLoc(), $2, return_exp, is_omitted_exp); } ; designator: "." identifier { $$ = $2; } ; paren_expression: paren_expression_base { $$ = ExpressionFromParenContents(context.SourceLoc(), $1); } ; tuple: paren_expression_base { $$ = TupleExpressionFromParenContents(context.SourceLoc(), $1); } ; paren_expression_element: expression { $$ = {.name = std::nullopt, .term = $1}; } | designator "=" expression { $$ = {.name = $1, .term = $3}; } ; paren_expression_base: "(" ")" { $$ = {.elements = {}, .has_trailing_comma = false}; } | "(" paren_expression_contents ")" { $$ = $2; } | "(" paren_expression_contents "," ")" { $$ = $2; $$.has_trailing_comma = true; } ; paren_expression_contents: paren_expression_element { $$ = {.elements = {$1}, .has_trailing_comma = false}; } | paren_expression_contents "," paren_expression_element { $$ = $1; $$.elements.push_back($3); } ; // In many cases, using `pattern` recursively will result in ambiguities. // When that happens, it's necessary to factor out two separate productions, // one for when the sub-pattern is an expression, and one for when it is not. // To facilitate this, non-terminals besides `pattern` whose names contain // `pattern` are structured to be disjoint from `expression`, unless otherwise // specified. pattern: non_expression_pattern { $$ = $1; } | expression { $$ = global_arena->New($1); } ; non_expression_pattern: AUTO { $$ = global_arena->New(context.SourceLoc()); } | binding_lhs ":" pattern { $$ = global_arena->New(context.SourceLoc(), $1, $3); } | paren_pattern { $$ = $1; } | expression tuple_pattern { $$ = global_arena->New(context.SourceLoc(), $1, $2); } ; binding_lhs: identifier { $$ = $1; } | UNDERSCORE { $$ = std::nullopt; } ; paren_pattern: paren_pattern_base { $$ = PatternFromParenContents(context.SourceLoc(), $1); } ; paren_pattern_base: "(" paren_pattern_contents ")" { $$ = $2; } | "(" paren_pattern_contents "," ")" { $$ = $2; $$.has_trailing_comma = true; } ; // paren_pattern is analogous to paren_expression, but in order to avoid // ambiguities, it must be disjoint from paren_expression, meaning it must // contain at least one non_expression_pattern. The structure of this rule // is very different from the corresponding expression rule because is has to // enforce that requirement. paren_pattern_contents: paren_pattern_element { $$ = {.elements = {$1}, .has_trailing_comma = false }; } | paren_expression_contents "," paren_pattern_element { $$ = ParenExpressionToParenPattern($1); $$.elements.push_back($3); } | paren_pattern_contents "," paren_expression_element { $$ = $1; auto el = $3.Release(); $$.elements.push_back({.name = el.name, .term = global_arena->New(el.term)}); } | paren_pattern_contents "," paren_pattern_element { $$ = $1; $$.elements.push_back($3); } ; paren_pattern_element: non_expression_pattern { $$ = {.name = std::nullopt, .term = $1}; } | designator "=" non_expression_pattern { $$ = {.name = $1, .term = $3}; } ; tuple_pattern: paren_pattern_base { $$ = TuplePatternFromParenContents(context.SourceLoc(), $1); } ; // Unlike most `pattern` nonterminals, this one overlaps with `expression`, // so it should be used only when prior context (such as an introducer) // rules out the possibility of an `expression` at this point. maybe_empty_tuple_pattern: "(" ")" { $$ = global_arena->New(context.SourceLoc(), std::vector()); } | tuple_pattern { $$ = $1; } ; clause: CASE pattern DBLARROW statement { $$ = global_arena->RawNew, Ptr>>($2, $4); } | DEFAULT DBLARROW statement { auto vp = global_arena->New( context.SourceLoc(), std::nullopt, global_arena->New(context.SourceLoc())); $$ = global_arena->RawNew, Ptr>>(vp, $3); } ; clause_list: // Empty { $$ = global_arena->RawNew, Ptr>>>(); } | clause clause_list { $$ = $2; $$->push_front(*$1); } ; statement: expression "=" expression ";" { $$ = global_arena->New(context.SourceLoc(), $1, $3); } | VAR pattern "=" expression ";" { $$ = global_arena->New(context.SourceLoc(), $2, $4); } | expression ";" { $$ = global_arena->New(context.SourceLoc(), $1); } | if_statement { $$ = $1; } | WHILE "(" expression ")" block { $$ = global_arena->New(context.SourceLoc(), $3, $5); } | BREAK ";" { $$ = global_arena->New(context.SourceLoc()); } | CONTINUE ";" { $$ = global_arena->New(context.SourceLoc()); } | RETURN return_expression ";" { auto [return_exp, is_omitted_exp] = $2.Release(); $$ = global_arena->New(context.SourceLoc(), return_exp, is_omitted_exp); } | block { $$ = $1; } | MATCH "(" expression ")" "{" clause_list "}" { $$ = global_arena->New(context.SourceLoc(), $3, $6); } | CONTINUATION identifier statement { $$ = global_arena->New(context.SourceLoc(), $2, $3); } | RUN expression ";" { $$ = global_arena->New(context.SourceLoc(), $2); } | AWAIT ";" { $$ = global_arena->New(context.SourceLoc()); } ; if_statement: IF "(" expression ")" block optional_else { $$ = global_arena->New(context.SourceLoc(), $3, $5, $6); } ; optional_else: // Empty { $$ = std::nullopt; } | ELSE if_statement { $$ = $2; } | ELSE block { $$ = $2; } ; return_expression: // Empty { $$ = {global_arena->New(context.SourceLoc()), true}; } | expression { $$ = {$1, false}; } ; statement_list: // Empty { $$ = std::nullopt; } | statement statement_list { $$ = global_arena->New(context.SourceLoc(), $1, $2); } ; block: "{" statement_list "}" { $$ = global_arena->New(context.SourceLoc(), $2); } ; return_type: // Empty { $$ = {global_arena->New(context.SourceLoc()), true}; } | ARROW expression %prec FNARROW { $$ = {$2, false}; } ; generic_binding: identifier ":!" expression { $$ = GenericBinding({.name = std::move($1), .type = $3}); } ; deduced_param_list: // Empty { $$ = std::vector(); } | generic_binding { $$ = std::vector(); $$.push_back($1); } | generic_binding "," deduced_param_list { $$ = $3; $$.push_back($1); } ; deduced_params: // Empty { $$ = std::vector(); } | "[" deduced_param_list "]" { $$ = $2; } ; function_definition: FN identifier deduced_params maybe_empty_tuple_pattern return_type block { auto [return_exp, is_omitted_exp] = $5.Release(); $$ = global_arena->New( context.SourceLoc(), $2, $3, $4, global_arena->New(return_exp), is_omitted_exp, $6); } | FN identifier deduced_params maybe_empty_tuple_pattern DBLARROW expression ";" { // The return type is not considered "omitted" because it's automatic from // the expression. $$ = global_arena->New( context.SourceLoc(), $2, $3, $4, global_arena->New(context.SourceLoc()), true, global_arena->New(context.SourceLoc(), $6, true)); } ; function_declaration: FN identifier deduced_params maybe_empty_tuple_pattern return_type ";" { auto [return_exp, is_omitted_exp] = $5.Release(); $$ = global_arena->New( context.SourceLoc(), $2, $3, $4, global_arena->New(return_exp), is_omitted_exp, std::nullopt); } ; variable_declaration: identifier ":" pattern { $$ = global_arena->New(context.SourceLoc(), $1, $3); } ; member: VAR variable_declaration ";" { $$ = global_arena->New(context.SourceLoc(), $2); } ; member_list: // Empty { $$ = std::list>(); } | member member_list { $$ = $2; $$.push_front($1); } ; alternative: identifier tuple { $$ = std::pair>($1, $2); } | identifier { $$ = std::pair>( $1, global_arena->New(context.SourceLoc())); } ; alternative_list: // Empty { $$ = std::list>>(); } | alternative { $$ = std::list>>(); $$.push_front($1); } | alternative "," alternative_list { $$ = std::move($3); $$.push_front($1); } ; declaration: function_definition { $$ = global_arena->New($1); } | function_declaration { $$ = global_arena->New($1); } | CLASS identifier "{" member_list "}" { $$ = global_arena->New(context.SourceLoc(), $2, $4); } | CHOICE identifier "{" alternative_list "}" { $$ = global_arena->New(context.SourceLoc(), $2, $4); } | VAR variable_declaration "=" expression ";" { $$ = global_arena->New(context.SourceLoc(), $2, $4); } ; declaration_list: // Empty { $$ = std::list>(); } | declaration declaration_list { $$ = $2; $$.push_front(Ptr($1)); } ; %%