/*
Part of the Carbon Language project, under the Apache License v2.0 with LLVM
Exceptions. See /LICENSE for license information.
SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/

%{
  // NOTE(review): this directive had no header name, which does not
  // preprocess. <cstdlib> is presumed -- confirm against the build.
  #include <cstdlib>

  #include "common/check.h"
  #include "executable_semantics/common/tracing_flag.h"
  #include "executable_semantics/syntax/parse_and_lex_context.h"
  #include "llvm/ADT/StringExtras.h"
%}

/* Turn off legacy bits we don't need */
%option noyywrap nounput nodefault noinput

/* maintains the number of the current line read from input in the
   global variable yylineno.
*/
%option yylineno

/* Lexing a token immediately after consuming some whitespace. */
%s AFTER_WHITESPACE
/*
 * Lexing a token immediately after consuming an operand-ending token:
 * a closing bracket, identifier, or literal.
 */
%s AFTER_OPERAND

/*
 * Named patterns. Keywords are listed before `identifier` is used in the
 * rules section, so a keyword wins over an identifier match of equal length.
 *
 * ONE_LINE_COMMENT includes the terminating newline in its match, so a `//`
 * comment on a final line that lacks a trailing newline is not matched by it.
 */
AND                  "and"
ARROW                "->"
AUTO                 "auto"
BOOL                 "Bool"
BREAK                "break"
CASE                 "case"
CHOICE               "choice"
ONE_LINE_COMMENT     \/\/[^\n]*\n
CONTINUE             "continue"
DBLARROW             "=>"
DEFAULT              "default"
ELSE                 "else"
EQUAL_EQUAL          "=="
FALSE                "false"
FN                   "fn"
FNTY                 "fnty"
IF                   "if"
MATCH                "match"
NOT                  "not"
OR                   "or"
RETURN               "return"
STRUCT               "struct"
TRUE                 "true"
TYPE                 "Type"
VAR                  "var"
WHILE                "while"
CONTINUATION_TYPE    "__Continuation"
CONTINUATION         "__continuation"
RUN                  "__run"
AWAIT                "__await"
UNDERSCORE           "_"

identifier            [A-Za-z_][A-Za-z0-9_]*
sized_type_literal    [iuf][1-9][0-9]*
integer_literal       [0-9]+
horizontal_whitespace [ \t\r]
whitespace            [ \t\r\n]
operand_start         [(A-Za-z0-9_"]

%{
  // This macro is expanded immediately before each action specified below.
  //
  // Advances the current token position by yyleng columns without changing
  // the line number, and takes us out of the after-whitespace / after-operand
  // state.
  # define YY_USER_ACTION                                 \
    context.current_token_position.columns(yyleng);       \
    if (YY_START == AFTER_WHITESPACE ||                   \
        YY_START == AFTER_OPERAND) {                      \
      BEGIN(INITIAL);                                     \
    }
%}

%%

%{
  // Code run each time yylex is called.

  // Begin with an empty token span starting where its previous end was.
  context.current_token_position.step();
%}

 /* Keywords and multi-character operators: each rule returns the matching
    parser token, tagged with the current token position. */
{AND}         { return Carbon::Parser::make_AND(context.current_token_position); }
{ARROW}       { return Carbon::Parser::make_ARROW(context.current_token_position); }
{AUTO}        { return Carbon::Parser::make_AUTO(context.current_token_position); }
{BOOL}        { return Carbon::Parser::make_BOOL(context.current_token_position); }
{BREAK}       { return Carbon::Parser::make_BREAK(context.current_token_position); }
{CASE}        { return Carbon::Parser::make_CASE(context.current_token_position); }
{CHOICE}      { return Carbon::Parser::make_CHOICE(context.current_token_position); }
{CONTINUE}    { return Carbon::Parser::make_CONTINUE(context.current_token_position); }
{DBLARROW}    { return Carbon::Parser::make_DBLARROW(context.current_token_position); }
{DEFAULT}     { return Carbon::Parser::make_DEFAULT(context.current_token_position); }
{ELSE}        { return Carbon::Parser::make_ELSE(context.current_token_position); }
{EQUAL_EQUAL} { return Carbon::Parser::make_EQUAL_EQUAL(context.current_token_position); }
{FALSE}       { return Carbon::Parser::make_FALSE(context.current_token_position); }
{FN}          { return Carbon::Parser::make_FN(context.current_token_position); }
{FNTY}        { return Carbon::Parser::make_FNTY(context.current_token_position); }
{IF}          { return Carbon::Parser::make_IF(context.current_token_position); }
{MATCH}       { return Carbon::Parser::make_MATCH(context.current_token_position); }
{NOT}         { return Carbon::Parser::make_NOT(context.current_token_position); }
{OR}          { return Carbon::Parser::make_OR(context.current_token_position); }
{RETURN}      { return Carbon::Parser::make_RETURN(context.current_token_position); }
{STRUCT}      { return Carbon::Parser::make_STRUCT(context.current_token_position); }
{TRUE}        { return Carbon::Parser::make_TRUE(context.current_token_position); }
{TYPE}        { return Carbon::Parser::make_TYPE(context.current_token_position); }
{VAR}         { return Carbon::Parser::make_VAR(context.current_token_position); }
{WHILE}       { return Carbon::Parser::make_WHILE(context.current_token_position); }
{CONTINUATION_TYPE} { return Carbon::Parser::make_CONTINUATION_TYPE(context.current_token_position); }
{CONTINUATION}      { return Carbon::Parser::make_CONTINUATION(context.current_token_position); }
{RUN}               { return Carbon::Parser::make_RUN(context.current_token_position); }
{AWAIT}             { return Carbon::Parser::make_AWAIT(context.current_token_position); }
{UNDERSCORE}        { return Carbon::Parser::make_UNDERSCORE(context.current_token_position); }

{sized_type_literal} {
  return Carbon::Parser::make_sized_type_literal(yytext, context.current_token_position);
}

 /* Punctuation. Closing brackets end an operand, so they switch to the
    AFTER_OPERAND state for the benefit of the `*` rules below. */
"="  { return Carbon::Parser::make_EQUAL(context.current_token_position); }
"-"  { return Carbon::Parser::make_MINUS(context.current_token_position); }
"+"  { return Carbon::Parser::make_PLUS(context.current_token_position); }
"/"  { return Carbon::Parser::make_SLASH(context.current_token_position); }
"("  { return Carbon::Parser::make_LEFT_PARENTHESIS(context.current_token_position); }
")"  {
  BEGIN(AFTER_OPERAND);
  return Carbon::Parser::make_RIGHT_PARENTHESIS(context.current_token_position);
}
"{"  { return Carbon::Parser::make_LEFT_CURLY_BRACE(context.current_token_position); }
"}"  {
  BEGIN(AFTER_OPERAND);
  return Carbon::Parser::make_RIGHT_CURLY_BRACE(context.current_token_position);
}
"["  { return Carbon::Parser::make_LEFT_SQUARE_BRACKET(context.current_token_position); }
"]"  {
  BEGIN(AFTER_OPERAND);
  return Carbon::Parser::make_RIGHT_SQUARE_BRACKET(context.current_token_position);
}
"."  { return Carbon::Parser::make_PERIOD(context.current_token_position); }
","  { return Carbon::Parser::make_COMMA(context.current_token_position); }
";"  { return Carbon::Parser::make_SEMICOLON(context.current_token_position); }
":!" { return Carbon::Parser::make_COLON_BANG(context.current_token_position); }
":"  { return Carbon::Parser::make_COLON(context.current_token_position); }

 /*
  For a `*` operator, we look at whitespace and local context to determine the
  arity and fixity. There are two ways to write a binary operator:

  1) Whitespace on both sides.
  2) Whitespace on neither side, and the previous token is considered to be
     the end of an operand, and the next token is considered to be the start
     of an operand.

  Otherwise, the operator is unary, but we also check for whitespace to help
  the parser enforce the rule that whitespace is not permitted between the
  operator and its operand, leading to three more cases:

  3) Whitespace before (but implicitly not after, because that would give a
     longer match and hit case 1): this can only be a prefix operator.
  4) Whitespace after and not before: this can only be a postfix operator.
  5) No whitespace on either side (otherwise the longest match would take us
     to case 4): this is a unary operator and could be either prefix or
     postfix.

  "Whitespace before" and "previous token ends an operand" are tracked by the
  AFTER_WHITESPACE and AFTER_OPERAND start conditions, so each rule below is
  restricted to the start conditions its case describes. Without those
  prefixes cases 1/4 and 3/5 would have identical patterns and the later rule
  of each pair could never match.
 */

 /* `*` operator case 1: whitespace on both sides. */
<AFTER_WHITESPACE>"*"{whitespace}+ {
  BEGIN(AFTER_WHITESPACE);
  return Carbon::Parser::make_BINARY_STAR(context.current_token_position);
}
 /* `*` operator case 2: between an operand end and an operand start. */
<AFTER_OPERAND>"*"/{operand_start} {
  return Carbon::Parser::make_BINARY_STAR(context.current_token_position);
}
 /* `*` operator case 3: whitespace before only. */
<AFTER_WHITESPACE>"*" {
  return Carbon::Parser::make_PREFIX_STAR(context.current_token_position);
}
 /* `*` operator case 4: whitespace after only. */
<INITIAL,AFTER_OPERAND>"*"{whitespace}+ {
  BEGIN(AFTER_WHITESPACE);
  return Carbon::Parser::make_POSTFIX_STAR(context.current_token_position);
}
 /* `*` operator case 5: no whitespace on either side. */
<INITIAL,AFTER_OPERAND>"*" {
  return Carbon::Parser::make_UNARY_STAR(context.current_token_position);
}

{identifier} {
  BEGIN(AFTER_OPERAND);
  return Carbon::Parser::make_identifier(yytext, context.current_token_position);
}

{integer_literal} {
  BEGIN(AFTER_OPERAND);
  int val = 0;
  // A literal that llvm::to_integer rejects (for example one too large for
  // `int`) is an error in the program being lexed, so report it the same way
  // as an invalid character instead of failing an internal CHECK.
  if (!llvm::to_integer(yytext, val)) {
    if (Carbon::tracing_output) {
      // Print a newline because tracing prints an incomplete line
      // "Reading a token: ".
      llvm::errs() << "\n";
    }
    FATAL_COMPILATION_ERROR(yylineno)
        << "invalid integer literal '" << yytext << "' in source file.";
  }
  return Carbon::Parser::make_integer_literal(val, context.current_token_position);
}

{ONE_LINE_COMMENT} {
  // Advance end by 1 line, resetting the column to zero.
  context.current_token_position.lines(1);
  // Make the span empty by setting start to end.
  context.current_token_position.step();
}

{horizontal_whitespace}+ {
  // Make the span empty by setting start to end.
  context.current_token_position.step();
  BEGIN(AFTER_WHITESPACE);
}

\n+ {
  // Advance end by yyleng lines, resetting the column to zero.
  context.current_token_position.lines(yyleng);
  // Make the span empty by setting start to end.
  context.current_token_position.step();
  BEGIN(AFTER_WHITESPACE);
}

 /* Any character not matched above is an error in the source file. */
. {
  if (Carbon::tracing_output) {
    // Print a newline because tracing prints an incomplete line
    // "Reading a token: ".
    llvm::errs() << "\n";
  }
  FATAL_COMPILATION_ERROR(yylineno)
      << "invalid character '\\x" << llvm::toHex(llvm::StringRef(yytext, 1))
      << "' in source file.";
}

<<EOF>> {
  // A more modern Bison would give us make_EOF.
  return Carbon::Parser::make_END_OF_FILE(context.current_token_position);
}

%%