/*
Part of the Carbon Language project, under the Apache License v2.0 with LLVM
Exceptions. See /LICENSE for license information.
SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/

/*
 * Flex scanner specification for the explorer.
 *
 * Layout:
 *   1. Prologue: headers needed by the actions below.
 *   2. Options and start conditions.
 *   3. Named patterns: one per fixed-spelling token, followed by the
 *      variable-spelling tokens (identifiers, literals, whitespace).
 *   4. Rules: fixed-spelling tokens first, then context-sensitive `*`,
 *      identifiers and literals, strings, comments and whitespace.
 *   5. Epilogue: helper that exposes the generated `yyinput`.
 */

%{
  #include <string>
  #include <utility>

  #include "common/check.h"
  #include "common/error.h"
  #include "explorer/syntax/lex_helper.h"
  #include "explorer/syntax/lex_scan_helper.h"
  #include "explorer/syntax/parse_and_lex_context.h"
  #include "explorer/syntax/parser.h"
  #include "llvm/ADT/StringExtras.h"
  #include "llvm/Support/FormatVariadic.h"
%}

/* Turn off legacy bits we don't need. */
%option noyywrap nounput nodefault

%option reentrant

/*
 * Both start conditions are inclusive (`%s`), so rules without a start
 * condition prefix stay active in every state. Only the `*` rules are
 * qualified by state.
 *
 * NOTE(review): no rule in this file switches back to INITIAL. If a reset is
 * required it is presumably supplied elsewhere (e.g. by one of the included
 * helper headers) -- TODO confirm.
 */

/* Lexing a token immediately after consuming some whitespace. */
%s AFTER_WHITESPACE
/*
 * Lexing a token immediately after consuming an operand-ending token:
 * a closing bracket, identifier, or literal.
 */
%s AFTER_OPERAND

/* table-begin */
ABSTRACT              "abstract"
ADDR                  "addr"
ALIAS                 "alias"
AMPERSAND             "&"
AMPERSAND_EQUAL       "&="
AND                   "and"
API                   "api"
ARROW                 "->"
AS                    "as"
AUTO                  "auto"
BASE                  "base"
BOOL                  "bool"
BREAK                 "break"
CARET                 "^"
CARET_EQUAL           "^="
CASE                  "case"
CHOICE                "choice"
CLASS                 "class"
COLON                 ":"
COLON_BANG            ":!"
COMMA                 ","
CONSTRAINT            "constraint"
CONTINUE              "continue"
DEFAULT               "default"
DESTRUCTOR            "destructor"
DOUBLE_ARROW          "=>"
ELSE                  "else"
EQUAL                 "="
EQUAL_EQUAL           "=="
EXTEND                "extend"
FALSE                 "false"
FN                    "fn"
FN_TYPE               "__Fn"
FOR                   "for"
FORALL                "forall"
GREATER               ">"
GREATER_EQUAL         ">="
GREATER_GREATER       ">>"
GREATER_GREATER_EQUAL ">>="
IF                    "if"
IMPL                  "impl"
IMPLS                 "impls"
IMPORT                "import"
IN                    "in"
INTERFACE             "interface"
LEFT_CURLY_BRACE      "{"
LEFT_PARENTHESIS      "("
LEFT_SQUARE_BRACKET   "["
LESS                  "<"
LESS_EQUAL            "<="
LESS_LESS             "<<"
LESS_LESS_EQUAL       "<<="
LET                   "let"
LIBRARY               "library"
MATCH                 "match"
MATCH_FIRST           "__match_first"
MINUS                 "-"
MINUS_EQUAL           "-="
MINUS_MINUS           "--"
MIX                   "__mix"
MIXIN                 "__mixin"
NAMESPACE             "namespace"
NOT                   "not"
NOT_EQUAL             "!="
OR                    "or"
PACKAGE               "package"
PERCENT               "%"
PERCENT_EQUAL         "%="
PERIOD                "."
PIPE                  "|"
PIPE_EQUAL            "|="
PLUS                  "+"
PLUS_EQUAL            "+="
PLUS_PLUS             "++"
REQUIRE               "require"
RETURN                "return"
RETURNED              "returned"
RIGHT_CURLY_BRACE     "}"
RIGHT_PARENTHESIS     ")"
RIGHT_SQUARE_BRACKET  "]"
SELF                  "Self"
SEMICOLON             ";"
SLASH                 "/"
SLASH_EQUAL           "/="
STAR_EQUAL            "*="
STRING                "String"
TEMPLATE              "template"
THEN                  "then"
TRUE                  "true"
TYPE                  "type"
UNDERSCORE            "_"
UNIMPL_EXAMPLE        "__unimplemented_example_infix"
VAR                   "var"
VIRTUAL               "virtual"
WHERE                 "where"
WHILE                 "while"
/* table-end */

/* This should be kept table-like, but isn't automatic due to spaces. */
identifier            [A-Za-z_][A-Za-z0-9_]*
/* TODO: Remove Print special casing once we have variadics or overloads. */
intrinsic_identifier  (Print|__intrinsic_[A-Za-z0-9_]*)
sized_type_literal    [iuf][1-9][0-9]*
integer_literal       [0-9]+
horizontal_whitespace [ \t\r]
whitespace            [ \t\r\n]
one_line_comment      \/\/[^\n]*\n
operand_start         [(A-Za-z0-9_\"]

%%

%{
  // Code run each time yylex is called.

  // Begin with an empty token span starting where its previous end was.
  context.current_token_position.step();
%}

 /*
  * Fixed-spelling tokens. Flex prefers the longest match and, for equal
  * lengths, the earliest rule. Keywords therefore win over `{identifier}`
  * (listed later) for an exact match, while a longer identifier such as
  * `format` is still lexed as an identifier rather than `for` + `mat`.
  */
 /* table-begin */
{ABSTRACT}              { return CARBON_SIMPLE_TOKEN(ABSTRACT); }
{ADDR}                  { return CARBON_SIMPLE_TOKEN(ADDR); }
{ALIAS}                 { return CARBON_SIMPLE_TOKEN(ALIAS); }
{AMPERSAND_EQUAL}       { return CARBON_SIMPLE_TOKEN(AMPERSAND_EQUAL); }
{AMPERSAND}             { return CARBON_SIMPLE_TOKEN(AMPERSAND); }
{AND}                   { return CARBON_SIMPLE_TOKEN(AND); }
{API}                   { return CARBON_SIMPLE_TOKEN(API); }
{ARROW}                 { return CARBON_SIMPLE_TOKEN(ARROW); }
{AS}                    { return CARBON_SIMPLE_TOKEN(AS); }
{AUTO}                  { return CARBON_SIMPLE_TOKEN(AUTO); }
{BASE}                  { return CARBON_SIMPLE_TOKEN(BASE); }
{BOOL}                  { return CARBON_SIMPLE_TOKEN(BOOL); }
{BREAK}                 { return CARBON_SIMPLE_TOKEN(BREAK); }
{CARET_EQUAL}           { return CARBON_SIMPLE_TOKEN(CARET_EQUAL); }
{CARET}                 { return CARBON_SIMPLE_TOKEN(CARET); }
{CASE}                  { return CARBON_SIMPLE_TOKEN(CASE); }
{CHOICE}                { return CARBON_SIMPLE_TOKEN(CHOICE); }
{CLASS}                 { return CARBON_SIMPLE_TOKEN(CLASS); }
{COLON_BANG}            { return CARBON_SIMPLE_TOKEN(COLON_BANG); }
{COLON}                 { return CARBON_SIMPLE_TOKEN(COLON); }
{COMMA}                 { return CARBON_SIMPLE_TOKEN(COMMA); }
{CONSTRAINT}            { return CARBON_SIMPLE_TOKEN(CONSTRAINT); }
{CONTINUE}              { return CARBON_SIMPLE_TOKEN(CONTINUE); }
{DEFAULT}               { return CARBON_SIMPLE_TOKEN(DEFAULT); }
{DESTRUCTOR}            { return CARBON_SIMPLE_TOKEN(DESTRUCTOR); }
{DOUBLE_ARROW}          { return CARBON_SIMPLE_TOKEN(DOUBLE_ARROW); }
{ELSE}                  { return CARBON_SIMPLE_TOKEN(ELSE); }
{EQUAL_EQUAL}           { return CARBON_SIMPLE_TOKEN(EQUAL_EQUAL); }
{EQUAL}                 { return CARBON_SIMPLE_TOKEN(EQUAL); }
{EXTEND}                { return CARBON_SIMPLE_TOKEN(EXTEND); }
{FALSE}                 { return CARBON_SIMPLE_TOKEN(FALSE); }
{FN_TYPE}               { return CARBON_SIMPLE_TOKEN(FN_TYPE); }
{FN}                    { return CARBON_SIMPLE_TOKEN(FN); }
{FORALL}                { return CARBON_SIMPLE_TOKEN(FORALL); }
{FOR}                   { return CARBON_SIMPLE_TOKEN(FOR); }
{GREATER_EQUAL}         { return CARBON_SIMPLE_TOKEN(GREATER_EQUAL); }
{GREATER_GREATER_EQUAL} { return CARBON_SIMPLE_TOKEN(GREATER_GREATER_EQUAL); }
{GREATER_GREATER}       { return CARBON_SIMPLE_TOKEN(GREATER_GREATER); }
{GREATER}               { return CARBON_SIMPLE_TOKEN(GREATER); }
{IF}                    { return CARBON_SIMPLE_TOKEN(IF); }
{IMPLS}                 { return CARBON_SIMPLE_TOKEN(IMPLS); }
{IMPL}                  { return CARBON_SIMPLE_TOKEN(IMPL); }
{IMPORT}                { return CARBON_SIMPLE_TOKEN(IMPORT); }
{INTERFACE}             { return CARBON_SIMPLE_TOKEN(INTERFACE); }
{IN}                    { return CARBON_SIMPLE_TOKEN(IN); }
{LEFT_CURLY_BRACE}      { return CARBON_SIMPLE_TOKEN(LEFT_CURLY_BRACE); }
{LEFT_PARENTHESIS}      { return CARBON_SIMPLE_TOKEN(LEFT_PARENTHESIS); }
{LEFT_SQUARE_BRACKET}   { return CARBON_SIMPLE_TOKEN(LEFT_SQUARE_BRACKET); }
{LESS_EQUAL}            { return CARBON_SIMPLE_TOKEN(LESS_EQUAL); }
{LESS_LESS_EQUAL}       { return CARBON_SIMPLE_TOKEN(LESS_LESS_EQUAL); }
{LESS_LESS}             { return CARBON_SIMPLE_TOKEN(LESS_LESS); }
{LESS}                  { return CARBON_SIMPLE_TOKEN(LESS); }
{LET}                   { return CARBON_SIMPLE_TOKEN(LET); }
{LIBRARY}               { return CARBON_SIMPLE_TOKEN(LIBRARY); }
{MATCH_FIRST}           { return CARBON_SIMPLE_TOKEN(MATCH_FIRST); }
{MATCH}                 { return CARBON_SIMPLE_TOKEN(MATCH); }
{MINUS_EQUAL}           { return CARBON_SIMPLE_TOKEN(MINUS_EQUAL); }
{MINUS_MINUS}           { return CARBON_SIMPLE_TOKEN(MINUS_MINUS); }
{MINUS}                 { return CARBON_SIMPLE_TOKEN(MINUS); }
{MIXIN}                 { return CARBON_SIMPLE_TOKEN(MIXIN); }
{MIX}                   { return CARBON_SIMPLE_TOKEN(MIX); }
{NAMESPACE}             { return CARBON_SIMPLE_TOKEN(NAMESPACE); }
{NOT_EQUAL}             { return CARBON_SIMPLE_TOKEN(NOT_EQUAL); }
{NOT}                   { return CARBON_SIMPLE_TOKEN(NOT); }
{OR}                    { return CARBON_SIMPLE_TOKEN(OR); }
{PACKAGE}               { return CARBON_SIMPLE_TOKEN(PACKAGE); }
{PERCENT_EQUAL}         { return CARBON_SIMPLE_TOKEN(PERCENT_EQUAL); }
{PERCENT}               { return CARBON_SIMPLE_TOKEN(PERCENT); }
{PERIOD}                { return CARBON_SIMPLE_TOKEN(PERIOD); }
{PIPE_EQUAL}            { return CARBON_SIMPLE_TOKEN(PIPE_EQUAL); }
{PIPE}                  { return CARBON_SIMPLE_TOKEN(PIPE); }
{PLUS_EQUAL}            { return CARBON_SIMPLE_TOKEN(PLUS_EQUAL); }
{PLUS_PLUS}             { return CARBON_SIMPLE_TOKEN(PLUS_PLUS); }
{PLUS}                  { return CARBON_SIMPLE_TOKEN(PLUS); }
{REQUIRE}               { return CARBON_SIMPLE_TOKEN(REQUIRE); }
{RETURNED}              { return CARBON_SIMPLE_TOKEN(RETURNED); }
{RETURN}                { return CARBON_SIMPLE_TOKEN(RETURN); }
{SELF}                  { return CARBON_SIMPLE_TOKEN(SELF); }
{SEMICOLON}             { return CARBON_SIMPLE_TOKEN(SEMICOLON); }
{SLASH_EQUAL}           { return CARBON_SIMPLE_TOKEN(SLASH_EQUAL); }
{SLASH}                 { return CARBON_SIMPLE_TOKEN(SLASH); }
{STAR_EQUAL}            { return CARBON_SIMPLE_TOKEN(STAR_EQUAL); }
{STRING}                { return CARBON_SIMPLE_TOKEN(STRING); }
{TEMPLATE}              { return CARBON_SIMPLE_TOKEN(TEMPLATE); }
{THEN}                  { return CARBON_SIMPLE_TOKEN(THEN); }
{TRUE}                  { return CARBON_SIMPLE_TOKEN(TRUE); }
{TYPE}                  { return CARBON_SIMPLE_TOKEN(TYPE); }
{UNDERSCORE}            { return CARBON_SIMPLE_TOKEN(UNDERSCORE); }
{UNIMPL_EXAMPLE}        { return CARBON_SIMPLE_TOKEN(UNIMPL_EXAMPLE); }
{VAR}                   { return CARBON_SIMPLE_TOKEN(VAR); }
{VIRTUAL}               { return CARBON_SIMPLE_TOKEN(VIRTUAL); }
{WHERE}                 { return CARBON_SIMPLE_TOKEN(WHERE); }
{WHILE}                 { return CARBON_SIMPLE_TOKEN(WHILE); }
 /* table-end */

 /* More modern Bisons provide make_EOF. */
<<EOF>>               { return CARBON_SIMPLE_TOKEN(END_OF_FILE); }

 /*
  * Closing brackets end an operand, so they switch to AFTER_OPERAND; this is
  * what lets a directly following `*` be classified as binary or postfix.
  */
{RIGHT_PARENTHESIS} {
  BEGIN(AFTER_OPERAND);
  return CARBON_SIMPLE_TOKEN(RIGHT_PARENTHESIS);
}
{RIGHT_CURLY_BRACE} {
  BEGIN(AFTER_OPERAND);
  return CARBON_SIMPLE_TOKEN(RIGHT_CURLY_BRACE);
}
{RIGHT_SQUARE_BRACKET} {
  BEGIN(AFTER_OPERAND);
  return CARBON_SIMPLE_TOKEN(RIGHT_SQUARE_BRACKET);
}

 /*
  * For a `*` operator, we look at whitespace and local context to determine the
  * arity and fixity. There are two ways to write a binary operator:
  *
  * 1) Whitespace on both sides.
  * 2) Whitespace on neither side, and the previous token is considered to be
  *    the end of an operand, and the next token is considered to be the start
  *    of an operand.
  *
  * Otherwise, the operator is unary, but we also check for whitespace to help
  * the parser enforce the rule that whitespace is not permitted between the
  * operator and its operand, leading to three more cases:
  *
  * 3) Whitespace before (but implicitly not after, because that would give a
  *    longer match and hit case 1): this can only be a prefix operator.
  * 4) Whitespace after and not before: this can only be a postfix operator.
  * 5) No whitespace on either side (otherwise the longest match would take us
  *    to case 4): this is a unary operator and could be either prefix or
  *    postfix.
  *
  * "Whitespace before" and "previous token ends an operand" are encoded by the
  * AFTER_WHITESPACE and AFTER_OPERAND start conditions respectively; every
  * other situation is lexed in INITIAL. In AFTER_OPERAND, case 2 beats case 5
  * because the trailing context counts towards the match length.
  */
 /* `*` operator case 1: */
<AFTER_WHITESPACE>"*"{whitespace}+ {
  BEGIN(AFTER_WHITESPACE);
  return CARBON_SIMPLE_TOKEN(BINARY_STAR);
}
 /* `*` operator case 2: */
<AFTER_OPERAND>"*"/{operand_start} {
  return CARBON_SIMPLE_TOKEN(BINARY_STAR);
}
 /* `*` operator case 3: */
<AFTER_WHITESPACE>"*" {
  return CARBON_SIMPLE_TOKEN(PREFIX_STAR);
}
 /* `*` operator case 4: */
<INITIAL,AFTER_OPERAND>"*"{whitespace}+ {
  BEGIN(AFTER_WHITESPACE);
  return CARBON_SIMPLE_TOKEN(POSTFIX_STAR);
}
 /* `*` operator case 5: */
<INITIAL,AFTER_OPERAND>"*" {
  return CARBON_SIMPLE_TOKEN(UNARY_STAR);
}

 /*
  * Operand tokens. Order matters for equal-length matches: a spelling such as
  * `i32` is a sized type literal rather than an identifier, and `Print` /
  * `__intrinsic_*` are intrinsics rather than identifiers.
  */
{sized_type_literal} {
  BEGIN(AFTER_OPERAND);
  return CARBON_ARG_TOKEN(sized_type_literal, yytext);
}

{intrinsic_identifier} {
  BEGIN(AFTER_OPERAND);
  // Map the spelling to a known intrinsic; an unknown spelling is reported as
  // a syntax error carrying the lookup's own error.
  auto intrinsic =
      Carbon::IntrinsicExpression::FindIntrinsic(yytext, context.source_loc());
  if (intrinsic.ok()) {
    return CARBON_ARG_TOKEN(intrinsic_identifier, *intrinsic);
  } else {
    return context.RecordSyntaxError(std::move(intrinsic).error());
  }
}

{identifier} {
  BEGIN(AFTER_OPERAND);
  return CARBON_ARG_TOKEN(identifier, yytext);
}

{integer_literal} {
  BEGIN(AFTER_OPERAND);
  int val = 0;
  // The pattern only admits digits, so a failed conversion here means the
  // literal does not fit in `int`.
  if (!llvm::to_integer(yytext, val)) {
    return context.RecordSyntaxError(
        llvm::formatv("Invalid integer literal: {0}", yytext));
  }
  return CARBON_ARG_TOKEN(integer_literal, val);
}

 /*
  * Single-line string literal, optionally raw: `"..."`, `#"..."#`, ...
  * The pattern only matches the opening delimiter; the action reads the rest
  * of the literal one character at a time through StringLexHelper.
  */
#*\" {
  // Found a double quote character.
  Carbon::StringLexHelper str_lex_helper(yytext, yyscanner, context);
  const std::string& s = str_lex_helper.str();
  // The match is `#*` followed by the quote, so the quote's index is the
  // number of leading '#' characters (assuming str() starts out as the
  // matched text -- TODO confirm in lex_helper.h).
  const int hashtag_num = static_cast<int>(s.find_first_of('"'));
  if (!str_lex_helper.Advance()) {
    // NOTE(review): hitting end of input inside a string literal yields
    // END_OF_FILE here and below rather than an explicit syntax error.
    return CARBON_SIMPLE_TOKEN(END_OF_FILE);
  }
  while (!str_lex_helper.is_eof()) {
    switch (str_lex_helper.last_char()) {
      case '\n':  // Fall through.
      case '\v':  // Fall through.
      case '\f':  // Fall through.
      case '\r':
        return context.RecordSyntaxError(
            llvm::formatv("missing closing quote in single-line string: {0}",
                          str_lex_helper.str()));
      case '"':
        // Presumably ReadHashTags checks for `hashtag_num` '#' characters
        // after the current one -- confirm in lex_helper.h.
        if (Carbon::ReadHashTags(str_lex_helper, hashtag_num)) {
          // Reached the closing delimiter: hand the whole literal over.
          return Carbon::ProcessSingleLineString(str_lex_helper.str(), context,
                                                 hashtag_num);
        }
        break;
      case '\\':
        if (Carbon::ReadHashTags(str_lex_helper, hashtag_num)) {
          // Read the escaped char.
          if (!str_lex_helper.Advance()) {
            // Re-evaluates the loop condition, which now sees end of input.
            continue;
          }
          // Read the next char.
          str_lex_helper.Advance();
        }
        break;
      default:
        str_lex_helper.Advance();
    }
  }
  return CARBON_SIMPLE_TOKEN(END_OF_FILE);
}

 /*
  * Multi-line (block) string literal, optionally raw: `'''...'''`,
  * `#'''...'''#`, ... Same scheme as the single-line rule above, except that
  * newlines are allowed and the closing delimiter is three single quotes.
  */
#*\'\'\' {
  // Multi-line string literal.
  Carbon::StringLexHelper str_lex_helper(yytext, yyscanner, context);
  const std::string& s = str_lex_helper.str();
  // Index of the first quote == number of leading '#' characters (same
  // assumption about str() as in the single-line rule).
  const int hashtag_num = static_cast<int>(s.find_first_of('\''));
  if (!str_lex_helper.Advance()) {
    return CARBON_SIMPLE_TOKEN(END_OF_FILE);
  }
  while (!str_lex_helper.is_eof()) {
    switch (str_lex_helper.last_char()) {
      case '\'':
        // Check for 2 more '\''s on block string. On a mismatch, `continue`
        // re-dispatches on the character that was just read instead of
        // skipping it.
        if (!(str_lex_helper.Advance() &&
              str_lex_helper.last_char() == '\'')) {
          continue;
        }
        if (!(str_lex_helper.Advance() &&
              str_lex_helper.last_char() == '\'')) {
          continue;
        }
        // Now we are at the last ' of '''.
        if (Carbon::ReadHashTags(str_lex_helper, hashtag_num)) {
          // Reached the closing delimiter: hand the whole literal over.
          return Carbon::ProcessMultiLineString(str_lex_helper.str(), context,
                                                hashtag_num);
        }
        break;
      case '\\':
        if (Carbon::ReadHashTags(str_lex_helper, hashtag_num)) {
          // Read the escaped char.
          if (!str_lex_helper.Advance()) {
            continue;
          }
          // Read the next char.
          str_lex_helper.Advance();
        }
        break;
      default:
        str_lex_helper.Advance();
    }
  }
  return CARBON_SIMPLE_TOKEN(END_OF_FILE);
}

 /*
  * Comments and whitespace produce no token: each action only updates the
  * source position (and, for whitespace, the start condition) and lets the
  * scanner continue with the next match.
  */
{one_line_comment} {
  // Advance end by 1 line, resetting the column to zero.
  context.current_token_position.lines(1);
  // Make the span empty by setting start to end.
  context.current_token_position.step();
}

{horizontal_whitespace}+ {
  // Make the span empty by setting start to end.
  context.current_token_position.step();
  BEGIN(AFTER_WHITESPACE);
}

\n+ {
  // Advance end by yyleng lines, resetting the column to zero.
  context.current_token_position.lines(yyleng);
  // Make the span empty by setting start to end.
  context.current_token_position.step();
  BEGIN(AFTER_WHITESPACE);
}

 /*
  * Catch-all: required because `nodefault` disables flex's default rule.
  * Reports the offending byte in hex.
  */
. {
  return context.RecordSyntaxError(
      llvm::formatv("invalid character '\\x{0}' in source file.",
                    llvm::toHex(llvm::StringRef(yytext, 1))));
}

%%

// Forwards to the generated scanner's `yyinput` and returns its result, so
// that code outside this file can pull single characters from the input.
auto YyinputWrapper(yyscan_t yyscanner) -> int { return yyinput(yyscanner); }