| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227 |
- /*
- Part of the Carbon Language project, under the Apache License v2.0 with LLVM
- Exceptions. See /LICENSE for license information.
- SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- */
- %{
- #include <cstdlib>
- #include <iostream>
- #include "executable_semantics/common/tracing_flag.h"
- #include "executable_semantics/syntax/parse_and_lex_context.h"
- #include "llvm/ADT/StringExtras.h"
- %}
- /* Turn off legacy scanner features this lexer does not use:
-  *   noyywrap:  treat end of input as final instead of calling yywrap().
-  *   nounput:   do not generate the unput() helper.
-  *   noinput:   do not generate the input() helper.
-  *   nodefault: do not generate the default "echo unmatched text" rule, so
-  *              the rules below have to cover every possible input character.
-  */
- %option noyywrap nounput nodefault noinput
- /* Have the scanner maintain the number of the current line read from input
-  * in the global variable yylineno.
-  */
- %option yylineno
- /* Start condition: lexing a token immediately after consuming some
-  * whitespace.
-  */
- %s AFTER_WHITESPACE
- /* Start condition: lexing a token immediately after consuming an
-  * operand-ending token: a closing bracket, identifier, or literal.
-  * Both start conditions are inclusive (%s), so rules written without a
-  * <...> prefix remain active while they are in effect.
-  */
- %s AFTER_OPERAND
- /* Keywords. */
- AND                   "and"
- AUTO                  "auto"
- BOOL                  "Bool"
- BREAK                 "break"
- CASE                  "case"
- CHOICE                "choice"
- CONTINUE              "continue"
- DEFAULT               "default"
- ELSE                  "else"
- FALSE                 "false"
- FN                    "fn"
- FNTY                  "fnty"
- IF                    "if"
- INT                   "Int"
- MATCH                 "match"
- NOT                   "not"
- OR                    "or"
- RETURN                "return"
- STRUCT                "struct"
- TRUE                  "true"
- TYPE                  "Type"
- VAR                   "var"
- WHILE                 "while"
- /* Keywords spelled with a leading double underscore. */
- CONTINUATION_TYPE     "__Continuation"
- CONTINUATION          "__continuation"
- RUN                   "__run"
- AWAIT                 "__await"
- /* Symbolic tokens that have a named definition. */
- ARROW                 "->"
- DBLARROW              "=>"
- EQUAL_EQUAL           "=="
- UNDERSCORE            "_"
- /* Lexical classes: names, literals, comments, whitespace, and the set of
-  * characters that can begin an operand (used as trailing context for `*`).
-  */
- identifier            [A-Za-z_][A-Za-z0-9_]*
- integer_literal       [0-9]+
- ONE_LINE_COMMENT      \/\/[^\n]*\n
- horizontal_whitespace [ \t\r]
- whitespace            [ \t\r\n]
- operand_start         [(A-Za-z0-9_"]
- %{
- // YY_USER_ACTION is expanded immediately before the action of each rule specified below.
- //
- // It advances the current token position by yyleng columns without changing
- // the line number, and resets the AFTER_WHITESPACE / AFTER_OPERAND start condition to
- // INITIAL, so an action that wants either state for the next token must BEGIN() it itself.
- # define YY_USER_ACTION \
- context.current_token_position.columns(yyleng); \
- if (YY_START == AFTER_WHITESPACE || \
- YY_START == AFTER_OPERAND) { \
- BEGIN(INITIAL); \
- }
- %}
- %%
- %{
- // Code placed here, ahead of the first rule, is run each time yylex is called.
- // Begin the new token with an empty span that starts where the previous token's span ended.
- context.current_token_position.step();
- %}
-  /*
-   Keywords and fixed-spelling tokens. Each rule returns the matching parser
-   token, tagged with the current token position.
-
-   Flex prefers the longest match and, between equally long matches, the rule
-   listed first. Keeping these rules ahead of the {identifier} rule is what
-   makes a keyword win over an identifier with the same spelling, while a
-   longer name such as `iffy` still lexes as an identifier.
-
-   A closing bracket ends an operand, so those rules enter AFTER_OPERAND to
-   let a following `*` be classified correctly.
-  */
- {AND} { return Carbon::Parser::make_AND(context.current_token_position); }
- {ARROW} { return Carbon::Parser::make_ARROW(context.current_token_position); }
- {AUTO} { return Carbon::Parser::make_AUTO(context.current_token_position); }
- {BOOL} { return Carbon::Parser::make_BOOL(context.current_token_position); }
- {BREAK} { return Carbon::Parser::make_BREAK(context.current_token_position); }
- {CASE} { return Carbon::Parser::make_CASE(context.current_token_position); }
- {CHOICE} { return Carbon::Parser::make_CHOICE(context.current_token_position); }
- {CONTINUE} { return Carbon::Parser::make_CONTINUE(context.current_token_position); }
- {DBLARROW} { return Carbon::Parser::make_DBLARROW(context.current_token_position); }
- {DEFAULT} { return Carbon::Parser::make_DEFAULT(context.current_token_position); }
- {ELSE} { return Carbon::Parser::make_ELSE(context.current_token_position); }
- {EQUAL_EQUAL} { return Carbon::Parser::make_EQUAL_EQUAL(context.current_token_position); }
- {FALSE} { return Carbon::Parser::make_FALSE(context.current_token_position); }
- {FN} { return Carbon::Parser::make_FN(context.current_token_position); }
- {FNTY} { return Carbon::Parser::make_FNTY(context.current_token_position); }
- {IF} { return Carbon::Parser::make_IF(context.current_token_position); }
- {INT} { return Carbon::Parser::make_INT(context.current_token_position); }
- {MATCH} { return Carbon::Parser::make_MATCH(context.current_token_position); }
- {NOT} { return Carbon::Parser::make_NOT(context.current_token_position); }
- {OR} { return Carbon::Parser::make_OR(context.current_token_position); }
- {RETURN} { return Carbon::Parser::make_RETURN(context.current_token_position); }
- {STRUCT} { return Carbon::Parser::make_STRUCT(context.current_token_position); }
- {TRUE} { return Carbon::Parser::make_TRUE(context.current_token_position); }
- {TYPE} { return Carbon::Parser::make_TYPE(context.current_token_position); }
- {VAR} { return Carbon::Parser::make_VAR(context.current_token_position); }
- {WHILE} { return Carbon::Parser::make_WHILE(context.current_token_position); }
- {CONTINUATION_TYPE} { return Carbon::Parser::make_CONTINUATION_TYPE(context.current_token_position); }
- {CONTINUATION} { return Carbon::Parser::make_CONTINUATION(context.current_token_position); }
- {RUN} { return Carbon::Parser::make_RUN(context.current_token_position); }
- {AWAIT} { return Carbon::Parser::make_AWAIT(context.current_token_position); }
- {UNDERSCORE} { return Carbon::Parser::make_UNDERSCORE(context.current_token_position); }
- "=" { return Carbon::Parser::make_EQUAL(context.current_token_position); }
- "-" { return Carbon::Parser::make_MINUS(context.current_token_position); }
- "+" { return Carbon::Parser::make_PLUS(context.current_token_position); }
- "/" { return Carbon::Parser::make_SLASH(context.current_token_position); }
- "(" { return Carbon::Parser::make_LEFT_PARENTHESIS(context.current_token_position); }
- ")" { BEGIN(AFTER_OPERAND); return Carbon::Parser::make_RIGHT_PARENTHESIS(context.current_token_position); }
- "{" { return Carbon::Parser::make_LEFT_CURLY_BRACE(context.current_token_position); }
- "}" { BEGIN(AFTER_OPERAND); return Carbon::Parser::make_RIGHT_CURLY_BRACE(context.current_token_position); }
- "[" { return Carbon::Parser::make_LEFT_SQUARE_BRACKET(context.current_token_position); }
- "]" { BEGIN(AFTER_OPERAND); return Carbon::Parser::make_RIGHT_SQUARE_BRACKET(context.current_token_position); }
- "." { return Carbon::Parser::make_PERIOD(context.current_token_position); }
- "," { return Carbon::Parser::make_COMMA(context.current_token_position); }
- ";" { return Carbon::Parser::make_SEMICOLON(context.current_token_position); }
- ":!" { return Carbon::Parser::make_COLON_BANG(context.current_token_position); }
- ":" { return Carbon::Parser::make_COLON(context.current_token_position); }
- /*
- For a `*` operator, we look at whitespace and local context to determine the
- arity and fixity. There are two ways to write a binary operator:
- 1) Whitespace on both sides.
- 2) Whitespace on neither side, and the previous token is considered to be
- the end of an operand, and the next token is considered to be the start
- of an operand.
- Otherwise, the operator is unary, but we also check for whitespace to help
- the parser enforce the rule that whitespace is not permitted between the
- operator and its operand, leading to three more cases:
- 3) Whitespace before (but implicitly not after, because that would give a
- longer match and hit case 1): this can only be a prefix operator.
- 4) Whitespace after and not before: this can only be a postfix operator.
- 5) No whitespace on either side (otherwise the longest match would take us
- to case 4): this is a unary operator and could be either prefix or
- postfix.
-
- Implementation notes:
- "Whitespace before" and "previous token ended an operand" are read from the
- start condition left behind by the previous rule. YY_USER_ACTION resets that
- condition to INITIAL before every action, so cases 1 and 4, whose match
- includes the trailing whitespace, re-enter AFTER_WHITESPACE explicitly.
- Case 2 uses trailing context (`/{operand_start}`): the first character of
- the following operand must be present but is not consumed.
-
- NOTE(review): `whitespace` includes `\n`, yet cases 1 and 4 never call
- lines() on the token position, so a newline swallowed by these rules looks
- like it is counted as a column instead of a line. Confirm whether that is
- intended.
- */
- <AFTER_WHITESPACE>"*"{whitespace}+ /*case 1*/ {
- BEGIN(AFTER_WHITESPACE); // The match ended in whitespace, so the next token follows whitespace.
- return Carbon::Parser::make_BINARY_STAR(context.current_token_position);
- }
- <AFTER_OPERAND>"*"/{operand_start} /*case 2*/ {
- return Carbon::Parser::make_BINARY_STAR(context.current_token_position);
- }
- <AFTER_WHITESPACE>"*" /*case 3*/ {
- return Carbon::Parser::make_PREFIX_STAR(context.current_token_position);
- }
- <INITIAL,AFTER_OPERAND>"*"{whitespace}+ /*case 4*/ {
- BEGIN(AFTER_WHITESPACE); // The match ended in whitespace, so the next token follows whitespace.
- return Carbon::Parser::make_POSTFIX_STAR(context.current_token_position);
- }
- <INITIAL,AFTER_OPERAND>"*" /*case 5*/ {
- return Carbon::Parser::make_UNARY_STAR(context.current_token_position);
- }
- {identifier} { // Keyword rules are listed earlier, so they win when both match the same text.
- BEGIN(AFTER_OPERAND); // An identifier ends an operand; this is what lets a directly following `*` be binary.
- return Carbon::Parser::make_identifier(yytext, context.current_token_position);
- }
- {integer_literal} {
-   // An integer literal ends an operand.
-   BEGIN(AFTER_OPERAND);
-   // Parse the digits as a base-10 integer. The base is given explicitly so
-   // that a leading zero is not taken as a radix prefix. Going through
-   // floating point (atof) would silently lose precision and leave the
-   // conversion back to an integer undefined for out-of-range values.
-   // NOTE(review): assumes the integer_literal token carries an `int` value;
-   // confirm against the parser's token declaration.
-   int value = 0;
-   if (!llvm::to_integer(yytext, value, /*Base=*/10)) {
-     // The pattern admits only digits, so failure means the value does not
-     // fit. Report it the same way the invalid-character rule does.
-     if (Carbon::tracing_output) {
-       // Print a newline because tracing prints an incomplete line
-       // "Reading a token: ".
-       std::cerr << std::endl;
-     }
-     std::cerr << context.current_token_position << ": integer literal '"
-               << yytext << "' is out of range." << std::endl;
-     std::exit(1);
-   }
-   return Carbon::Parser::make_integer_literal(value, context.current_token_position);
- }
- {ONE_LINE_COMMENT} {
-   // A `//` comment together with its terminating newline; no token is
-   // produced and scanning continues.
-   // Advance end by 1 line, resetting the column to zero.
-   context.current_token_position.lines(1);
-   // Make the span empty by setting start to end.
-   context.current_token_position.step();
- }
- \/\/[^\n]* {
-   // A `//` comment that reaches end of input without a terminating newline.
-   // When a newline is present the rule above gives a longer match and wins,
-   // so this rule only fires on the last line of a file. Without it the two
-   // slashes would each be lexed as a SLASH token.
-   // No line was completed, so only make the span empty.
-   context.current_token_position.step();
- }
- {horizontal_whitespace}+ {
-   // Whatever comes next is lexed as a token that follows whitespace.
-   BEGIN(AFTER_WHITESPACE);
-   // Whitespace yields no token: collapse the span so start equals end.
-   context.current_token_position.step();
- }
- \n+ {
-   // Newlines count as whitespace for the token that follows.
-   BEGIN(AFTER_WHITESPACE);
-   // Each matched character is one newline: advance end by yyleng lines,
-   // which also resets the column to zero.
-   context.current_token_position.lines(yyleng);
-   // Collapse the span so start equals end.
-   context.current_token_position.step();
- }
- . {
-   // Catch-all for any single character no other rule accepts: report it
-   // and terminate.
-   if (Carbon::tracing_output) {
-     // Tracing leaves an unterminated "Reading a token: " line on stderr;
-     // end it before printing the diagnostic.
-     std::cerr << std::endl;
-   }
-   // Show the offending byte in hex.
-   const auto offending_byte_hex = llvm::toHex(llvm::StringRef(yytext, 1));
-   std::cerr << context.current_token_position << ": invalid character '\\x"
-             << offending_byte_hex << "' in source file." << std::endl;
-   std::exit(1);
- }
- <<EOF>> {
- // Tell the parser that the input is exhausted. A more modern Bison would give us make_EOF.
- return Carbon::Parser::make_END_OF_FILE(context.current_token_position);
- }
- %%
|