tomteb
/
carbon-lang


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227
							/*
Part of the Carbon Language project, under the Apache License v2.0 with LLVM
Exceptions. See /LICENSE for license information.
SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/

%{
#include <cstdlib>
#include <iostream>

#include "executable_semantics/common/tracing_flag.h"
#include "executable_semantics/syntax/parse_and_lex_context.h"
#include "llvm/ADT/StringExtras.h"
%}

/* Turn off legacy bits we don't need */
%option noyywrap nounput nodefault noinput

/* Maintain the number of the current line read from the input in the
   global variable yylineno.
*/
%option yylineno

/* Lexing a token immediately after consuming some whitespace. */
%s AFTER_WHITESPACE
/* Lexing a token immediately after consuming an operand-ending token:
 * a closing bracket, identifier, or literal.
 */
%s AFTER_OPERAND

/* Named patterns for keywords and other fixed spellings. A name defined here
   can be referenced as {NAME} in the rules section. A quoted pattern matches
   its text literally.

   ONE_LINE_COMMENT is the exception: it is a regular expression matching two
   slashes, the rest of the line, and the terminating newline. Because the
   newline is part of the pattern, a comment on a final line that lacks a
   trailing newline does not match it.
*/
AND               "and"
ARROW             "->"
AUTO              "auto"
BOOL              "Bool"
BREAK             "break"
CASE              "case"
CHOICE            "choice"
ONE_LINE_COMMENT  \/\/[^\n]*\n
CONTINUE          "continue"
DBLARROW          "=>"
DEFAULT           "default"
ELSE              "else"
EQUAL_EQUAL       "=="
FALSE             "false"
FN                "fn"
FNTY              "fnty"
IF                "if"
INT               "Int"
MATCH             "match"
NOT               "not"
OR                "or"
RETURN            "return"
STRUCT            "struct"
TRUE              "true"
TYPE              "Type"
VAR               "var"
WHILE             "while"
CONTINUATION_TYPE "__Continuation"
CONTINUATION      "__continuation"
RUN               "__run"
AWAIT             "__await"
UNDERSCORE        "_"

/* Character-level patterns used by the rules section.

   identifier             A letter or underscore, then any number of letters,
                          digits, or underscores.
   integer_literal        One or more decimal digits; no sign and no radix
                          prefix.
   horizontal_whitespace  A single space, tab, or carriage return.
   whitespace             A single space, tab, carriage return, or newline.
   operand_start          A single character treated as the start of an
                          operand: an opening parenthesis, a letter, a digit,
                          an underscore, or a double quote.
*/
identifier    [A-Za-z_][A-Za-z0-9_]*
integer_literal   [0-9]+
horizontal_whitespace [ \t\r]
whitespace [ \t\r\n]
operand_start [(A-Za-z0-9_"]

%{
  // This macro is expanded immediately before each action specified below.
  //
  // Advances the current token position by yyleng columns without changing
  // the line number, and takes us out of the after-whitespace / after-operand
  // state.
  //
  // Because the expansion runs before the action, an action that wants the
  // scanner to be in AFTER_WHITESPACE or AFTER_OPERAND for the next token
  // must call BEGIN itself; otherwise those states last for exactly one
  // matched rule. Actions that consume newlines must also call lines() on
  // the position themselves, since only columns are advanced here.
  # define YY_USER_ACTION \
      context.current_token_position.columns(yyleng); \
      if (YY_START == AFTER_WHITESPACE || \
          YY_START == AFTER_OPERAND) { \
        BEGIN(INITIAL); \
      }
%}

%%

%{
  // Code run each time yylex is called.
  //
  // NOTE(review): `context` is not declared in this file; presumably it is a
  // yylex parameter introduced through parse_and_lex_context.h — confirm.

  // Begin with an empty token span starting where its previous end was.
  context.current_token_position.step();
%}

{AND}      { return Carbon::Parser::make_AND(context.current_token_position); }
{ARROW}    { return Carbon::Parser::make_ARROW(context.current_token_position); }
{AUTO}     { return Carbon::Parser::make_AUTO(context.current_token_position); }
{BOOL}     { return Carbon::Parser::make_BOOL(context.current_token_position); }
{BREAK}    { return Carbon::Parser::make_BREAK(context.current_token_position); }
{CASE}     { return Carbon::Parser::make_CASE(context.current_token_position); }
{CHOICE}   { return Carbon::Parser::make_CHOICE(context.current_token_position); }
{CONTINUE} { return Carbon::Parser::make_CONTINUE(context.current_token_position); }
{DBLARROW} { return Carbon::Parser::make_DBLARROW(context.current_token_position); }
{DEFAULT}  { return Carbon::Parser::make_DEFAULT(context.current_token_position); }
{ELSE}     { return Carbon::Parser::make_ELSE(context.current_token_position); }
"=="       { return Carbon::Parser::make_EQUAL_EQUAL(context.current_token_position); }
{FALSE}    { return Carbon::Parser::make_FALSE(context.current_token_position); }
{FN}       { return Carbon::Parser::make_FN(context.current_token_position); }
{FNTY}     { return Carbon::Parser::make_FNTY(context.current_token_position); }
{IF}       { return Carbon::Parser::make_IF(context.current_token_position); }
{INT}      { return Carbon::Parser::make_INT(context.current_token_position); }
{MATCH}    { return Carbon::Parser::make_MATCH(context.current_token_position); }
{NOT}      { return Carbon::Parser::make_NOT(context.current_token_position); }
{OR}       { return Carbon::Parser::make_OR(context.current_token_position); }
{RETURN}   { return Carbon::Parser::make_RETURN(context.current_token_position); }
{STRUCT}   { return Carbon::Parser::make_STRUCT(context.current_token_position); }
{TRUE}     { return Carbon::Parser::make_TRUE(context.current_token_position); }
{TYPE}     { return Carbon::Parser::make_TYPE(context.current_token_position); }
{VAR}      { return Carbon::Parser::make_VAR(context.current_token_position); }
{WHILE}    { return Carbon::Parser::make_WHILE(context.current_token_position); }
{CONTINUATION_TYPE} { return Carbon::Parser::make_CONTINUATION_TYPE(context.current_token_position); }
{CONTINUATION}    { return Carbon::Parser::make_CONTINUATION(context.current_token_position); }
{RUN}      { return Carbon::Parser::make_RUN(context.current_token_position); }
{AWAIT}    { return Carbon::Parser::make_AWAIT(context.current_token_position); }
{UNDERSCORE}      { return Carbon::Parser::make_UNDERSCORE(context.current_token_position); }

"=" return Carbon::Parser::make_EQUAL(context.current_token_position);
"-" return Carbon::Parser::make_MINUS(context.current_token_position);
"+" return Carbon::Parser::make_PLUS(context.current_token_position);
"/" return Carbon::Parser::make_SLASH(context.current_token_position);
"(" return Carbon::Parser::make_LEFT_PARENTHESIS(context.current_token_position);
")" { BEGIN(AFTER_OPERAND); return Carbon::Parser::make_RIGHT_PARENTHESIS(context.current_token_position); }
"{" return Carbon::Parser::make_LEFT_CURLY_BRACE(context.current_token_position);
"}" { BEGIN(AFTER_OPERAND); return Carbon::Parser::make_RIGHT_CURLY_BRACE(context.current_token_position); }
"[" return Carbon::Parser::make_LEFT_SQUARE_BRACKET(context.current_token_position);
"]" { BEGIN(AFTER_OPERAND); return Carbon::Parser::make_RIGHT_SQUARE_BRACKET(context.current_token_position); }
"." return Carbon::Parser::make_PERIOD(context.current_token_position);
"," return Carbon::Parser::make_COMMA(context.current_token_position);
";" return Carbon::Parser::make_SEMICOLON(context.current_token_position);
":!" return Carbon::Parser::make_COLON_BANG(context.current_token_position);
":" return Carbon::Parser::make_COLON(context.current_token_position);

 /*
For a `*` operator, we look at whitespace and local context to determine the
arity and fixity. There are two ways to write a binary operator:

 1) Whitespace on both sides.
 2) Whitespace on neither side, and the previous token is considered to be
    the end of an operand, and the next token is considered to be the start
    of an operand.

Otherwise, the operator is unary, but we also check for whitespace to help
the parser enforce the rule that whitespace is not permitted between the
operator and its operand, leading to three more cases:

 3) Whitespace before (but implicitly not after, because that would give a
    longer match and hit case 1): this can only be a prefix operator.
 4) Whitespace after and not before: this can only be a postfix operator.
 5) No whitespace on either side (otherwise the longest match would take us
    to case 4): this is a unary operator and could be either prefix or
    postfix.

In cases 1 and 4 the whitespace after the `*` is matched as trailing context
only: it still counts towards the longest match when a rule is selected, but it
is returned to the input before the action runs. The ordinary whitespace rules
then consume it, which keeps the line number in the token position correct when
that whitespace contains a newline, keeps the whitespace out of the token's
span, and leaves the scanner in AFTER_WHITESPACE for the next token.
*/
<AFTER_WHITESPACE>"*"/{whitespace} /*case 1*/ {
  return Carbon::Parser::make_BINARY_STAR(context.current_token_position);
}
<AFTER_OPERAND>"*"/{operand_start} /*case 2*/ {
  return Carbon::Parser::make_BINARY_STAR(context.current_token_position);
}
<AFTER_WHITESPACE>"*" /*case 3*/ {
  return Carbon::Parser::make_PREFIX_STAR(context.current_token_position);
}
<INITIAL,AFTER_OPERAND>"*"/{whitespace} /*case 4*/ {
  return Carbon::Parser::make_POSTFIX_STAR(context.current_token_position);
}
<INITIAL,AFTER_OPERAND>"*" /*case 5*/ {
  return Carbon::Parser::make_UNARY_STAR(context.current_token_position);
}

{identifier} {
  // Any name that was not claimed by one of the keyword rules listed earlier.
  // An identifier ends an operand, so a `*` that follows immediately is
  // classified with that in mind.
  BEGIN(AFTER_OPERAND);
  // The matched text itself is the token's value.
  return Carbon::Parser::make_identifier(yytext, context.current_token_position);
}

{integer_literal} {
  // A literal ends an operand, so a `*` that follows immediately is
  // classified with that in mind.
  BEGIN(AFTER_OPERAND);
  // Parse as a base-10 integer instead of going through atof(): a double
  // cannot represent every integer exactly, and converting an out-of-range
  // double to an integer type is undefined behavior. The base is given
  // explicitly so that a leading zero is not taken as a radix prefix.
  // NOTE(review): assumes the parser declares integer_literal with semantic
  // type `int` — confirm against the grammar file.
  int value = 0;
  if (!llvm::to_integer(llvm::StringRef(yytext, yyleng), value, 10)) {
    // The pattern admits only digits, so the conversion can fail only
    // because the value does not fit.
    if (Carbon::tracing_output) {
      // Print a newline because tracing prints an incomplete line
      // "Reading a token: ".
      std::cerr << std::endl;
    }
    std::cerr << context.current_token_position << ": integer literal '"
              << yytext << "' is too large." << std::endl;
    std::exit(1);
  }
  return Carbon::Parser::make_integer_literal(value, context.current_token_position);
}

{ONE_LINE_COMMENT} {
  // A comment produces no token; only the position and scanner state change.
  //
  // Advance end by 1 line, resetting the column to zero.
  context.current_token_position.lines(1);
  // Make the span empty by setting start to end.
  context.current_token_position.step();
  // The comment pattern consumes the newline that ends it, so the next token
  // is preceded by whitespace. Without this, a `*` at the start of the next
  // line would be classified as if it directly followed the previous token.
  BEGIN(AFTER_WHITESPACE);
}

{horizontal_whitespace}+ {
  // Spaces, tabs and carriage returns produce no token. Record that the next
  // token follows whitespace, then collapse the span so that token starts
  // where this run ended.
  BEGIN(AFTER_WHITESPACE);
  context.current_token_position.step();
}

\n+ {
  // Newlines produce no token either. Record that the next token follows
  // whitespace.
  BEGIN(AFTER_WHITESPACE);
  // One line per newline matched; this also resets the column.
  context.current_token_position.lines(yyleng);
  // Collapse the span so the next token starts at the new line's beginning.
  context.current_token_position.step();
}

. {
  // Reached only when no other rule matches the character: report it with
  // its position and stop. There is no error recovery in the scanner.
  if (Carbon::tracing_output) {
    // Tracing leaves an incomplete "Reading a token: " line on the stream;
    // end it so the diagnostic starts on a line of its own.
    std::cerr << std::endl;
  }
  // Show the offending byte in hex so unprintable input is still legible.
  const auto offending_hex = llvm::toHex(llvm::StringRef(yytext, 1));
  std::cerr << context.current_token_position << ": invalid character '\\x"
            << offending_hex << "' in source file." << std::endl;
  std::exit(1);
}

<<EOF>>    {
  // Hand the parser an explicit end-of-file token. A more modern Bison would
  // give us make_EOF.
  return Carbon::Parser::make_END_OF_FILE(context.current_token_position);
}

%%