lexer.lpp 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228
  1. /*
  2. Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  3. Exceptions. See /LICENSE for license information.
  4. SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  5. */
  6. %{
  7. #include <cstdlib>
  8. #include "common/check.h"
  9. #include "executable_semantics/common/tracing_flag.h"
  10. #include "executable_semantics/syntax/parse_and_lex_context.h"
  11. #include "llvm/ADT/StringExtras.h"
  12. %}
  13. /* Turn off legacy bits we don't need */
  14. %option noyywrap nounput nodefault noinput
  15. /* maintains the number of the current line read from input in the
  16. global variable yylineno.
  17. */
  18. %option yylineno
  19. /* Lexing a token immediately after consuming some whitespace. */
  20. %s AFTER_WHITESPACE
  21. /* Lexing a token immediately after consuming an operand-ending token:
  22. * a closing bracket, identifier, or literal.
  23. */
  24. %s AFTER_OPERAND
  25. AND "and"
  26. ARROW "->"
  27. AUTO "auto"
  28. BOOL "Bool"
  29. BREAK "break"
  30. CASE "case"
  31. CHOICE "choice"
  32. ONE_LINE_COMMENT \/\/[^\n]*\n
  33. CONTINUE "continue"
  34. DBLARROW "=>"
  35. DEFAULT "default"
  36. ELSE "else"
  37. EQUAL_EQUAL "=="
  38. FALSE "false"
  39. FN "fn"
  40. FNTY "fnty"
  41. IF "if"
  42. MATCH "match"
  43. NOT "not"
  44. OR "or"
  45. RETURN "return"
  46. STRUCT "struct"
  47. TRUE "true"
  48. TYPE "Type"
  49. VAR "var"
  50. WHILE "while"
  51. CONTINUATION_TYPE "__Continuation"
  52. CONTINUATION "__continuation"
  53. RUN "__run"
  54. AWAIT "__await"
  55. UNDERSCORE "_"
  56. identifier [A-Za-z_][A-Za-z0-9_]*
  57. sized_type_literal [iuf][1-9][0-9]*
  58. integer_literal [0-9]+
  59. horizontal_whitespace [ \t\r]
  60. whitespace [ \t\r\n]
  61. operand_start [(A-Za-z0-9_"]
  62. %{
  63. // This macro is expanded immediately before each action specified below.
  64. //
  65. // Advances the current token position by yyleng columns without changing
  66. // the line number, and takes us out of the after-whitespace / after-operand
  67. // state.
  68. # define YY_USER_ACTION \
  69. context.current_token_position.columns(yyleng); \
  70. if (YY_START == AFTER_WHITESPACE || \
  71. YY_START == AFTER_OPERAND) { \
  72. BEGIN(INITIAL); \
  73. }
  74. %}
  75. %%
%{
  // Code run each time yylex is called, before any rule is matched.

  // Begin with an empty token span starting where the previous span ended, so
  // that the columns added while matching describe only the new token.
  context.current_token_position.step();
%}
  81. {AND} { return Carbon::Parser::make_AND(context.current_token_position); }
  82. {ARROW} { return Carbon::Parser::make_ARROW(context.current_token_position); }
  83. {AUTO} { return Carbon::Parser::make_AUTO(context.current_token_position); }
  84. {BOOL} { return Carbon::Parser::make_BOOL(context.current_token_position); }
  85. {BREAK} { return Carbon::Parser::make_BREAK(context.current_token_position); }
  86. {CASE} { return Carbon::Parser::make_CASE(context.current_token_position); }
  87. {CHOICE} { return Carbon::Parser::make_CHOICE(context.current_token_position); }
  88. {CONTINUE} { return Carbon::Parser::make_CONTINUE(context.current_token_position); }
  89. {DBLARROW} { return Carbon::Parser::make_DBLARROW(context.current_token_position); }
  90. {DEFAULT} { return Carbon::Parser::make_DEFAULT(context.current_token_position); }
  91. {ELSE} { return Carbon::Parser::make_ELSE(context.current_token_position); }
  92. "==" { return Carbon::Parser::make_EQUAL_EQUAL(context.current_token_position); }
  93. {FALSE} { return Carbon::Parser::make_FALSE(context.current_token_position); }
  94. {FN} { return Carbon::Parser::make_FN(context.current_token_position); }
  95. {FNTY} { return Carbon::Parser::make_FNTY(context.current_token_position); }
  96. {IF} { return Carbon::Parser::make_IF(context.current_token_position); }
  97. {MATCH} { return Carbon::Parser::make_MATCH(context.current_token_position); }
  98. {NOT} { return Carbon::Parser::make_NOT(context.current_token_position); }
  99. {OR} { return Carbon::Parser::make_OR(context.current_token_position); }
  100. {RETURN} { return Carbon::Parser::make_RETURN(context.current_token_position); }
  101. {STRUCT} { return Carbon::Parser::make_STRUCT(context.current_token_position); }
  102. {TRUE} { return Carbon::Parser::make_TRUE(context.current_token_position); }
  103. {TYPE} { return Carbon::Parser::make_TYPE(context.current_token_position); }
  104. {VAR} { return Carbon::Parser::make_VAR(context.current_token_position); }
  105. {WHILE} { return Carbon::Parser::make_WHILE(context.current_token_position); }
  106. {CONTINUATION_TYPE} { return Carbon::Parser::make_CONTINUATION_TYPE(context.current_token_position); }
  107. {CONTINUATION} { return Carbon::Parser::make_CONTINUATION(context.current_token_position); }
  108. {RUN} { return Carbon::Parser::make_RUN(context.current_token_position); }
  109. {AWAIT} { return Carbon::Parser::make_AWAIT(context.current_token_position); }
  110. {UNDERSCORE} { return Carbon::Parser::make_UNDERSCORE(context.current_token_position); }
  111. {sized_type_literal} { return Carbon::Parser::make_sized_type_literal(yytext, context.current_token_position); }
  112. "=" return Carbon::Parser::make_EQUAL(context.current_token_position);
  113. "-" return Carbon::Parser::make_MINUS(context.current_token_position);
  114. "+" return Carbon::Parser::make_PLUS(context.current_token_position);
  115. "/" return Carbon::Parser::make_SLASH(context.current_token_position);
  116. "(" return Carbon::Parser::make_LEFT_PARENTHESIS(context.current_token_position);
  117. ")" { BEGIN(AFTER_OPERAND); return Carbon::Parser::make_RIGHT_PARENTHESIS(context.current_token_position); }
  118. "{" return Carbon::Parser::make_LEFT_CURLY_BRACE(context.current_token_position);
  119. "}" { BEGIN(AFTER_OPERAND); return Carbon::Parser::make_RIGHT_CURLY_BRACE(context.current_token_position); }
  120. "[" return Carbon::Parser::make_LEFT_SQUARE_BRACKET(context.current_token_position);
  121. "]" { BEGIN(AFTER_OPERAND); return Carbon::Parser::make_RIGHT_SQUARE_BRACKET(context.current_token_position); }
  122. "." return Carbon::Parser::make_PERIOD(context.current_token_position);
  123. "," return Carbon::Parser::make_COMMA(context.current_token_position);
  124. ";" return Carbon::Parser::make_SEMICOLON(context.current_token_position);
  125. ":!" return Carbon::Parser::make_COLON_BANG(context.current_token_position);
  126. ":" return Carbon::Parser::make_COLON(context.current_token_position);
  127. /*
  128. For a `*` operator, we look at whitespace and local context to determine the
  129. arity and fixity. There are two ways to write a binary operator:
  130. 1) Whitespace on both sides.
  131. 2) Whitespace on neither side, and the previous token is considered to be
  132. the end of an operand, and the next token is considered to be the start
  133. of an operand.
  134. Otherwise, the operator is unary, but we also check for whitespace to help
  135. the parser enforce the rule that whitespace is not permitted between the
  136. operator and its operand, leading to three more cases:
  137. 3) Whitespace before (but implicitly not after, because that would give a
  138. longer match and hit case 1): this can only be a prefix operator.
  139. 4) Whitespace after and not before: this can only be a postfix operator.
  140. 5) No whitespace on either side (otherwise the longest match would take us
  141. to case 4): this is a unary operator and could be either prefix or
  142. postfix.
  143. */
  144. <AFTER_WHITESPACE>"*"{whitespace}+ /*case 1*/ {
  145. BEGIN(AFTER_WHITESPACE);
  146. return Carbon::Parser::make_BINARY_STAR(context.current_token_position);
  147. }
  148. <AFTER_OPERAND>"*"/{operand_start} /*case 2*/ {
  149. return Carbon::Parser::make_BINARY_STAR(context.current_token_position);
  150. }
  151. <AFTER_WHITESPACE>"*" /*case 3*/ {
  152. return Carbon::Parser::make_PREFIX_STAR(context.current_token_position);
  153. }
  154. <INITIAL,AFTER_OPERAND>"*"{whitespace}+ /*case 4*/ {
  155. BEGIN(AFTER_WHITESPACE);
  156. return Carbon::Parser::make_POSTFIX_STAR(context.current_token_position);
  157. }
  158. <INITIAL,AFTER_OPERAND>"*" /*case 5*/ {
  159. return Carbon::Parser::make_UNARY_STAR(context.current_token_position);
  160. }
{identifier} {
  // An identifier ends an operand; record that for the token that follows,
  // because it affects how a `*` is classified.
  BEGIN(AFTER_OPERAND);
  // The matched spelling is passed along with the token.
  return Carbon::Parser::make_identifier(yytext, context.current_token_position);
}
  165. {integer_literal} {
  166. BEGIN(AFTER_OPERAND);
  167. int val;
  168. CHECK(llvm::to_integer(yytext, val));
  169. return Carbon::Parser::make_integer_literal(val, context.current_token_position);
  170. }
  171. {ONE_LINE_COMMENT} {
  172. // Advance end by 1 line, resetting the column to zero.
  173. context.current_token_position.lines(1);
  174. // Make the span empty by setting start to end.
  175. context.current_token_position.step();
  176. }
  177. {horizontal_whitespace}+ {
  178. // Make the span empty by setting start to end.
  179. context.current_token_position.step();
  180. BEGIN(AFTER_WHITESPACE);
  181. }
  182. \n+ {
  183. // Advance end by yyleng lines, resetting the column to zero.
  184. context.current_token_position.lines(yyleng);
  185. // Make the span empty by setting start to end.
  186. context.current_token_position.step();
  187. BEGIN(AFTER_WHITESPACE);
  188. }
  189. . {
  190. if (Carbon::tracing_output) {
  191. // Print a newline because tracing prints an incomplete line
  192. // "Reading a token: ".
  193. llvm::errs() << "\n";
  194. }
  195. FATAL_COMPILATION_ERROR(yylineno) << "invalid character '\\x"
  196. << llvm::toHex(llvm::StringRef(yytext, 1)) << "' in source file.";
  197. }
<<EOF>> {
  // End of input: hand the parser an explicit end-of-file token located at
  // the current token position.
  // A more modern Bison would give us make_EOF.
  return Carbon::Parser::make_END_OF_FILE(context.current_token_position);
}
  202. %%