/*
Part of the Carbon Language project, under the Apache License v2.0 with LLVM
Exceptions. See /LICENSE for license information.
SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
%{
#include <cerrno>
#include <climits>
#include <cstdlib>
#include <cstring>
#include <iostream>

#include "executable_semantics/syntax/parse_and_lex_context.h"
%}
  /* Scanner generation options. */
/* Switch off the legacy hooks this scanner does not need. */
%option noyywrap
%option nounput
%option noinput
%option nodefault
/* Keep the number of the current input line in the global yylineno. */
%option yylineno
  /* Named patterns; the rules section refers to them as {NAME}. */

/* Keywords and built-in type names. */
AND                   "and"
AUTO                  "auto"
BOOL                  "Bool"
BREAK                 "break"
CASE                  "case"
CHOICE                "choice"
CONTINUE              "continue"
DEFAULT               "default"
ELSE                  "else"
FALSE                 "false"
FN                    "fn"
FNTY                  "fnty"
IF                    "if"
INT                   "Int"
MATCH                 "match"
NOT                   "not"
OR                    "or"
RETURN                "return"
STRUCT                "struct"
TRUE                  "true"
TYPE                  "Type"
VAR                   "var"
WHILE                 "while"

/* Multi-character operators. */
ARROW                 "->"
DBLARROW              "=>"
EQUAL_EQUAL           "=="

/* A line comment: two slashes through the terminating newline (inclusive). */
ONE_LINE_COMMENT      \/\/[^\n]*\n

/* Names, literals and intra-line whitespace. */
identifier            [A-Za-z_][A-Za-z0-9_]*
integer_literal       [0-9]+
horizontal_whitespace [ \t\r]
  /* Per-match hook used for source-location tracking. */
%{
// YY_USER_ACTION is expanded so that it runs every time a token is
// recognized. Here it moves the end of the current token span forward by
// yyleng columns; the line number is left untouched.
#define YY_USER_ACTION context.current_token_position.columns(yyleng);
%}
  /* End of the definitions section; the rules section starts below. */
%%
%{
  // Executed on every call to yylex, before any rule is tried: collapse the
  // token span to an empty one that starts where the previous span ended.
  context.current_token_position.step();
%}
  /* Keywords, built-in type names and multi-character operators. */
  /* Each rule returns the parser token of the same name, carrying the */
  /* current token span. Every pattern is a named definition from the */
  /* definitions section, including {EQUAL_EQUAL}. */
{AND}         { return yy::parser::make_AND(context.current_token_position); }
{ARROW}       { return yy::parser::make_ARROW(context.current_token_position); }
{AUTO}        { return yy::parser::make_AUTO(context.current_token_position); }
{BOOL}        { return yy::parser::make_BOOL(context.current_token_position); }
{BREAK}       { return yy::parser::make_BREAK(context.current_token_position); }
{CASE}        { return yy::parser::make_CASE(context.current_token_position); }
{CHOICE}      { return yy::parser::make_CHOICE(context.current_token_position); }
{CONTINUE}    { return yy::parser::make_CONTINUE(context.current_token_position); }
{DBLARROW}    { return yy::parser::make_DBLARROW(context.current_token_position); }
{DEFAULT}     { return yy::parser::make_DEFAULT(context.current_token_position); }
{ELSE}        { return yy::parser::make_ELSE(context.current_token_position); }
{EQUAL_EQUAL} { return yy::parser::make_EQUAL_EQUAL(context.current_token_position); }
{FALSE}       { return yy::parser::make_FALSE(context.current_token_position); }
{FN}          { return yy::parser::make_FN(context.current_token_position); }
{FNTY}        { return yy::parser::make_FNTY(context.current_token_position); }
{IF}          { return yy::parser::make_IF(context.current_token_position); }
{INT}         { return yy::parser::make_INT(context.current_token_position); }
{MATCH}       { return yy::parser::make_MATCH(context.current_token_position); }
{NOT}         { return yy::parser::make_NOT(context.current_token_position); }
{OR}          { return yy::parser::make_OR(context.current_token_position); }
{RETURN}      { return yy::parser::make_RETURN(context.current_token_position); }
{STRUCT}      { return yy::parser::make_STRUCT(context.current_token_position); }
{TRUE}        { return yy::parser::make_TRUE(context.current_token_position); }
{TYPE}        { return yy::parser::make_TYPE(context.current_token_position); }
{VAR}         { return yy::parser::make_VAR(context.current_token_position); }
{WHILE}       { return yy::parser::make_WHILE(context.current_token_position); }
  /* Single-character operators and punctuation. */
  /* Each rule returns the corresponding parser token with the current span. */
"=" { return yy::parser::make_EQUAL(context.current_token_position); }
"-" { return yy::parser::make_MINUS(context.current_token_position); }
"+" { return yy::parser::make_PLUS(context.current_token_position); }
"*" { return yy::parser::make_STAR(context.current_token_position); }
"/" { return yy::parser::make_SLASH(context.current_token_position); }
"(" { return yy::parser::make_LEFT_PARENTHESIS(context.current_token_position); }
")" { return yy::parser::make_RIGHT_PARENTHESIS(context.current_token_position); }
"{" { return yy::parser::make_LEFT_CURLY_BRACE(context.current_token_position); }
"}" { return yy::parser::make_RIGHT_CURLY_BRACE(context.current_token_position); }
"[" { return yy::parser::make_LEFT_SQUARE_BRACKET(context.current_token_position); }
"]" { return yy::parser::make_RIGHT_SQUARE_BRACKET(context.current_token_position); }
"." { return yy::parser::make_PERIOD(context.current_token_position); }
"," { return yy::parser::make_COMMA(context.current_token_position); }
";" { return yy::parser::make_SEMICOLON(context.current_token_position); }
":" { return yy::parser::make_COLON(context.current_token_position); }
  /* Identifiers: return an identifier token holding a copy of the name. */
{identifier} {
  // The parser receives its own malloc-allocated, NUL-terminated copy of the
  // matched text.
  // NOTE(review): presumably the receiver releases it with free(); confirm
  // against the parser's handling of identifier tokens.
  // flex already reports the match length in yyleng and keeps yytext
  // NUL-terminated, so there is no need to rescan the text with strlen.
  const std::size_t size_with_nul = static_cast<std::size_t>(yyleng) + 1;
  char* name = static_cast<char*>(std::malloc(size_with_nul));
  if (name == nullptr) {
    // Report the failure instead of writing through a null pointer.
    std::cerr << context.current_token_position
              << ": out of memory while reading an identifier." << std::endl;
    std::exit(1);
  }
  std::memcpy(name, yytext, size_with_nul);
  return yy::parser::make_identifier(name, context.current_token_position);
}
  /* Integer literals: return an integer_literal token holding the value. */
{integer_literal} {
  // The pattern admits decimal digits only, so parse with an integer routine
  // rather than going through floating point, and reject values that do not
  // fit instead of converting an out-of-range number.
  // NOTE(review): assumes the parser's integer_literal token holds an int;
  // widen the check and the cast if it is declared with a larger type.
  errno = 0;
  const long parsed = std::strtol(yytext, nullptr, 10);
  if (errno == ERANGE || parsed > INT_MAX) {
    std::cerr << context.current_token_position << ": integer literal '"
              << yytext << "' is out of range." << std::endl;
    std::exit(1);
  }
  return yy::parser::make_integer_literal(static_cast<int>(parsed),
                                          context.current_token_position);
}
  /* Line comments are skipped; they never produce a token. */
{ONE_LINE_COMMENT} {
  // The match includes the terminating newline, so advance the end of the
  // span by one line (which also resets the column) ...
  context.current_token_position.lines(1);
  // ... and make the span empty by moving its start up to its end.
  context.current_token_position.step();
}
"//"[^\n]* {
  // A comment that runs to end of input with no trailing newline. When a
  // newline does follow, the rule above matches one character more and is
  // chosen instead. Without this rule such a comment would be scanned as
  // two SLASH tokens followed by its text.
  context.current_token_position.step();
}
  /* Whitespace is skipped; it never produces a token. */
{horizontal_whitespace}+ {
  // Columns were already advanced for this match; just empty the span by
  // moving its start up to its end.
  context.current_token_position.step();
}
\n+ {
  // One matched character per newline: advance the end of the span by
  // yyleng lines (resetting the column), then empty the span.
  context.current_token_position.lines(yyleng);
  context.current_token_position.step();
}
  /* Any other single character is an error: report it and exit. */
. {
  const char offending = yytext[0];
  std::cerr << context.current_token_position << ": invalid character '"
            << offending << "' in source file." << std::endl;
  std::exit(1);
}
  /* End of input: hand the parser its explicit end-of-file token. */
<<EOF>> {
  // END_OF_FILE is used here; a more modern Bison would give us make_EOF.
  return yy::parser::make_END_OF_FILE(context.current_token_position);
}
%%