grammar.js 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561
  1. /*
  2. * Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  3. * Exceptions. See /LICENSE for license information.
  4. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  5. */
  6. // This grammar is more permissive than toolchain because it is geared towards
  7. // editor use.
  8. function repeat_sep1(thing, sep) {
  9. return seq(thing, repeat(seq(sep, thing)));
  10. }
  11. function comma_sep(thing) {
  12. // Trailing comma is only allowed if there is at least one element.
  13. return optional(seq(repeat_sep1(thing, ','), optional(',')));
  14. }
  15. // This is based on toolchain/parser/precedence.cpp
  16. const PREC = {
  17. TermPrefix: 12,
  18. TermPostfix: 12,
  19. NumericPrefix: 11,
  20. NumericPostfix: 11,
  21. Multiplicative: 10,
  22. Additive: 9,
  23. BitwisePrefix: 8,
  24. BitwiseAnd: 7,
  25. BitwiseOr: 7,
  26. BitwiseXor: 7,
  27. BitShift: 7,
  28. TypePostfix: 6,
  29. LogicalPrefix: 5,
  30. Relational: 4,
  31. LogicalAnd: 3,
  32. LogicalOr: 3,
  33. IfExpression: 2,
  34. WhereClause: 1,
  35. };
  36. module.exports = grammar({
  37. name: 'carbon',
  38. word: ($) => $.ident,
  39. conflicts: ($) => [
  40. [$.paren_pattern, $.paren_expression],
  41. [$.struct_literal, $.struct_type_literal],
  42. ],
  43. extras: ($) => [/\s/, $.comment],
  44. // NOTE: This must match the order in src/scanner.c, names are not used for matching.
  45. externals: ($) => [$.binary_star, $.postfix_star],
  46. rules: {
  47. source_file: ($) =>
  48. seq(
  49. optional($.package_directive),
  50. repeat($.import_directive),
  51. repeat($.declaration)
  52. ),
  53. api_or_impl: ($) => choice('api', 'impl'),
  54. library_path: ($) => seq('library', $.string_literal),
  55. package_directive: ($) =>
  56. seq('package', $.ident, optional($.library_path), $.api_or_impl, ';'),
  57. import_directive: ($) =>
  58. seq('import', $.ident, optional($.library_path), ';'),
  59. comment: ($) => token(seq('//', /.*/)),
  60. // NOTE: this must be before ident rule to increase its priority.
  61. // https://github.com/carbon-language/carbon-lang/blob/trunk/proposals/p2015.md#syntax
  62. numeric_type_literal: ($) => /[iuf][1-9][0-9]*/,
  63. ident: ($) => /[A-Za-z_][A-Za-z0-9_]*/,
  64. bool_literal: ($) => choice('true', 'false'),
  65. numeric_literal: ($) => {
  66. // This is using variables because rules are not allowed in
  67. // token.immediate and token.
  68. // https://github.com/tree-sitter/tree-sitter/issues/449
  69. const decimal_integer_literal = choice('0', /[1-9](_?[0-9])*/);
  70. const hex_digits = /[0-9A-F](_?[0-9A-F])*/;
  71. const binary_integer_literal = /0b[01](_?[01])*/;
  72. const hex_integer_literal = seq('0x', token.immediate(hex_digits));
  73. const decimal_real_number_literal = seq(
  74. decimal_integer_literal,
  75. token.immediate(/\.[0-9](_?[0-9])*/),
  76. optional(
  77. seq(
  78. token.immediate(/e[+-]?/),
  79. token.immediate(decimal_integer_literal)
  80. )
  81. )
  82. );
  83. const hex_real_number_literal = seq(
  84. hex_integer_literal,
  85. token.immediate('.'),
  86. token.immediate(hex_digits),
  87. optional(
  88. seq(
  89. token.immediate(/p[+-]?/),
  90. token.immediate(decimal_integer_literal)
  91. )
  92. )
  93. );
  94. return token(
  95. choice(
  96. decimal_integer_literal,
  97. binary_integer_literal,
  98. hex_integer_literal,
  99. decimal_real_number_literal,
  100. hex_real_number_literal
  101. )
  102. );
  103. },
  104. _string_content: ($) => token.immediate(/[^\\"]+/),
  105. escape_sequence: ($) =>
  106. token.immediate(
  107. seq(
  108. '\\',
  109. choice(
  110. 'n',
  111. 't',
  112. 'r',
  113. "'",
  114. '"',
  115. '\\',
  116. '0',
  117. /x[0-9A-F]{2}/,
  118. /u\{[0-9A-F]+\}/
  119. )
  120. )
  121. ),
  122. // TODO: multiline string
  123. string_literal: ($) =>
  124. seq(
  125. '"',
  126. repeat(choice($._string_content, $.escape_sequence)),
  127. token.immediate('"')
  128. ),
  129. array_literal: ($) =>
  130. seq(
  131. '[',
  132. field('type', $._expression),
  133. ';',
  134. optional(field('size', $._expression)),
  135. ']'
  136. ),
  137. struct_literal: ($) =>
  138. seq('{', comma_sep(seq($.designator, '=', $._expression)), '}'),
  139. struct_type_literal: ($) =>
  140. seq('{', comma_sep(seq($.designator, ':', $._expression)), '}'),
  141. builtin_type: ($) => choice('Self', 'String', 'bool', 'type'),
  142. literal: ($) =>
  143. choice(
  144. $.bool_literal,
  145. $.numeric_literal,
  146. $.numeric_type_literal,
  147. $.string_literal,
  148. $.struct_literal,
  149. $.struct_type_literal
  150. ),
  151. _binding_lhs: ($) => choice($.ident, '_'),
  152. paren_pattern: ($) =>
  153. seq(
  154. '(',
  155. comma_sep(choice($._pattern_without_expression, $._expression)),
  156. ')'
  157. ),
  158. _pattern_without_expression: ($) =>
  159. choice(
  160. 'auto',
  161. seq($._binding_lhs, ':', $._expression),
  162. seq($._binding_lhs, ':!', $._expression),
  163. seq('template', $._binding_lhs, ':!', $._expression),
  164. seq('var', $._pattern),
  165. $.paren_pattern,
  166. // alternative patterns
  167. // example: Optional(i32).Some(x: i32)
  168. seq($._simple_expression, $.paren_pattern)
  169. ),
  170. _pattern: ($) =>
  171. choice($._pattern_without_expression, $._simple_expression),
  172. unary_prefix_expression: ($) => {
  173. const table = [
  174. [PREC.NumericPrefix, '-'],
  175. [PREC.NumericPrefix, '--'],
  176. [PREC.NumericPrefix, '++'],
  177. [PREC.BitwisePrefix, '^'],
  178. [PREC.LogicalPrefix, 'not'],
  179. ];
  180. return choice(
  181. ...table.map(([precedence, operator]) =>
  182. prec(
  183. precedence,
  184. seq(field('operator', operator), field('value', $._expression))
  185. )
  186. )
  187. );
  188. },
  189. binary_expression: ($) => {
  190. const table = [
  191. [PREC.LogicalAnd, 'and'],
  192. [PREC.LogicalOr, 'or'],
  193. [PREC.BitwiseAnd, '&'],
  194. [PREC.BitwiseOr, '|'],
  195. [PREC.BitwiseXor, '^'],
  196. [PREC.BitShift, choice('<<', '>>')],
  197. [PREC.Relational, choice('==', '!=', '<', '<=', '>', '>=')],
  198. [PREC.Additive, choice('+', '-')],
  199. [PREC.Multiplicative, choice($.binary_star, '/', '%')],
  200. ];
  201. return choice(
  202. ...table.map(([precedence, operator]) =>
  203. prec.left(
  204. precedence,
  205. seq(
  206. field('left', $._expression),
  207. field('operator', operator),
  208. field('right', $._expression)
  209. )
  210. )
  211. )
  212. );
  213. },
  214. // This should be non-associative but conflicts are not allowed in tree-sitter
  215. as_expression: ($) => prec.left(seq($._expression, 'as', $._expression)),
  216. ref_expression: ($) =>
  217. prec.right(PREC.TermPrefix, seq('&', $._simple_expression)),
  218. deref_expression: ($) =>
  219. prec.right(PREC.TermPrefix, seq('*', $._simple_expression)),
  220. fn_type_expression: ($) =>
  221. prec.left(seq('__Fn', $.paren_expression, '->', $._simple_expression)),
  222. if_expression: ($) =>
  223. prec(
  224. PREC.IfExpression,
  225. seq('if', $._expression, 'then', $._expression, 'else', $._expression)
  226. ),
  227. paren_expression: ($) => seq('(', comma_sep($._expression), ')'),
  228. index_expression: ($) =>
  229. prec(
  230. PREC.TermPostfix,
  231. seq($._simple_expression, '[', $._expression, ']')
  232. ),
  233. designator: ($) => seq('.', choice('base', $.ident)),
  234. postfix_expression: ($) =>
  235. prec(
  236. PREC.TermPostfix,
  237. seq(
  238. $._simple_expression,
  239. choice(
  240. '++',
  241. '--',
  242. $.designator,
  243. seq('->', $.ident),
  244. seq(choice('.', '->'), '(', $._expression, ')')
  245. )
  246. )
  247. ),
  248. where_clause: ($) =>
  249. choice(
  250. seq($._simple_expression, '==', $._simple_expression),
  251. seq($._simple_expression, 'impls', $._simple_expression),
  252. seq($._simple_expression, '=', $._simple_expression),
  253. prec.left(
  254. PREC.WhereClause + 1,
  255. seq($.where_clause, 'and', $.where_clause)
  256. )
  257. ),
  258. where_expression: ($) =>
  259. prec.left(PREC.WhereClause, seq($._expression, 'where', $.where_clause)),
  260. call_expression: ($) =>
  261. prec(PREC.TermPostfix, seq($._simple_expression, $.paren_expression)),
  262. pointer_expression: ($) =>
  263. prec(PREC.TypePostfix, seq($._simple_expression, $.postfix_star)),
  264. _simple_expression: ($) =>
  265. choice(
  266. $.array_literal,
  267. $.builtin_type,
  268. $.call_expression,
  269. $.deref_expression,
  270. $.fn_type_expression,
  271. $.ident,
  272. $.index_expression,
  273. $.literal,
  274. $.paren_expression,
  275. $.pointer_expression,
  276. $.postfix_expression,
  277. $.ref_expression,
  278. 'self',
  279. '.Self',
  280. $.designator
  281. ),
  282. _expression: ($) =>
  283. choice(
  284. $.as_expression,
  285. $.binary_expression,
  286. $.if_expression,
  287. $.unary_prefix_expression,
  288. $.where_expression,
  289. $._simple_expression
  290. ),
  291. var_declaration: ($) =>
  292. seq(
  293. 'var',
  294. $._pattern_without_expression,
  295. optional(seq('=', $._expression)),
  296. ';'
  297. ),
  298. let_declaration: ($) =>
  299. seq('let', $._pattern_without_expression, '=', $._expression, ';'),
  300. assign_statement: ($) =>
  301. seq($._expression, $._assign_operator, $._expression, ';'),
  302. _assign_operator: ($) =>
  303. choice('=', '+=', '/=', '*=', '%=', '-=', '&=', '|=', '^=', '<<=', '>>='),
  304. match_clause: ($) =>
  305. seq(choice(seq('case', $._pattern), 'default'), '=>', $.block),
  306. match_statement: ($) =>
  307. seq('match', '(', $._expression, ')', '{', repeat($.match_clause), '}'),
  308. returned_var_statement: ($) => seq('returned', $.var_declaration),
  309. while_statement: ($) => seq('while', '(', $._expression, ')', $.block),
  310. break_statement: ($) => seq('break', ';'),
  311. continue_statement: ($) => seq('continue', ';'),
  312. return_statement: ($) =>
  313. seq('return', optional(choice('var', $._expression)), ';'),
  314. if_statement: ($) =>
  315. seq('if', '(', $._expression, ')', $.block, optional($.else)),
  316. else: ($) => choice(seq('else', $.if_statement), seq('else', $.block)),
  317. for_statement: ($) =>
  318. seq('for', '(', $._pattern, 'in', $._expression, ')', $.block),
  319. statement: ($) =>
  320. choice(
  321. seq($._expression, ';'),
  322. $.assign_statement,
  323. $.var_declaration,
  324. $.let_declaration,
  325. $.match_statement,
  326. $.returned_var_statement,
  327. $.if_statement,
  328. $.while_statement,
  329. $.break_statement,
  330. $.continue_statement,
  331. $.return_statement,
  332. $.for_statement
  333. ),
  334. block: ($) => seq('{', repeat($.statement), '}'),
  335. declared_name: ($) => repeat_sep1($.ident, '.'),
  336. generic_binding: ($) =>
  337. seq(optional('template'), $.ident, ':!', $._expression),
  338. deduced_param: ($) =>
  339. choice(
  340. $.generic_binding,
  341. seq(optional('addr'), 'self', ':', $._expression)
  342. ),
  343. deduced_params: ($) => seq('[', comma_sep($.deduced_param), ']'),
  344. return_type: ($) => seq('->', choice('auto', $._expression)),
  345. function_declaration: ($) =>
  346. seq(
  347. optional(choice('abstract', 'virtual', 'impl')),
  348. 'fn',
  349. $.declared_name,
  350. optional($.deduced_params),
  351. $.paren_pattern,
  352. optional($.return_type),
  353. choice($.block, ';')
  354. ),
  355. namespace_declaration: ($) => seq('namespace', $.declared_name, ';'),
  356. alias_declaration: ($) =>
  357. seq('alias', $.declared_name, '=', $._expression, ';'),
  358. type_params: ($) => $.paren_pattern,
  359. interface_body_item: ($) =>
  360. choice(
  361. $.function_declaration,
  362. seq('let', $.generic_binding, ';'),
  363. seq('extend', $._expression, ';'),
  364. seq('require', $._expression, 'impls', $._expression, ';')
  365. ),
  366. interface_body: ($) => seq('{', repeat($.interface_body_item), '}'),
  367. interface_declaration: ($) =>
  368. seq(
  369. 'interface',
  370. $.declared_name,
  371. optional($.deduced_params),
  372. optional($.type_params),
  373. choice(';', $.interface_body)
  374. ),
  375. constraint_declaration: ($) =>
  376. seq(
  377. 'constraint',
  378. $.declared_name,
  379. optional($.deduced_params),
  380. optional($.type_params),
  381. choice(';', $.interface_body)
  382. ),
  383. impl_body_item: ($) => choice($.function_declaration, $.alias_declaration),
  384. impl_body: ($) => seq('{', repeat($.impl_body_item), '}'),
  385. impl_declaration: ($) =>
  386. seq(
  387. 'impl',
  388. optional(seq('forall', $.deduced_params)),
  389. optional($._expression),
  390. 'as',
  391. $._expression,
  392. $.impl_body
  393. ),
  394. extend_impl_declaration: ($) =>
  395. seq('extend', 'impl', 'as', $._expression, $.impl_body),
  396. extend_base_declaration: ($) =>
  397. seq('extend', 'base', ':', $._expression, ';'),
  398. destructor_declaration: ($) =>
  399. seq(
  400. optional(choice('virtual', 'impl')),
  401. 'destructor',
  402. optional($.deduced_params),
  403. choice($.block, ';')
  404. ),
  405. class_body_item: ($) =>
  406. choice(
  407. $.declaration,
  408. $.extend_base_declaration,
  409. $.extend_impl_declaration,
  410. $.mix_declaration,
  411. $.destructor_declaration
  412. ),
  413. class_body: ($) => seq('{', repeat($.class_body_item), '}'),
  414. class_declaration: ($) =>
  415. seq(
  416. optional(choice('base', 'abstract')),
  417. 'class',
  418. $.declared_name,
  419. optional($.deduced_params),
  420. optional($.type_params),
  421. choice(';', $.class_body)
  422. ),
  423. choice_declaration: ($) =>
  424. seq(
  425. 'choice',
  426. $.declared_name,
  427. optional($.type_params),
  428. '{',
  429. comma_sep(seq($.ident, optional($.paren_expression))),
  430. '}'
  431. ),
  432. empty_declaration: ($) => ';',
  433. declaration: ($) =>
  434. choice(
  435. $.empty_declaration,
  436. $.namespace_declaration,
  437. $.var_declaration,
  438. $.let_declaration,
  439. $.function_declaration,
  440. $.alias_declaration,
  441. $.interface_declaration,
  442. $.constraint_declaration,
  443. $.impl_declaration,
  444. $.class_declaration,
  445. $.choice_declaration,
  446. $.mixin_declaration,
  447. $.match_first_declaration
  448. ),
  449. // Explorer only experimental featurues
  450. mix_declaration: ($) => seq('__mix', $._expression, ';'),
  451. mixin_declaration: ($) =>
  452. seq(
  453. '__mixin',
  454. $.declared_name,
  455. optional($.type_params),
  456. optional(seq('for', $._expression)),
  457. '{',
  458. repeat(choice($.function_declaration, $.mix_declaration)),
  459. '}'
  460. ),
  461. match_first_declaration: ($) =>
  462. seq('__match_first', '{', repeat($.impl_declaration), '}'),
  463. },
  464. });