| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687 |
- // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
- // Exceptions. See /LICENSE for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- #include "tree_sitter/parser.h"
// Token kinds this external scanner can produce for a `*` character.
// NOTE(review): presumably the order must match the `externals` list in the
// grammar definition -- confirm before reordering.
enum TokenType {
  // `*` used as an infix operator, for example `foo * bar` or `foo*bar`.
  BINARY_STAR = 0,
  // `*` used as a postfix operator, for example `foo*`.
  POSTFIX_STAR = 1,
};
- // our scanner is stateless
// Creates the scanner payload. This scanner keeps no state between calls, so
// nothing is allocated and NULL is returned.
//
// Declared with an explicit `(void)` parameter list so the definition is a
// real prototype in C (an empty `()` leaves the parameters unspecified).
void* tree_sitter_carbon_external_scanner_create(void) { return NULL; }
// Saves the scanner state into `buffer` and returns the number of bytes used.
// There is no state to save, so `buffer` is left untouched and the reported
// size is always zero. Neither argument is read.
unsigned tree_sitter_carbon_external_scanner_serialize(
    __attribute__((unused)) void* payload,
    __attribute__((unused)) char* buffer) {
  const unsigned bytes_written = 0;
  return bytes_written;
}
// Restores scanner state from the first `length` bytes of `buffer`.
// The scanner is stateless, so there is nothing to restore and all three
// arguments are ignored.
void tree_sitter_carbon_external_scanner_deserialize(
    __attribute__((unused)) void* payload,
    __attribute__((unused)) const char* buffer,
    __attribute__((unused)) unsigned length) {
  // Intentionally empty.
}
// Releases the scanner payload. No payload is ever allocated by this scanner,
// so there is nothing to free and `payload` is ignored.
void tree_sitter_carbon_external_scanner_destroy(
    __attribute__((unused)) void* payload) {
  // Intentionally empty.
}
- // https://github.com/carbon-language/carbon-lang/blob/trunk/docs/design/lexical_conventions/symbolic_tokens.md#overview
- // > the token after the operator must be an identifier, a literal, or any kind
- // of opening bracket (for example, (, [, or {).
// Returns true when code point `c` can start the token that follows a binary
// operator: an identifier, a string or number literal, or an opening bracket.
//
// `c` is taken as a full 32-bit code point (the width tree-sitter uses for
// its lookahead) rather than `char`. Narrowing to `char` would make
// non-ASCII code points alias ASCII ones, e.g. U+0161 would be seen as 'a'.
static bool token_allowed_after_binary_operator(int32_t c) {
  // identifier
  if (c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
    return true;
  }
  // string literal
  if (c == '"') {
    return true;
  }
  // TODO: character literal
  // number literal
  if (c >= '0' && c <= '9') {
    return true;
  }
  // opening bracket
  return c == '(' || c == '[' || c == '{';
}
// Returns true when code point `c` is whitespace for the purpose of
// classifying a `*`: space, horizontal tab, line feed, or carriage return.
//
// Tab and carriage return are included so that tab-indented and CRLF sources
// are treated the same as space/LF sources. `c` is a full 32-bit code point
// so that non-ASCII input (e.g. U+0120) is not truncated into ' '.
static bool is_whitespace(int32_t c) {
  switch (c) {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
      return true;
    default:
      return false;
  }
}
- // https://tree-sitter.github.io/tree-sitter/creating-parsers#external-scanners
- //
- // > If a token in the externals array is valid at a given position in the
- // > parse, the external scanner will be called first before anything else is
- // > done.
- //
- // > But the external scanner may return false and in this case Tree-sitter
- // > fallbacks to the internal lexing mechanism.
- bool tree_sitter_carbon_external_scanner_scan(
- __attribute__((unused)) void* payload,
- __attribute__((unused)) TSLexer* lexer,
- __attribute__((unused)) const bool* valid_symbols) {
- // skip past whitespace if any
- bool whitespace = false;
- while (is_whitespace(lexer->lookahead)) {
- whitespace = true;
- lexer->advance(lexer, /* skip= */ true);
- }
- // if any other symbol than *, fallback to treesitter internal lexer
- if (lexer->lookahead != '*') {
- return false;
- }
- // move to past the *, add * to current token
- lexer->advance(lexer, /* skip= */ false);
- // https://github.com/carbon-language/carbon-lang/blob/trunk/docs/design/lexical_conventions/symbolic_tokens.md
- if (is_whitespace(lexer->lookahead) && whitespace) {
- // foo * bar
- lexer->result_symbol = BINARY_STAR;
- } else if (!whitespace &&
- token_allowed_after_binary_operator(lexer->lookahead)) {
- // foo*bar or foo*(bar)
- lexer->result_symbol = BINARY_STAR;
- } else {
- // foo*
- lexer->result_symbol = POSTFIX_STAR;
- }
- return true;
- }
|