character_set.h 2.7 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #ifndef CARBON_TOOLCHAIN_LEX_CHARACTER_SET_H_
  5. #define CARBON_TOOLCHAIN_LEX_CHARACTER_SET_H_
  6. #include "llvm/ADT/StringExtras.h"
  7. namespace Carbon::Lex {
  8. // TODO: These definitions need to be updated to match whatever Unicode lexical
  9. // rules we pick. The function interfaces will need to change to accommodate
  10. // multi-byte characters.
  11. // Is this an alphabetical character according to Carbon's lexical rules?
  12. //
  13. // Alphabetical characters are permitted at the start of identifiers. This
  14. // currently includes 'A'..'Z' and 'a'..'z'.
  15. inline auto IsAlpha(char c) -> bool { return llvm::isAlpha(c); }
  16. // Is this a decimal digit according to Carbon's lexical rules?
  17. //
  18. // This currently includes '0'..'9'.
  19. inline auto IsDecimalDigit(char c) -> bool { return llvm::isDigit(c); }
  20. // Is this an alphanumeric character according to Carbon's lexical rules?
  21. //
  22. // Alphanumeric characters are permitted as trailing characters in identifiers
  23. // and numeric literals. This includes alphabetical characters plus decimal
  24. // digits.
  25. //
  26. // Note that '_' is not considered alphanumeric, despite in most circumstances
  27. // being a valid continuation character of an identifier or numeric literal.
  28. inline auto IsAlnum(char c) -> bool { return llvm::isAlnum(c); }
  // Returns true when `c` is a hexadecimal digit under Carbon's lexical rules:
  // one of '0'..'9' or uppercase 'A'..'F'.
  //
  // Hexadecimal digits are permitted in `0x`-prefixed literals and after a `\x`
  // escape sequence.
  //
  // Lowercase 'a'..'f' are currently not treated as hexadecimal digits in any
  // context, so they yield false here.
  //
  // Declared `constexpr` (and therefore implicitly inline) so the predicate can
  // also be evaluated in constant expressions.
  constexpr auto IsUpperHexDigit(char c) -> bool {
    return ('0' <= c && c <= '9') || ('A' <= c && c <= 'F');
  }
  // Returns true when `c` is a lowercase letter, i.e. one of 'a'..'z'.
  //
  // Within a numeric literal, a lowercase letter may be followed by `+` or `-`
  // to extend the literal.
  //
  // Declared `constexpr` (and therefore implicitly inline) so the predicate can
  // also be evaluated in constant expressions.
  constexpr auto IsLower(char c) -> bool {
    return 'a' <= c && c <= 'z';
  }
  // Returns true when `c` is horizontal whitespace: a space or a tab.
  //
  // These are the characters that can appear in the indentation of a line.
  //
  // Declared `constexpr` (and therefore implicitly inline) so the predicate can
  // also be evaluated in constant expressions.
  constexpr auto IsHorizontalWhitespace(char c) -> bool {
    return c == ' ' || c == '\t';
  }
  // Returns true when `c` is vertical whitespace; currently only '\n'
  // qualifies.
  //
  // Vertical whitespace characters are the ones considered to terminate lines.
  //
  // Declared `constexpr` (and therefore implicitly inline) so the predicate can
  // also be evaluated in constant expressions.
  constexpr auto IsVerticalWhitespace(char c) -> bool {
    return c == '\n';
  }
  54. // Is this character considered to be whitespace?
  55. //
  56. // Changes here will need matching changes in
  57. // `TokenizedBuffer::Lexer::SkipWhitespace`.
  58. inline auto IsSpace(char c) -> bool {
  59. return IsHorizontalWhitespace(c) || IsVerticalWhitespace(c);
  60. }
  61. } // namespace Carbon::Lex
  62. #endif // CARBON_TOOLCHAIN_LEX_CHARACTER_SET_H_