// character_set.h
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #ifndef LEXER_CHARACTER_SET_H_
  5. #define LEXER_CHARACTER_SET_H_
  6. #include "llvm/ADT/StringExtras.h"
  7. #include "llvm/ADT/StringRef.h"
  8. namespace Carbon {
  9. // TODO: These definitions need to be updated to match whatever Unicode lexical
  10. // rules we pick. The function interfaces will need to change to accommodate
  11. // multi-byte characters.
  12. // Is this an alphabetical character according to Carbon's lexical rules?
  13. //
  14. // Alphabetical characters are permitted at the start of identifiers. This
  15. // currently includes 'A'..'Z' and 'a'..'z'.
  16. inline bool IsAlpha(char c) { return llvm::isAlpha(c); }
  17. // Is this a decimal digit according to Carbon's lexical rules?
  18. //
  19. // This currently includes '0'..'9'.
  20. inline bool IsDecimalDigit(char c) { return llvm::isDigit(c); }
  21. // Is this an alphanumeric character according to Carbon's lexical rules?
  22. //
  23. // Alphanumeric characters are permitted as trailing characters in identifiers
  24. // and numeric literals. This includes alphabetical characters plus decimal
  25. // digits.
  26. //
  27. // Note that '_' is not considered alphanumeric, despite in most circumstances
  28. // being a valid continuation character of an identifier or numeric literal.
  29. inline bool IsAlnum(char c) { return llvm::isAlnum(c); }
  // Reports whether `c` is a hexadecimal digit under Carbon's lexical rules.
  //
  // Hexadecimal digits may appear in `0x`-prefixed literals and after a `\x`
  // escape sequence.
  //
  // Only '0'..'9' and uppercase 'A'..'F' are accepted; lowercase 'a'..'f' are
  // currently not hexadecimal digits in any context.
  inline bool IsUpperHexDigit(char c) {
    const bool is_decimal = c >= '0' && c <= '9';
    const bool is_upper_a_to_f = c >= 'A' && c <= 'F';
    return is_decimal || is_upper_a_to_f;
  }
  // Reports whether `c` is a lowercase letter, i.e. within 'a'..'z'.
  //
  // In a numeric literal, a lowercase letter may be followed by `+` or `-` to
  // extend the literal.
  inline bool IsLower(char c) {
    if (c < 'a') {
      return false;
    }
    return c <= 'z';
  }
  // Reports whether `c` counts as horizontal whitespace: a space or a tab.
  //
  // These are the characters that can make up the indentation of a line.
  inline bool IsHorizontalWhitespace(char c) {
    switch (c) {
      case ' ':
      case '\t':
        return true;
      default:
        return false;
    }
  }
  // Reports whether `c` counts as vertical whitespace: only '\n' qualifies.
  //
  // A vertical whitespace character is considered to terminate a line.
  inline bool IsVerticalWhitespace(char c) {
    const char newline = '\n';
    return c == newline;
  }
  53. // Is this character considered to be whitespace?
  54. //
  55. // Changes here will need matching changes in
  56. // `TokenizedBuffer::Lexer::SkipWhitespace`.
  57. inline bool IsSpace(char c) {
  58. return IsHorizontalWhitespace(c) || IsVerticalWhitespace(c);
  59. }
  60. } // namespace Carbon
  61. #endif // LEXER_CHARACTER_SET_H_