string_literal.h 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #ifndef CARBON_TOOLCHAIN_LEX_STRING_LITERAL_H_
  5. #define CARBON_TOOLCHAIN_LEX_STRING_LITERAL_H_
#include <cstdint>
#include <optional>

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Allocator.h"
#include "toolchain/diagnostics/emitter.h"
#include "toolchain/lex/token_info.h"
  11. namespace Carbon::Lex {
  12. class StringLiteral {
  13. public:
  14. // A string literal's kind.
  15. enum class Kind : int8_t {
  16. // A character literal is still handled through string literal lexing.
  17. Char,
  18. // A single-line string, `"<content>"`.
  19. SingleLine,
  20. // A multi-line string, `'''<content>'''`.
  21. MultiLine,
  22. // An incorrectly double-quoted multi-line string, `"""<content>"""`.
  23. MultiLineWithDoubleQuotes,
  24. };
  25. // Extract a string literal token from the given text, if it has a suitable
  26. // form. Returning std::nullopt indicates no string literal was found;
  27. // returning an invalid literal indicates a string prefix was found, but it's
  28. // malformed and is returning a partial string literal to assist error
  29. // construction.
  30. static auto Lex(llvm::StringRef source_text) -> std::optional<StringLiteral>;
  31. // Expand any escape sequences and compute the resulting character. This
  32. // handles error recovery internally, but can return nullopt for an invalid
  33. // character.
  34. auto ComputeCharLiteralValue(Diagnostics::Emitter<const char*>& emitter) const
  35. -> std::optional<CharLiteralValue>;
  36. // Expand any escape sequences in the given string literal and compute the
  37. // resulting value. This handles error recovery internally and cannot fail.
  38. //
  39. // When content_needs_validation_ is false and the string has no indent to
  40. // deal with, this can return the content directly. Otherwise, the allocator
  41. // will be used for the StringRef.
  42. auto ComputeStringValue(llvm::BumpPtrAllocator& allocator,
  43. Diagnostics::Emitter<const char*>& emitter) const
  44. -> llvm::StringRef;
  45. // Get the text corresponding to this literal.
  46. auto text() const -> llvm::StringRef { return text_; }
  47. // Determine whether this is a multi-line string literal.
  48. auto kind() const -> Kind { return kind_; }
  49. // Returns true if the string has a valid terminator.
  50. auto is_terminated() const -> bool { return is_terminated_; }
  51. private:
  52. struct Introducer;
  53. explicit StringLiteral(llvm::StringRef text, llvm::StringRef content,
  54. bool content_needs_validation, int hash_level,
  55. Kind kind, bool is_terminated)
  56. : text_(text),
  57. content_(content),
  58. content_needs_validation_(content_needs_validation),
  59. hash_level_(hash_level),
  60. kind_(kind),
  61. is_terminated_(is_terminated) {}
  62. // The complete text of the string literal.
  63. llvm::StringRef text_;
  64. // The content of the literal. For a multi-line literal, this begins
  65. // immediately after the newline following the file type indicator, and ends
  66. // at the start of the closing `"""`. Leading whitespace is not removed from
  67. // either end.
  68. llvm::StringRef content_;
  69. // Whether content needs validation, in particular due to either an escape
  70. // (which needs modifications) or a tab character (which may cause a warning).
  71. bool content_needs_validation_;
  72. // The number of `#`s preceding the opening `"` or `"""`.
  73. int hash_level_;
  74. // Whether this was a single-line string literal, multi-line string literal,
  75. // or a char literal.
  76. Kind kind_;
  77. // Whether the literal is valid, or should only be used for errors.
  78. bool is_terminated_;
  79. };
  80. } // namespace Carbon::Lex
  81. #endif // CARBON_TOOLCHAIN_LEX_STRING_LITERAL_H_