numeric_literal.h 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #ifndef LEXER_NUMERIC_LITERAL_H_
  5. #define LEXER_NUMERIC_LITERAL_H_
  6. #include <utility>
  7. #include "diagnostics/diagnostic_emitter.h"
  8. #include "llvm/ADT/APInt.h"
  9. #include "llvm/ADT/Optional.h"
  10. #include "llvm/ADT/StringRef.h"
  11. namespace Carbon {
  12. // A numeric literal token that has been extracted from a source buffer.
  13. class NumericLiteralToken {
  14. public:
  15. // Get the text corresponding to this literal.
  16. auto Text() const -> llvm::StringRef { return text; }
  17. // Extract a numeric literal from the given text, if it has a suitable form.
  18. //
  19. // The supplied `source_text` must outlive the return value.
  20. static auto Lex(llvm::StringRef source_text)
  21. -> llvm::Optional<NumericLiteralToken>;
  22. class Parser;
  23. private:
  24. NumericLiteralToken() {}
  25. // The text of the token.
  26. llvm::StringRef text;
  27. // The offset of the '.'. Set to text.size() if none is present.
  28. int radix_point;
  29. // The offset of the alphabetical character introducing the exponent. In a
  30. // valid literal, this will be an 'e' or a 'p', and may be followed by a '+'
  31. // or a '-', but for error recovery, this may simply be the last lowercase
  32. // letter in the invalid token. Always greater than or equal to radix_point.
  33. // Set to text.size() if none is present.
  34. int exponent;
  35. };
  36. // Parser for numeric literal tokens.
  37. //
  38. // Responsible for checking that a numeric literal is valid and meaningful and
  39. // either diagnosing or extracting its meaning.
  40. class NumericLiteralToken::Parser {
  41. public:
  42. Parser(DiagnosticEmitter& emitter, NumericLiteralToken literal);
  43. auto IsInteger() -> bool {
  44. return literal.radix_point == static_cast<int>(literal.text.size());
  45. }
  46. enum CheckResult {
  47. // The token is valid.
  48. Valid,
  49. // The token is invalid, but we've diagnosed and recovered from the error.
  50. RecoverableError,
  51. // The token is invalid, and we've diagnosed, but we can't assign meaning
  52. // to it.
  53. UnrecoverableError,
  54. };
  55. // Check that the numeric literal token is syntactically valid and
  56. // meaningful, and diagnose if not.
  57. auto Check() -> CheckResult;
  58. // Get the radix of this token. One of 2, 10, or 16.
  59. auto GetRadix() -> int { return radix; }
  60. // Get the mantissa of this token's value.
  61. auto GetMantissa() -> llvm::APInt;
  62. // Get the exponent of this token's value. This is always zero for an integer
  63. // literal.
  64. auto GetExponent() -> llvm::APInt;
  65. private:
  66. struct CheckDigitSequenceResult {
  67. bool ok;
  68. bool has_digit_separators = false;
  69. };
  70. auto CheckDigitSequence(llvm::StringRef text, int radix,
  71. bool allow_digit_separators = true)
  72. -> CheckDigitSequenceResult;
  73. auto CheckDigitSeparatorPlacement(llvm::StringRef text, int radix,
  74. int num_digit_separators) -> void;
  75. auto CheckLeadingZero() -> bool;
  76. auto CheckIntPart() -> bool;
  77. auto CheckFractionalPart() -> bool;
  78. auto CheckExponentPart() -> bool;
  79. private:
  80. DiagnosticEmitter& emitter;
  81. NumericLiteralToken literal;
  82. // The radix of the literal: 2, 10, or 16, for a prefix of '0b', no prefix,
  83. // or '0x', respectively.
  84. int radix = 10;
  85. // The various components of a numeric literal:
  86. //
  87. // [radix] int_part [. fract_part [[ep] [+-] exponent_part]]
  88. llvm::StringRef int_part;
  89. llvm::StringRef fract_part;
  90. llvm::StringRef exponent_part;
  91. // Do we need to remove any special characters (digit separator or radix
  92. // point) before interpreting the mantissa or exponent as an integer?
  93. bool mantissa_needs_cleaning = false;
  94. bool exponent_needs_cleaning = false;
  95. // True if we found a `-` before `exponent_part`.
  96. bool exponent_is_negative = false;
  97. // True if we produced an error but recovered.
  98. bool recovered_from_error = false;
  99. };
  100. } // namespace Carbon
  101. #endif // LEXER_NUMERIC_LITERAL_H_