numeric_literal.h 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #ifndef LEXER_NUMERIC_LITERAL_H_
  5. #define LEXER_NUMERIC_LITERAL_H_
  6. #include <utility>
  7. #include "diagnostics/diagnostic_emitter.h"
  8. #include "llvm/ADT/APInt.h"
  9. #include "llvm/ADT/Optional.h"
  10. #include "llvm/ADT/StringRef.h"
  11. namespace Carbon {
  12. // A numeric literal token that has been extracted from a source buffer.
  13. class NumericLiteralToken {
  14. public:
  15. // Get the text corresponding to this literal.
  16. llvm::StringRef Text() const { return text; }
  17. // Extract a numeric literal from the given text, if it has a suitable form.
  18. static auto Lex(llvm::StringRef source_text)
  19. -> llvm::Optional<NumericLiteralToken>;
  20. class Parser;
  21. private:
  22. NumericLiteralToken() {}
  23. // The text of the token.
  24. llvm::StringRef text;
  25. // The offset of the '.'. Set to text.size() if none is present.
  26. int radix_point;
  27. // The offset of the alphabetical character introducing the exponent. In a
  28. // valid literal, this will be an 'e' or a 'p', and may be followed by a '+'
  29. // or a '-', but for error recovery, this may simply be the last lowercase
  30. // letter in the invalid token. Always greater than or equal to radix_point.
  31. // Set to text.size() if none is present.
  32. int exponent;
  33. };
  34. // Parser for numeric literal tokens.
  35. //
  36. // Responsible for checking that a numeric literal is valid and meaningful and
  37. // either diagnosing or extracting its meaning.
  38. class NumericLiteralToken::Parser {
  39. public:
  40. Parser(DiagnosticEmitter& emitter, NumericLiteralToken literal);
  41. auto IsInteger() -> bool {
  42. return literal.radix_point == static_cast<int>(literal.text.size());
  43. }
  44. enum CheckResult {
  45. // The token is valid.
  46. Valid,
  47. // The token is invalid, but we've diagnosed and recovered from the error.
  48. RecoverableError,
  49. // The token is invalid, and we've diagnosed, but we can't assign meaning
  50. // to it.
  51. UnrecoverableError,
  52. };
  53. // Check that the numeric literal token is syntactically valid and
  54. // meaningful, and diagnose if not.
  55. auto Check() -> CheckResult;
  56. // Get the radix of this token. One of 2, 10, or 16.
  57. auto GetRadix() -> int { return radix; }
  58. // Get the mantissa of this token's value.
  59. auto GetMantissa() -> llvm::APInt;
  60. // Get the exponent of this token's value. This is always zero for an integer
  61. // literal.
  62. auto GetExponent() -> llvm::APInt;
  63. private:
  64. struct CheckDigitSequenceResult {
  65. bool ok;
  66. bool has_digit_separators = false;
  67. };
  68. auto CheckDigitSequence(llvm::StringRef text, int radix,
  69. bool allow_digit_separators = true)
  70. -> CheckDigitSequenceResult;
  71. auto CheckDigitSeparatorPlacement(llvm::StringRef text, int radix,
  72. int num_digit_separators) -> void;
  73. auto CheckLeadingZero() -> bool;
  74. auto CheckIntPart() -> bool;
  75. auto CheckFractionalPart() -> bool;
  76. auto CheckExponentPart() -> bool;
  77. private:
  78. DiagnosticEmitter& emitter;
  79. NumericLiteralToken literal;
  80. // The radix of the literal: 2, 10, or 16, for a prefix of '0b', no prefix,
  81. // or '0x', respectively.
  82. int radix = 10;
  83. // The various components of a numeric literal:
  84. //
  85. // [radix] int_part [. fract_part [[ep] [+-] exponent_part]]
  86. llvm::StringRef int_part;
  87. llvm::StringRef fract_part;
  88. llvm::StringRef exponent_part;
  89. // Do we need to remove any special characters (digit separator or radix
  90. // point) before interpreting the mantissa or exponent as an integer?
  91. bool mantissa_needs_cleaning = false;
  92. bool exponent_needs_cleaning = false;
  93. // True if we found a `-` before `exponent_part`.
  94. bool exponent_is_negative = false;
  95. // True if we produced an error but recovered.
  96. bool recovered_from_error = false;
  97. };
  98. } // namespace Carbon
  99. #endif // LEXER_NUMERIC_LITERAL_H_