numeric_literal_test.cpp 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "lexer/numeric_literal.h"
  5. #include <iterator>
  6. #include "diagnostics/diagnostic_emitter.h"
  7. #include "gmock/gmock.h"
  8. #include "gtest/gtest.h"
  9. namespace Carbon {
  10. namespace {
  11. struct NumericLiteralTest : ::testing::Test {
  12. auto Lex(llvm::StringRef text) -> NumericLiteralToken {
  13. llvm::Optional<NumericLiteralToken> result = NumericLiteralToken::Lex(text);
  14. assert(result);
  15. EXPECT_EQ(result->Text(), text);
  16. return *result;
  17. }
  18. auto Parse(llvm::StringRef text) -> NumericLiteralToken::Parser {
  19. return NumericLiteralToken::Parser(ConsoleDiagnosticEmitter(), Lex(text));
  20. }
  21. };
  22. TEST_F(NumericLiteralTest, HandlesIntegerLiteral) {
  23. struct Testcase {
  24. llvm::StringLiteral token;
  25. uint64_t value;
  26. int radix;
  27. };
  28. Testcase testcases[] = {
  29. {.token = "12", .value = 12, .radix = 10},
  30. {.token = "0x12_3ABC", .value = 0x12'3ABC, .radix = 16},
  31. {.token = "0b10_10_11", .value = 0b10'10'11, .radix = 2},
  32. {.token = "1_234_567", .value = 1'234'567, .radix = 10},
  33. };
  34. for (Testcase testcase : testcases) {
  35. auto parser = Parse(testcase.token);
  36. EXPECT_EQ(parser.Check(), parser.Valid) << testcase.token;
  37. EXPECT_EQ(parser.IsInteger(), true);
  38. EXPECT_EQ(parser.GetMantissa().getZExtValue(), testcase.value);
  39. EXPECT_EQ(parser.GetExponent().getSExtValue(), 0);
  40. EXPECT_EQ(parser.GetRadix(), testcase.radix);
  41. }
  42. }
  43. TEST_F(NumericLiteralTest, ValidatesBaseSpecifier) {
  44. llvm::StringLiteral valid[] = {
  45. // Decimal integer literals.
  46. "0",
  47. "1",
  48. "123456789000000000000000000000000000000000000",
  49. // Hexadecimal integer literals.
  50. "0x0123456789ABCDEF",
  51. "0x0000000000000000000000000000000",
  52. // Binary integer literals.
  53. "0b10110100101001010",
  54. "0b0000000",
  55. };
  56. for (llvm::StringLiteral literal : valid) {
  57. auto parser = Parse(literal);
  58. EXPECT_EQ(parser.Check(), parser.Valid) << literal;
  59. }
  60. llvm::StringLiteral invalid[] = {
  61. "00", "0X123", "0o123", "0B1",
  62. "007", "123L", "123456789A", "0x",
  63. "0b", "0x123abc", "0b011101201001", "0b10A",
  64. "0x_", "0b_",
  65. };
  66. for (llvm::StringLiteral literal : invalid) {
  67. auto parser = Parse(literal);
  68. EXPECT_EQ(parser.Check(), parser.UnrecoverableError) << literal;
  69. }
  70. }
  71. TEST_F(NumericLiteralTest, ValidatesIntegerDigitSeparators) {
  72. llvm::StringLiteral valid[] = {
  73. // Decimal literals optionally have digit separators every 3 places.
  74. "1_234",
  75. "123_456",
  76. "1_234_567",
  77. // Hexadecimal literals optionally have digit separators every 4 places.
  78. "0x1_0000",
  79. "0x1000_0000",
  80. "0x1_0000_0000",
  81. // Binary integer literals can have digit separators anywhere..
  82. "0b1_0_1_0_1_0",
  83. "0b111_0000",
  84. };
  85. for (llvm::StringLiteral literal : valid) {
  86. auto parser = Parse(literal);
  87. EXPECT_EQ(parser.Check(), parser.Valid) << literal;
  88. }
  89. llvm::StringLiteral invalid[] = {
  90. // Decimal literals.
  91. "12_34",
  92. "123_4_6_789",
  93. "12_3456_789",
  94. "12__345",
  95. "1_",
  96. // Hexadecimal literals.
  97. "0x_1234",
  98. "0x123_",
  99. "0x12_3",
  100. "0x_234_5678",
  101. "0x1234_567",
  102. // Binary literals.
  103. "0b_10101",
  104. "0b1__01",
  105. "0b1011_",
  106. "0b1_01_01_",
  107. };
  108. for (llvm::StringLiteral literal : invalid) {
  109. auto parser = Parse(literal);
  110. EXPECT_EQ(parser.Check(), parser.RecoverableError) << literal;
  111. }
  112. }
  113. TEST_F(NumericLiteralTest, HandlesRealLiteral) {
  114. struct Testcase {
  115. llvm::StringLiteral token;
  116. uint64_t mantissa;
  117. int64_t exponent;
  118. unsigned radix;
  119. };
  120. Testcase testcases[] = {
  121. // Decimal real literals.
  122. {.token = "0.0", .mantissa = 0, .exponent = -1, .radix = 10},
  123. {.token = "12.345", .mantissa = 12345, .exponent = -3, .radix = 10},
  124. {.token = "12.345e6", .mantissa = 12345, .exponent = 3, .radix = 10},
  125. {.token = "12.345e+6", .mantissa = 12345, .exponent = 3, .radix = 10},
  126. {.token = "1_234.5e-2", .mantissa = 12345, .exponent = -3, .radix = 10},
  127. {.token = "1.0e-2_000_000",
  128. .mantissa = 10,
  129. .exponent = -2'000'001,
  130. .radix = 10},
  131. // Hexadecimal real literals.
  132. {.token = "0x1_2345_6789.CDEF",
  133. .mantissa = 0x1'2345'6789'CDEF,
  134. .exponent = -16,
  135. .radix = 16},
  136. {.token = "0x0.0001p4", .mantissa = 1, .exponent = -12, .radix = 16},
  137. {.token = "0x0.0001p+4", .mantissa = 1, .exponent = -12, .radix = 16},
  138. {.token = "0x0.0001p-4", .mantissa = 1, .exponent = -20, .radix = 16},
  139. // The exponent here works out as exactly INT64_MIN.
  140. {.token = "0x1.01p-9223372036854775800",
  141. .mantissa = 0x101,
  142. .exponent = -9223372036854775807L - 1L,
  143. .radix = 16},
  144. // The exponent here doesn't fit in a signed 64-bit integer until we
  145. // adjust for the radix point.
  146. {.token = "0x1.01p9223372036854775809",
  147. .mantissa = 0x101,
  148. .exponent = 9223372036854775801L,
  149. .radix = 16},
  150. // Binary real literals. These are invalid, but we accept them for error
  151. // recovery.
  152. {.token = "0b10_11_01.01",
  153. .mantissa = 0b10110101,
  154. .exponent = -2,
  155. .radix = 2},
  156. };
  157. for (Testcase testcase : testcases) {
  158. auto parser = Parse(testcase.token);
  159. EXPECT_EQ(parser.Check(),
  160. testcase.radix == 2 ? parser.RecoverableError : parser.Valid)
  161. << testcase.token;
  162. EXPECT_EQ(parser.IsInteger(), false);
  163. EXPECT_EQ(parser.GetMantissa().getZExtValue(), testcase.mantissa);
  164. EXPECT_EQ(parser.GetExponent().getSExtValue(), testcase.exponent);
  165. EXPECT_EQ(parser.GetRadix(), testcase.radix);
  166. }
  167. }
  168. TEST_F(NumericLiteralTest, HandlesRealLiteralOverflow) {
  169. llvm::StringLiteral input = "0x1.000001p-9223372036854775800";
  170. auto parser = Parse(input);
  171. EXPECT_EQ(parser.Check(), parser.Valid);
  172. EXPECT_EQ(parser.GetMantissa(), 0x1000001);
  173. EXPECT_EQ((parser.GetExponent() + 9223372036854775800).getSExtValue(), -24);
  174. EXPECT_EQ(parser.GetRadix(), 16);
  175. }
  176. TEST_F(NumericLiteralTest, ValidatesRealLiterals) {
  177. llvm::StringLiteral invalid_digit_separators[] = {
  178. // Invalid digit separators.
  179. "12_34.5", "123.4_567", "123.456_7", "1_2_3.4",
  180. "123.4e56_78", "0x12_34.5", "0x12.3_4", "0x12.34p5_6",
  181. };
  182. for (llvm::StringLiteral literal : invalid_digit_separators) {
  183. auto parser = Parse(literal);
  184. EXPECT_EQ(parser.Check(), parser.RecoverableError) << literal;
  185. }
  186. llvm::StringLiteral invalid[] = {
  187. // No digits in integer part.
  188. "0x.0",
  189. "0b.0",
  190. "0x_.0",
  191. "0b_.0",
  192. // No digits in fractional part.
  193. "0.e",
  194. "0.e0",
  195. "0.e+0",
  196. "0x0.p",
  197. "0x0.p-0",
  198. // Invalid digits in mantissa.
  199. "123A.4",
  200. "123.4A",
  201. "123A.4e0",
  202. "123.4Ae0",
  203. "0x123ABCDEFG.0",
  204. "0x123.ABCDEFG",
  205. "0x123ABCDEFG.0p0",
  206. "0x123.ABCDEFGp0",
  207. // Invalid exponent letter.
  208. "0.0f0",
  209. "0.0p0",
  210. "0.0z+0",
  211. "0x0.0e0",
  212. "0x0.0f0",
  213. "0x0.0z-0",
  214. // No digits in exponent part.
  215. "0.0e",
  216. "0x0.0p",
  217. "0.0e_",
  218. "0x0.0p_",
  219. // Invalid digits in exponent part.
  220. "0.0eHELLO",
  221. "0.0eA",
  222. "0.0e+A",
  223. "0x0.0pA",
  224. "0x0.0p-A",
  225. };
  226. for (llvm::StringLiteral literal : invalid) {
  227. auto parser = Parse(literal);
  228. EXPECT_EQ(parser.Check(), parser.UnrecoverableError) << literal;
  229. }
  230. }
  231. } // namespace
  232. } // namespace Carbon