string_helpers_test.cpp 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#include "common/string_helpers.h"

#include <gmock/gmock.h>
#include <gtest/gtest.h>

#include <cstring>
#include <optional>
#include <string>

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Allocator.h"
  12. using ::testing::Eq;
  13. using ::testing::Optional;
  14. using ::testing::StrEq;
  15. namespace Carbon {
  16. namespace {
  17. TEST(UnescapeStringLiteral, Valid) {
  18. EXPECT_THAT(UnescapeStringLiteral("test"), Optional(Eq("test")));
  19. EXPECT_THAT(UnescapeStringLiteral("okay whitespace"),
  20. Optional(Eq("okay whitespace")));
  21. EXPECT_THAT(UnescapeStringLiteral("test\n"), Optional(Eq("test\n")));
  22. EXPECT_THAT(UnescapeStringLiteral("test\\n"), Optional(Eq("test\n")));
  23. EXPECT_THAT(UnescapeStringLiteral("abc\\ndef"), Optional(Eq("abc\ndef")));
  24. EXPECT_THAT(UnescapeStringLiteral("test\\\\n"), Optional(Eq("test\\n")));
  25. EXPECT_THAT(UnescapeStringLiteral("\\xAA"), Optional(Eq("\xAA")));
  26. EXPECT_THAT(UnescapeStringLiteral("\\x12"), Optional(Eq("\x12")));
  27. EXPECT_THAT(UnescapeStringLiteral("test", 1), Optional(Eq("test")));
  28. EXPECT_THAT(UnescapeStringLiteral("test\\#n", 1), Optional(Eq("test\n")));
  29. EXPECT_THAT(UnescapeStringLiteral(
  30. "r\\u{000000E9}al \\u{2764}\\u{FE0F}\\u{1F50A}!\\u{10FFFF}"),
  31. Optional(Eq("réal ❤️🔊!􏿿")));
  32. }
  33. TEST(UnescapeStringLiteral, Invalid) {
  34. // Missing char after `\`.
  35. EXPECT_THAT(UnescapeStringLiteral("a\\"), Eq(std::nullopt));
  36. // Not a supported escape.
  37. EXPECT_THAT(UnescapeStringLiteral("\\e"), Eq(std::nullopt));
  38. // Needs 2 hex chars.
  39. EXPECT_THAT(UnescapeStringLiteral("\\x"), Eq(std::nullopt));
  40. // Needs 2 hex chars.
  41. EXPECT_THAT(UnescapeStringLiteral("\\xA"), Eq(std::nullopt));
  42. // Needs uppercase hex.
  43. EXPECT_THAT(UnescapeStringLiteral("\\xaa"), Eq(std::nullopt));
  44. // Reserved.
  45. EXPECT_THAT(UnescapeStringLiteral("\\00"), Eq(std::nullopt));
  46. EXPECT_THAT(UnescapeStringLiteral("\\#00", 1), Eq(std::nullopt));
  47. }
  48. TEST(UnescapeStringLiteral, InvalidUnicodes) {
  49. // Various incomplete Unicode specifiers
  50. EXPECT_THAT(UnescapeStringLiteral("\\u"), Eq(std::nullopt));
  51. EXPECT_THAT(UnescapeStringLiteral("\\u1"), Eq(std::nullopt));
  52. EXPECT_THAT(UnescapeStringLiteral("\\uz"), Eq(std::nullopt));
  53. EXPECT_THAT(UnescapeStringLiteral("\\u{"), Eq(std::nullopt));
  54. EXPECT_THAT(UnescapeStringLiteral("\\u{z"), Eq(std::nullopt));
  55. EXPECT_THAT(UnescapeStringLiteral("\\u{E9"), Eq(std::nullopt));
  56. EXPECT_THAT(UnescapeStringLiteral("\\u{E9z"), Eq(std::nullopt));
  57. EXPECT_THAT(UnescapeStringLiteral("\\u{}"), Eq(std::nullopt));
  58. // invalid characters in unicode
  59. EXPECT_THAT(UnescapeStringLiteral("\\u{z}"), Eq(std::nullopt));
  60. // lowercase hexadecimal
  61. EXPECT_THAT(UnescapeStringLiteral("\\u{e9}"), Eq(std::nullopt));
  62. // Codepoint number too high
  63. EXPECT_THAT(UnescapeStringLiteral("\\u{110000}"), Eq(std::nullopt));
  64. // codepoint more than 8 hex digits
  65. EXPECT_THAT(UnescapeStringLiteral("\\u{FF000000E9}"), Eq(std::nullopt));
  66. }
  67. TEST(UnescapeStringLiteral, Nul) {
  68. std::optional<std::string> str = UnescapeStringLiteral("a\\0b");
  69. ASSERT_NE(str, std::nullopt);
  70. EXPECT_THAT(str->size(), Eq(3));
  71. EXPECT_THAT(strlen(str->c_str()), Eq(1));
  72. EXPECT_THAT((*str)[0], Eq('a'));
  73. EXPECT_THAT((*str)[1], Eq('\0'));
  74. EXPECT_THAT((*str)[2], Eq('b'));
  75. }
  76. TEST(ParseBlockStringLiteral, FailTooFewLines) {
  77. EXPECT_THAT(ParseBlockStringLiteral("").error().message(),
  78. Eq("Too few lines"));
  79. }
  80. TEST(ParseBlockStringLiteral, FailNoLeadingTripleQuotes) {
  81. EXPECT_THAT(ParseBlockStringLiteral("'a'\n").error().message(),
  82. Eq("Should start with triple quotes: 'a'"));
  83. }
  84. TEST(ParseBlockStringLiteral, FailInvalideFiletypeIndicator) {
  85. EXPECT_THAT(ParseBlockStringLiteral("'''carbon file\n").error().message(),
  86. Eq("Invalid characters in file type indicator: carbon file"));
  87. }
  88. TEST(ParseBlockStringLiteral, FailEndingTripleQuotes) {
  89. EXPECT_THAT(ParseBlockStringLiteral("'''\n").error().message(),
  90. Eq("Should end with triple quotes: "));
  91. }
  // A content line whose indentation does not match the expected indent must
  // be rejected; the error message names the offending line and the expected
  // indent width.
  // NOTE(review): whitespace inside the raw literal is what this test
  // exercises. The message expects an indent of 5, but no leading indentation
  // is visible in this copy -- it looks collapsed; confirm against upstream
  // before editing the literal or the expected message.
  92. TEST(ParseBlockStringLiteral, FailWrongIndent) {
  93. constexpr char Input[] = R"('''
  94. A block string literal
  95. with wrong indent
  96. ''')";
  97. EXPECT_THAT(ParseBlockStringLiteral(Input).error().message(),
  98. Eq("Wrong indent for line: with wrong indent, expected 5"));
  99. }
  // An unsupported escape (`\q`) inside a block string must be rejected with
  // an "Invalid escaping in ..." message that quotes the line. Checked twice:
  // with the default arguments, and with a second argument of 1 where the
  // escape is written as `\#q`.
  // NOTE(review): any indentation inside the raw literals may have been
  // collapsed in this copy; confirm against upstream.
  100. TEST(ParseBlockStringLiteral, FailInvalidEscaping) {
  101. constexpr char Input[] = R"('''
  102. \q
  103. ''')";
  104. EXPECT_THAT(ParseBlockStringLiteral(Input).error().message(),
  105. Eq("Invalid escaping in \\q"));
  106. constexpr char InputRaw[] = R"('''
  107. \#q
  108. ''')";
  109. EXPECT_THAT(ParseBlockStringLiteral(InputRaw, 1).error().message(),
  110. Eq("Invalid escaping in \\#q"));
  111. }
  // A block literal with no content lines between the opening and closing
  // `'''` must parse successfully to the empty string.
  112. TEST(ParseBlockStringLiteral, OkEmptyString) {
  113. constexpr char Input[] = R"('''
  114. ''')";
  115. EXPECT_THAT(*ParseBlockStringLiteral(Input), Eq(""));
  116. }
  // A single content line must parse to that line followed by a newline.
  // NOTE(review): any indentation inside the raw literals may have been
  // collapsed in this copy; confirm against upstream.
  117. TEST(ParseBlockStringLiteral, OkOneLineString) {
  118. constexpr char Input[] = R"('''
  119. A block string literal
  120. ''')";
  121. constexpr char Expected[] = R"(A block string literal
  122. )";
  123. EXPECT_THAT(*ParseBlockStringLiteral(Input), Eq(Expected));
  124. }
  // Two content lines must parse to both lines, each terminated by a newline.
  // NOTE(review): the second line reads "with indent.", which suggests the
  // input and expected literals carried leading whitespace that is not visible
  // in this copy; confirm against upstream.
  125. TEST(ParseBlockStringLiteral, OkTwoLineString) {
  126. constexpr char Input[] = R"('''
  127. A block string literal
  128. with indent.
  129. ''')";
  130. constexpr char Expected[] = R"(A block string literal
  131. with indent.
  132. )";
  133. EXPECT_THAT(*ParseBlockStringLiteral(Input), Eq(Expected));
  134. }
  // A file type indicator ("carbon") directly after the opening `'''` must be
  // accepted and must not appear in the parsed content.
  // NOTE(review): any indentation inside the raw literals may have been
  // collapsed in this copy; confirm against upstream.
  135. TEST(ParseBlockStringLiteral, OkWithFileTypeIndicator) {
  136. constexpr char Input[] = R"('''carbon
  137. A block string literal
  138. with file type indicator.
  139. ''')";
  140. constexpr char Expected[] = R"(A block string literal
  141. with file type indicator.
  142. )";
  143. EXPECT_THAT(*ParseBlockStringLiteral(Input), Eq(Expected));
  144. }
  // Per the test name, whitespace following the opening `'''` should be
  // accepted and the content parsed as usual.
  // NOTE(review): no trailing whitespace after the opening quotes is visible
  // in this copy, so as shown this input matches OkOneLineString; the
  // whitespace was presumably stripped -- confirm against upstream.
  145. TEST(ParseBlockStringLiteral, OkWhitespaceAfterOpeningQuotes) {
  146. constexpr char Input[] = R"('''
  147. A block string literal
  148. ''')";
  149. constexpr char Expected[] = R"(A block string literal
  150. )";
  151. EXPECT_THAT(*ParseBlockStringLiteral(Input), Eq(Expected));
  152. }
  // Per the test name, empty lines inside a block literal should be accepted
  // and carried through to the parsed content.
  // NOTE(review): no empty lines are visible in either literal in this copy;
  // they were presumably dropped along with the indentation -- confirm against
  // upstream before editing.
  153. TEST(ParseBlockStringLiteral, OkWithEmptyLines) {
  154. constexpr char Input[] = R"('''
  155. A block string literal
  156. with
  157. empty
  158. lines.
  159. ''')";
  160. constexpr char Expected[] = R"(A block string literal
  161. with
  162. empty
  163. lines.
  164. )";
  165. EXPECT_THAT(*ParseBlockStringLiteral(Input), Eq(Expected));
  166. }
  // A content line ending in a single backslash must parse without a trailing
  // newline: the expected text has neither the backslash nor the line break.
  // NOTE(review): any indentation inside the raw literal may have been
  // collapsed in this copy; confirm against upstream.
  167. TEST(ParseBlockStringLiteral, OkWithSlashNewlineEscape) {
  168. constexpr char Input[] = R"('''
  169. A block string literal\
  170. ''')";
  171. constexpr char Expected[] = "A block string literal";
  172. EXPECT_THAT(*ParseBlockStringLiteral(Input), Eq(Expected));
  173. }
  // A content line ending in two backslashes must parse to a single literal
  // backslash with the newline kept.
  // NOTE(review): any indentation inside the raw literals may have been
  // collapsed in this copy; confirm against upstream.
  174. TEST(ParseBlockStringLiteral, OkWithDoubleSlashNewline) {
  175. constexpr char Input[] = R"('''
  176. A block string literal\\
  177. ''')";
  178. constexpr char Expected[] = R"(A block string literal\
  179. )";
  180. EXPECT_THAT(*ParseBlockStringLiteral(Input), Eq(Expected));
  181. }
  // A content line ending in three backslashes must parse to a single literal
  // backslash with no trailing newline.
  // NOTE(review): any indentation inside the raw literals may have been
  // collapsed in this copy; confirm against upstream.
  182. TEST(ParseBlockStringLiteral, OkWithTripleSlashNewline) {
  183. constexpr char Input[] = R"('''
  184. A block string literal\\\
  185. ''')";
  186. constexpr char Expected[] = R"(A block string literal\)";
  187. EXPECT_THAT(*ParseBlockStringLiteral(Input), Eq(Expected));
  188. }
  // A line ending in a backslash followed by lines holding only a backslash
  // must parse to just the first line's text, with no newlines added.
  // NOTE(review): any indentation inside the raw literal may have been
  // collapsed in this copy; confirm against upstream.
  189. TEST(ParseBlockStringLiteral, OkMultipleSlashes) {
  190. constexpr char Input[] = R"('''
  191. A block string literal\
  192. \
  193. \
  194. \
  195. ''')";
  196. constexpr char Expected[] = "A block string literal";
  197. EXPECT_THAT(*ParseBlockStringLiteral(Input), Eq(Expected));
  198. }
  199. TEST(BuildCStrArgs, NoArgs) {
  200. llvm::BumpPtrAllocator alloc;
  201. auto result = BuildCStrArgs("tool", {}, alloc);
  202. ASSERT_THAT(result.size(), Eq(1));
  203. EXPECT_THAT(result[0], StrEq("tool"));
  204. }
  205. TEST(BuildCStrArgs, OneArg) {
  206. llvm::BumpPtrAllocator alloc;
  207. auto result = BuildCStrArgs("tool", {"arg1"}, alloc);
  208. ASSERT_THAT(result.size(), Eq(2));
  209. EXPECT_THAT(result[0], StrEq("tool"));
  210. EXPECT_THAT(result[1], StrEq("arg1"));
  211. }
  212. TEST(BuildCStrArgs, MultipleArgs) {
  213. llvm::BumpPtrAllocator alloc;
  214. auto result = BuildCStrArgs("tool", {"arg1", "arg2"}, alloc);
  215. ASSERT_THAT(result.size(), Eq(3));
  216. EXPECT_THAT(result[0], StrEq("tool"));
  217. EXPECT_THAT(result[1], StrEq("arg1"));
  218. EXPECT_THAT(result[2], StrEq("arg2"));
  219. }
  220. TEST(BuildCStrArgsWithPrefix, NoArgs) {
  221. llvm::BumpPtrAllocator alloc;
  222. auto result = BuildCStrArgs("tool", {}, {}, alloc);
  223. ASSERT_THAT(result.size(), Eq(1));
  224. EXPECT_THAT(result[0], StrEq("tool"));
  225. }
  226. TEST(BuildCStrArgsWithPrefix, PrefixOnly) {
  227. llvm::BumpPtrAllocator alloc;
  228. std::string prefix_args[] = {"p_arg1", "p_arg2"};
  229. auto result = BuildCStrArgs("tool", prefix_args, {}, alloc);
  230. ASSERT_THAT(result.size(), Eq(3));
  231. EXPECT_THAT(result[0], StrEq("tool"));
  232. EXPECT_THAT(result[1], Eq(prefix_args[0].c_str()));
  233. EXPECT_THAT(result[2], Eq(prefix_args[1].c_str()));
  234. }
  235. TEST(BuildCStrArgsWithPrefix, ArgsOnly) {
  236. llvm::BumpPtrAllocator alloc;
  237. auto result = BuildCStrArgs("tool", {}, {"arg1", "arg2"}, alloc);
  238. ASSERT_THAT(result.size(), Eq(3));
  239. EXPECT_THAT(result[0], StrEq("tool"));
  240. EXPECT_THAT(result[1], StrEq("arg1"));
  241. EXPECT_THAT(result[2], StrEq("arg2"));
  242. }
  243. TEST(BuildCStrArgsWithPrefix, BothPrefixAndArgs) {
  244. llvm::BumpPtrAllocator alloc;
  245. std::string prefix_args[] = {"p_arg1", "p_arg2"};
  246. auto result = BuildCStrArgs("tool", prefix_args, {"arg1", "arg2"}, alloc);
  247. ASSERT_THAT(result.size(), Eq(5));
  248. EXPECT_THAT(result[0], StrEq("tool"));
  249. EXPECT_THAT(result[1], Eq(prefix_args[0].c_str()));
  250. EXPECT_THAT(result[2], Eq(prefix_args[1].c_str()));
  251. EXPECT_THAT(result[3], StrEq("arg1"));
  252. EXPECT_THAT(result[4], StrEq("arg2"));
  253. }
  254. } // namespace
  255. } // namespace Carbon