string_helpers_test.cpp 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "common/string_helpers.h"
  5. #include <gmock/gmock.h>
  6. #include <gtest/gtest.h>
  7. #include <optional>
  8. #include <string>
  9. #include "llvm/ADT/ArrayRef.h"
  10. #include "llvm/ADT/StringRef.h"
  11. using ::testing::Eq;
  12. using ::testing::Optional;
  13. using ::testing::StrEq;
  14. namespace Carbon {
  15. namespace {
  16. TEST(UnescapeStringLiteral, Valid) {
  17. EXPECT_THAT(UnescapeStringLiteral("test"), Optional(Eq("test")));
  18. EXPECT_THAT(UnescapeStringLiteral("okay whitespace"),
  19. Optional(Eq("okay whitespace")));
  20. EXPECT_THAT(UnescapeStringLiteral("test\n"), Optional(Eq("test\n")));
  21. EXPECT_THAT(UnescapeStringLiteral("test\\n"), Optional(Eq("test\n")));
  22. EXPECT_THAT(UnescapeStringLiteral("abc\\ndef"), Optional(Eq("abc\ndef")));
  23. EXPECT_THAT(UnescapeStringLiteral("test\\\\n"), Optional(Eq("test\\n")));
  24. EXPECT_THAT(UnescapeStringLiteral("\\xAA"), Optional(Eq("\xAA")));
  25. EXPECT_THAT(UnescapeStringLiteral("\\x12"), Optional(Eq("\x12")));
  26. EXPECT_THAT(UnescapeStringLiteral("test", 1), Optional(Eq("test")));
  27. EXPECT_THAT(UnescapeStringLiteral("test\\#n", 1), Optional(Eq("test\n")));
  28. EXPECT_THAT(UnescapeStringLiteral(
  29. "r\\u{000000E9}al \\u{2764}\\u{FE0F}\\u{1F50A}!\\u{10FFFF}"),
  30. Optional(Eq("réal ❤️🔊!􏿿")));
  31. }
  32. TEST(UnescapeStringLiteral, Invalid) {
  33. // Missing char after `\`.
  34. EXPECT_THAT(UnescapeStringLiteral("a\\"), Eq(std::nullopt));
  35. // Not a supported escape.
  36. EXPECT_THAT(UnescapeStringLiteral("\\e"), Eq(std::nullopt));
  37. // Needs 2 hex chars.
  38. EXPECT_THAT(UnescapeStringLiteral("\\x"), Eq(std::nullopt));
  39. // Needs 2 hex chars.
  40. EXPECT_THAT(UnescapeStringLiteral("\\xA"), Eq(std::nullopt));
  41. // Needs uppercase hex.
  42. EXPECT_THAT(UnescapeStringLiteral("\\xaa"), Eq(std::nullopt));
  43. // Reserved.
  44. EXPECT_THAT(UnescapeStringLiteral("\\00"), Eq(std::nullopt));
  45. EXPECT_THAT(UnescapeStringLiteral("\\#00", 1), Eq(std::nullopt));
  46. }
  47. TEST(UnescapeStringLiteral, InvalidUnicodes) {
  48. // Various incomplete Unicode specifiers
  49. EXPECT_THAT(UnescapeStringLiteral("\\u"), Eq(std::nullopt));
  50. EXPECT_THAT(UnescapeStringLiteral("\\u1"), Eq(std::nullopt));
  51. EXPECT_THAT(UnescapeStringLiteral("\\uz"), Eq(std::nullopt));
  52. EXPECT_THAT(UnescapeStringLiteral("\\u{"), Eq(std::nullopt));
  53. EXPECT_THAT(UnescapeStringLiteral("\\u{z"), Eq(std::nullopt));
  54. EXPECT_THAT(UnescapeStringLiteral("\\u{E9"), Eq(std::nullopt));
  55. EXPECT_THAT(UnescapeStringLiteral("\\u{E9z"), Eq(std::nullopt));
  56. EXPECT_THAT(UnescapeStringLiteral("\\u{}"), Eq(std::nullopt));
  57. // invalid characters in unicode
  58. EXPECT_THAT(UnescapeStringLiteral("\\u{z}"), Eq(std::nullopt));
  59. // lowercase hexadecimal
  60. EXPECT_THAT(UnescapeStringLiteral("\\u{e9}"), Eq(std::nullopt));
  61. // Codepoint number too high
  62. EXPECT_THAT(UnescapeStringLiteral("\\u{110000}"), Eq(std::nullopt));
  63. // codepoint more than 8 hex digits
  64. EXPECT_THAT(UnescapeStringLiteral("\\u{FF000000E9}"), Eq(std::nullopt));
  65. }
  66. TEST(UnescapeStringLiteral, Nul) {
  67. std::optional<std::string> str = UnescapeStringLiteral("a\\0b");
  68. ASSERT_NE(str, std::nullopt);
  69. EXPECT_THAT(str->size(), Eq(3));
  70. EXPECT_THAT(strlen(str->c_str()), Eq(1));
  71. EXPECT_THAT((*str)[0], Eq('a'));
  72. EXPECT_THAT((*str)[1], Eq('\0'));
  73. EXPECT_THAT((*str)[2], Eq('b'));
  74. }
  75. TEST(ParseBlockStringLiteral, FailTooFewLines) {
  76. EXPECT_THAT(ParseBlockStringLiteral("").error().message(),
  77. Eq("Too few lines"));
  78. }
  79. TEST(ParseBlockStringLiteral, FailNoLeadingTripleQuotes) {
  80. EXPECT_THAT(ParseBlockStringLiteral("'a'\n").error().message(),
  81. Eq("Should start with triple quotes: 'a'"));
  82. }
  83. TEST(ParseBlockStringLiteral, FailInvalideFiletypeIndicator) {
  84. EXPECT_THAT(ParseBlockStringLiteral("'''carbon file\n").error().message(),
  85. Eq("Invalid characters in file type indicator: carbon file"));
  86. }
  87. TEST(ParseBlockStringLiteral, FailEndingTripleQuotes) {
  88. EXPECT_THAT(ParseBlockStringLiteral("'''\n").error().message(),
  89. Eq("Should end with triple quotes: "));
  90. }
  // Checks that ParseBlockStringLiteral reports a "Wrong indent" error that
  // names the offending line and the indent it expected.
  // NOTE(review): in this copy every line of the raw literal is flush-left,
  // yet the expected message says "expected 5". The literal's leading
  // whitespace (and possibly a run of spaces inside the expected message)
  // looks lost in extraction -- confirm against the upstream file before
  // relying on these bytes.
  91. TEST(ParseBlockStringLiteral, FailWrongIndent) {
  92. constexpr char Input[] = R"('''
  93. A block string literal
  94. with wrong indent
  95. ''')";
  96. EXPECT_THAT(ParseBlockStringLiteral(Input).error().message(),
  97. Eq("Wrong indent for line: with wrong indent, expected 5"));
  98. }
  // Checks that an unsupported escape inside a block string is reported as
  // "Invalid escaping in <line>": first `\q` with the default arguments, then
  // `\#q` with a second argument of 1.
  // NOTE(review): the raw literals are whitespace-sensitive and this copy
  // shows no indentation on any line; leading whitespace may have been lost
  // -- confirm against the upstream file.
  99. TEST(ParseBlockStringLiteral, FailInvalidEscaping) {
  100. constexpr char Input[] = R"('''
  101. \q
  102. ''')";
  103. EXPECT_THAT(ParseBlockStringLiteral(Input).error().message(),
  104. Eq("Invalid escaping in \\q"));
  105. constexpr char InputRaw[] = R"('''
  106. \#q
  107. ''')";
  108. EXPECT_THAT(ParseBlockStringLiteral(InputRaw, 1).error().message(),
  109. Eq("Invalid escaping in \\#q"));
  110. }
  // A block literal made of only the opening `'''` line and the closing `'''`
  // line parses successfully to the empty string.
  111. TEST(ParseBlockStringLiteral, OkEmptyString) {
  112. constexpr char Input[] = R"('''
  113. ''')";
  114. EXPECT_THAT(*ParseBlockStringLiteral(Input), Eq(""));
  115. }
  // A single content line between the quote lines parses to that line's text
  // followed by a newline (Expected ends with a line break).
  // NOTE(review): leading whitespace inside the raw literals may have been
  // lost in this copy -- confirm against the upstream file.
  116. TEST(ParseBlockStringLiteral, OkOneLineString) {
  117. constexpr char Input[] = R"('''
  118. A block string literal
  119. ''')";
  120. constexpr char Expected[] = R"(A block string literal
  121. )";
  122. EXPECT_THAT(*ParseBlockStringLiteral(Input), Eq(Expected));
  123. }
  // Two content lines parse to the same two lines, each ending in a newline.
  // NOTE(review): the second line reads "with indent." but neither literal
  // shows any indentation in this copy, so the relative indent this test
  // presumably exercises appears to have been lost -- confirm against the
  // upstream file.
  124. TEST(ParseBlockStringLiteral, OkTwoLineString) {
  125. constexpr char Input[] = R"('''
  126. A block string literal
  127. with indent.
  128. ''')";
  129. constexpr char Expected[] = R"(A block string literal
  130. with indent.
  131. )";
  132. EXPECT_THAT(*ParseBlockStringLiteral(Input), Eq(Expected));
  133. }
  // A word placed directly after the opening quotes (`'''carbon`) is accepted
  // and does not appear in the parsed contents: Expected holds only the two
  // content lines.
  // NOTE(review): leading whitespace inside the raw literals may have been
  // lost in this copy -- confirm against the upstream file.
  134. TEST(ParseBlockStringLiteral, OkWithFileTypeIndicator) {
  135. constexpr char Input[] = R"('''carbon
  136. A block string literal
  137. with file type indicator.
  138. ''')";
  139. constexpr char Expected[] = R"(A block string literal
  140. with file type indicator.
  141. )";
  142. EXPECT_THAT(*ParseBlockStringLiteral(Input), Eq(Expected));
  143. }
  // Per the test name, whitespace following the opening `'''` should be
  // tolerated, and the single content line parses as usual.
  // NOTE(review): no whitespace after the opening quotes is visible in this
  // copy, which makes the input look identical to OkOneLineString; trailing
  // spaces were probably lost in extraction -- confirm against the upstream
  // file.
  144. TEST(ParseBlockStringLiteral, OkWhitespaceAfterOpeningQuotes) {
  145. constexpr char Input[] = R"('''
  146. A block string literal
  147. ''')";
  148. constexpr char Expected[] = R"(A block string literal
  149. )";
  150. EXPECT_THAT(*ParseBlockStringLiteral(Input), Eq(Expected));
  151. }
  // Per the test name, a block literal containing empty lines should parse,
  // with Expected mirroring the content lines of Input.
  // NOTE(review): neither literal shows an empty line in this copy; blank
  // lines (and any indentation) appear to have been dropped in extraction --
  // confirm against the upstream file.
  152. TEST(ParseBlockStringLiteral, OkWithEmptyLines) {
  153. constexpr char Input[] = R"('''
  154. A block string literal
  155. with
  156. empty
  157. lines.
  158. ''')";
  159. constexpr char Expected[] = R"(A block string literal
  160. with
  161. empty
  162. lines.
  163. )";
  164. EXPECT_THAT(*ParseBlockStringLiteral(Input), Eq(Expected));
  165. }
  // A single backslash at the end of a content line suppresses that line's
  // newline: Expected is the line's text with no trailing line break.
  // NOTE(review): leading whitespace inside the raw literal may have been lost
  // in this copy -- confirm against the upstream file.
  166. TEST(ParseBlockStringLiteral, OkWithSlashNewlineEscape) {
  167. constexpr char Input[] = R"('''
  168. A block string literal\
  169. ''')";
  170. constexpr char Expected[] = "A block string literal";
  171. EXPECT_THAT(*ParseBlockStringLiteral(Input), Eq(Expected));
  172. }
  // Two backslashes at the end of a content line produce one literal backslash
  // and leave the line's newline in place: Expected ends with `\` followed by
  // a line break.
  // NOTE(review): leading whitespace inside the raw literals may have been
  // lost in this copy -- confirm against the upstream file.
  173. TEST(ParseBlockStringLiteral, OkWithDoubleSlashNewline) {
  174. constexpr char Input[] = R"('''
  175. A block string literal\\
  176. ''')";
  177. constexpr char Expected[] = R"(A block string literal\
  178. )";
  179. EXPECT_THAT(*ParseBlockStringLiteral(Input), Eq(Expected));
  180. }
  // Three backslashes at the end of a content line produce one literal
  // backslash and suppress the line's newline: Expected ends with `\` and has
  // no trailing line break.
  // NOTE(review): leading whitespace inside the raw literal may have been lost
  // in this copy -- confirm against the upstream file.
  181. TEST(ParseBlockStringLiteral, OkWithTripleSlashNewline) {
  182. constexpr char Input[] = R"('''
  183. A block string literal\\\
  184. ''')";
  185. constexpr char Expected[] = R"(A block string literal\)";
  186. EXPECT_THAT(*ParseBlockStringLiteral(Input), Eq(Expected));
  187. }
  // A content line ending in a backslash, followed by three lines that hold
  // only a backslash, parses to just the first line's text: Expected has no
  // newline and no backslash.
  // NOTE(review): leading whitespace inside the raw literal may have been lost
  // in this copy -- confirm against the upstream file.
  188. TEST(ParseBlockStringLiteral, OkMultipleSlashes) {
  189. constexpr char Input[] = R"('''
  190. A block string literal\
  191. \
  192. \
  193. \
  194. ''')";
  195. constexpr char Expected[] = "A block string literal";
  196. EXPECT_THAT(*ParseBlockStringLiteral(Input), Eq(Expected));
  197. }
  198. TEST(BuildCStrArgs, NoArgs) {
  199. llvm::OwningArrayRef<char> storage;
  200. auto result = BuildCStrArgs("tool", {}, storage);
  201. ASSERT_THAT(result.size(), Eq(1));
  202. EXPECT_THAT(result[0], StrEq("tool"));
  203. }
  204. TEST(BuildCStrArgs, OneArg) {
  205. llvm::OwningArrayRef<char> storage;
  206. auto result = BuildCStrArgs("tool", {"arg1"}, storage);
  207. ASSERT_THAT(result.size(), Eq(2));
  208. EXPECT_THAT(result[0], StrEq("tool"));
  209. EXPECT_THAT(result[1], StrEq("arg1"));
  210. }
  211. TEST(BuildCStrArgs, MultipleArgs) {
  212. llvm::OwningArrayRef<char> storage;
  213. auto result = BuildCStrArgs("tool", {"arg1", "arg2"}, storage);
  214. ASSERT_THAT(result.size(), Eq(3));
  215. EXPECT_THAT(result[0], StrEq("tool"));
  216. EXPECT_THAT(result[1], StrEq("arg1"));
  217. EXPECT_THAT(result[2], StrEq("arg2"));
  218. }
  219. TEST(BuildCStrArgsWithPrefix, NoArgs) {
  220. llvm::OwningArrayRef<char> storage;
  221. auto result = BuildCStrArgs("tool", {}, {}, storage);
  222. ASSERT_THAT(result.size(), Eq(1));
  223. EXPECT_THAT(result[0], StrEq("tool"));
  224. }
  225. TEST(BuildCStrArgsWithPrefix, PrefixOnly) {
  226. llvm::OwningArrayRef<char> storage;
  227. std::string prefix_args[] = {"p_arg1", "p_arg2"};
  228. auto result = BuildCStrArgs("tool", prefix_args, {}, storage);
  229. ASSERT_THAT(result.size(), Eq(3));
  230. EXPECT_THAT(result[0], StrEq("tool"));
  231. EXPECT_THAT(result[1], Eq(prefix_args[0].c_str()));
  232. EXPECT_THAT(result[2], Eq(prefix_args[1].c_str()));
  233. }
  234. TEST(BuildCStrArgsWithPrefix, ArgsOnly) {
  235. llvm::OwningArrayRef<char> storage;
  236. auto result = BuildCStrArgs("tool", {}, {"arg1", "arg2"}, storage);
  237. ASSERT_THAT(result.size(), Eq(3));
  238. EXPECT_THAT(result[0], StrEq("tool"));
  239. EXPECT_THAT(result[1], StrEq("arg1"));
  240. EXPECT_THAT(result[2], StrEq("arg2"));
  241. }
  242. TEST(BuildCStrArgsWithPrefix, BothPrefixAndArgs) {
  243. llvm::OwningArrayRef<char> storage;
  244. std::string prefix_args[] = {"p_arg1", "p_arg2"};
  245. auto result = BuildCStrArgs("tool", prefix_args, {"arg1", "arg2"}, storage);
  246. ASSERT_THAT(result.size(), Eq(5));
  247. EXPECT_THAT(result[0], StrEq("tool"));
  248. EXPECT_THAT(result[1], Eq(prefix_args[0].c_str()));
  249. EXPECT_THAT(result[2], Eq(prefix_args[1].c_str()));
  250. EXPECT_THAT(result[3], StrEq("arg1"));
  251. EXPECT_THAT(result[4], StrEq("arg2"));
  252. }
  253. } // namespace
  254. } // namespace Carbon