source_gen.cpp 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "testing/base/source_gen.h"
  5. #include <numeric>
  6. #include "llvm/ADT/ArrayRef.h"
  7. #include "llvm/ADT/Sequence.h"
  8. #include "llvm/ADT/SmallVector.h"
  9. #include "llvm/ADT/StringExtras.h"
  10. #include "llvm/Support/FormatVariadic.h"
  11. #include "toolchain/lex/token_kind.h"
  12. namespace Carbon::Testing {
  13. auto SourceGen::Global() -> SourceGen& {
  14. static SourceGen global_gen;
  15. return global_gen;
  16. }
  17. SourceGen::SourceGen(Language language) : language_(language) {}
  18. // Some heuristic numbers used when formatting generated code. These heuristics
  19. // are loosely based on what we expect to make Carbon code readable, and might
  20. // not fit as well in C++, but we use the same heuristics across languages for
  21. // simplicity and to make the output in different languages more directly
  22. // comparable.
  23. constexpr static int NumSingleLineFunctionParams = 3;
  24. constexpr static int NumSingleLineMethodParams = 2;
  25. constexpr static int MaxParamsPerLine = 4;
  26. static auto EstimateAvgFunctionDeclLines(SourceGen::FunctionDeclParams params)
  27. -> double {
  28. // Currently model a uniform distribution [0, max] parameters. Assume a line
  29. // break before the first parameter for >3 and after every 4th.
  30. int param_lines = 0;
  31. for (int num_params : llvm::seq_inclusive(0, params.max_params)) {
  32. if (num_params > NumSingleLineFunctionParams) {
  33. param_lines += (num_params + MaxParamsPerLine - 1) / MaxParamsPerLine;
  34. }
  35. }
  36. return 1.0 + static_cast<double>(param_lines) / (params.max_params + 1);
  37. }
  38. static auto EstimateAvgMethodDeclLines(SourceGen::MethodDeclParams params)
  39. -> double {
  40. // Currently model a uniform distribution [0, max] parameters. Assume a line
  41. // break before the first parameter for >2 and after every 4th.
  42. int param_lines = 0;
  43. for (int num_params : llvm::seq_inclusive(0, params.max_params)) {
  44. if (num_params > NumSingleLineMethodParams) {
  45. param_lines += (num_params + MaxParamsPerLine - 1) / MaxParamsPerLine;
  46. }
  47. }
  48. return 1.0 + static_cast<double>(param_lines) / (params.max_params + 1);
  49. }
  50. // Note that this should match the heuristics used when formatting.
  51. // TODO: See top-level TODO about line estimates and formatting.
  52. static auto EstimateAvgClassDefLines(SourceGen::ClassParams params) -> double {
  53. // Comment line, and class open line.
  54. double avg = 2.0;
  55. // One comment line and blank line per function, plus the function lines.
  56. avg +=
  57. (2.0 + EstimateAvgFunctionDeclLines(params.public_function_decl_params)) *
  58. params.public_function_decls;
  59. avg += (2.0 + EstimateAvgMethodDeclLines(params.public_method_decl_params)) *
  60. params.public_method_decls;
  61. avg += (2.0 +
  62. EstimateAvgFunctionDeclLines(params.private_function_decl_params)) *
  63. params.private_function_decls;
  64. avg += (2.0 + EstimateAvgMethodDeclLines(params.private_method_decl_params)) *
  65. params.private_method_decls;
  66. // A blank line and all the fields (if any).
  67. if (params.private_field_decls > 0) {
  68. avg += 1.0 + params.private_field_decls;
  69. }
  70. // No need to account for the class close line, we have an extra blank line
  71. // count for the last of the above.
  72. return avg;
  73. }
  74. auto SourceGen::GenAPIFileDenseDecls(int target_lines, DenseDeclParams params)
  75. -> std::string {
  76. std::string source;
  77. llvm::raw_string_ostream os(source);
  78. // Figure out how many classes fit in our target lines, each separated by a
  79. // blank line. We need to account the comment lines below to start the file.
  80. // Note that we want a blank line after our file comment block, so every class
  81. // needs a blank line.
  82. constexpr int NumFileCommentLines = 4;
  83. double avg_class_lines = EstimateAvgClassDefLines(params.class_params);
  84. CARBON_CHECK(target_lines > NumFileCommentLines + avg_class_lines)
  85. << "Not enough target lines to generate a single class!";
  86. int num_classes = static_cast<double>(target_lines - NumFileCommentLines) /
  87. (avg_class_lines + 1);
  88. int expected_lines =
  89. NumFileCommentLines + num_classes * (avg_class_lines + 1);
  90. os << "// Generated " << (!IsCpp() ? "Carbon" : "C++") << " source file.\n";
  91. os << llvm::formatv("// {0} target lines: {1} classes, {2} expected lines",
  92. target_lines, num_classes, expected_lines)
  93. << "\n";
  94. os << "//\n// Generating as an API file with dense declarations.\n";
  95. auto class_gen_state = GetClassGenState(num_classes, params.class_params);
  96. for ([[maybe_unused]] int _ : llvm::seq(num_classes)) {
  97. os << "\n";
  98. GenerateClassDef(params.class_params, class_gen_state, os);
  99. }
  100. // Make sure we consumed all the state.
  101. CARBON_CHECK(class_gen_state.public_function_param_counts.empty());
  102. CARBON_CHECK(class_gen_state.public_method_param_counts.empty());
  103. CARBON_CHECK(class_gen_state.private_function_param_counts.empty());
  104. CARBON_CHECK(class_gen_state.private_method_param_counts.empty());
  105. CARBON_CHECK(class_gen_state.class_names.empty());
  106. return source;
  107. }
  108. auto SourceGen::GetShuffledIdentifiers(int number, int min_length,
  109. int max_length, bool uniform)
  110. -> llvm::SmallVector<llvm::StringRef> {
  111. llvm::SmallVector<llvm::StringRef> idents =
  112. GetIdentifiers(number, min_length, max_length, uniform);
  113. std::shuffle(idents.begin(), idents.end(), rng_);
  114. return idents;
  115. }
  116. auto SourceGen::GetShuffledUniqueIdentifiers(int number, int min_length,
  117. int max_length, bool uniform)
  118. -> llvm::SmallVector<llvm::StringRef> {
  119. CARBON_CHECK(min_length >= 4)
  120. << "Cannot trivially guarantee enough distinct, unique identifiers for "
  121. "lengths <= 3";
  122. llvm::SmallVector<llvm::StringRef> idents =
  123. GetUniqueIdentifiers(number, min_length, max_length, uniform);
  124. std::shuffle(idents.begin(), idents.end(), rng_);
  125. return idents;
  126. }
  127. auto SourceGen::GetIdentifiers(int number, int min_length, int max_length,
  128. bool uniform)
  129. -> llvm::SmallVector<llvm::StringRef> {
  130. llvm::SmallVector<llvm::StringRef> idents = GetIdentifiersImpl(
  131. number, min_length, max_length, uniform,
  132. [this](int length, int length_count,
  133. llvm::SmallVectorImpl<llvm::StringRef>& dest) {
  134. auto length_idents = GetSingleLengthIdentifiers(length, length_count);
  135. dest.append(length_idents.begin(), length_idents.end());
  136. });
  137. return idents;
  138. }
  139. auto SourceGen::GetUniqueIdentifiers(int number, int min_length, int max_length,
  140. bool uniform)
  141. -> llvm::SmallVector<llvm::StringRef> {
  142. CARBON_CHECK(min_length >= 4)
  143. << "Cannot trivially guarantee enough distinct, unique identifiers for "
  144. "lengths <= 3";
  145. llvm::SmallVector<llvm::StringRef> idents =
  146. GetIdentifiersImpl(number, min_length, max_length, uniform,
  147. [this](int length, int length_count,
  148. llvm::SmallVectorImpl<llvm::StringRef>& dest) {
  149. AppendUniqueIdentifiers(length, length_count, dest);
  150. });
  151. return idents;
  152. }
  153. auto SourceGen::GetSingleLengthIdentifiers(int length, int number)
  154. -> llvm::ArrayRef<llvm::StringRef> {
  155. llvm::SmallVector<llvm::StringRef>& idents =
  156. identifiers_by_length_.Insert(length, {}).value();
  157. if (static_cast<int>(idents.size()) < number) {
  158. idents.reserve(number);
  159. for ([[maybe_unused]] int _ : llvm::seq<int>(idents.size(), number)) {
  160. auto ident_storage =
  161. llvm::MutableArrayRef(reinterpret_cast<char*>(storage_.Allocate(
  162. /*Size=*/length, /*Alignment=*/1)),
  163. length);
  164. GenerateRandomIdentifier(ident_storage);
  165. llvm::StringRef new_id(ident_storage.data(), length);
  166. idents.push_back(new_id);
  167. }
  168. CARBON_CHECK(static_cast<int>(idents.size()) == number);
  169. }
  170. return llvm::ArrayRef(idents).slice(0, number);
  171. }
  172. static auto IdentifierStartChars() -> llvm::ArrayRef<char> {
  173. static llvm::SmallVector<char> chars = [] {
  174. llvm::SmallVector<char> chars;
  175. for (char c : llvm::seq_inclusive('A', 'Z')) {
  176. chars.push_back(c);
  177. }
  178. for (char c : llvm::seq_inclusive('a', 'z')) {
  179. chars.push_back(c);
  180. }
  181. return chars;
  182. }();
  183. return chars;
  184. }
  185. static auto IdentifierChars() -> llvm::ArrayRef<char> {
  186. static llvm::SmallVector<char> chars = [] {
  187. llvm::ArrayRef<char> start_chars = IdentifierStartChars();
  188. llvm::SmallVector<char> chars(start_chars.begin(), start_chars.end());
  189. chars.push_back('_');
  190. for (char c : llvm::seq_inclusive('0', '9')) {
  191. chars.push_back(c);
  192. }
  193. return chars;
  194. }();
  195. return chars;
  196. }
  197. constexpr static llvm::StringRef NonCarbonCppKeywords[] = {
  198. "asm", "do", "double", "float", "int", "long",
  199. "new", "signed", "try", "unix", "unsigned", "xor",
  200. };
  201. // Returns a random identifier string of the specified length.
  202. //
  203. // Ensures this is a valid identifier, avoiding any overlapping syntaxes or
  204. // keywords both in Carbon and C++.
  205. //
  206. // This routine is somewhat expensive and so is useful to cache and reduce the
  207. // frequency of calls. However, each time it is called it computes a completely
  208. // new random identifier and so can be useful to eventually find a distinct
  209. // identifier when needed.
  210. auto SourceGen::GenerateRandomIdentifier(
  211. llvm::MutableArrayRef<char> dest_storage) -> void {
  212. llvm::ArrayRef<char> start_chars = IdentifierStartChars();
  213. llvm::ArrayRef<char> chars = IdentifierChars();
  214. auto ident = llvm::StringRef(dest_storage.data(), dest_storage.size());
  215. do {
  216. dest_storage[0] =
  217. start_chars[absl::Uniform<int>(rng_, 0, start_chars.size())];
  218. for (int i : llvm::seq<int>(1, dest_storage.size())) {
  219. dest_storage[i] = chars[absl::Uniform<int>(rng_, 0, chars.size())];
  220. }
  221. } while (
  222. // TODO: Clean up and simplify this code. With some small refactorings and
  223. // post-processing we should be able to make this both easier to read and
  224. // less inefficient.
  225. llvm::any_of(
  226. Lex::TokenKind::KeywordTokens,
  227. [ident](auto token) { return ident == token.fixed_spelling(); }) ||
  228. llvm::is_contained(NonCarbonCppKeywords, ident) ||
  229. (llvm::is_contained({'i', 'u', 'f'}, ident[0]) &&
  230. llvm::all_of(ident.substr(1),
  231. [](const char c) { return llvm::isDigit(c); })));
  232. }
  233. // Appends a number of unique, random identifiers with a particular length to
  234. // the provided destination vector.
  235. //
  236. // Uses, and when necessary grows, a cached sequence of random identifiers with
  237. // the specified length. Because these are cached, this is efficient to call
  238. // repeatedly, but will not produce a different sequence of identifiers.
  239. auto SourceGen::AppendUniqueIdentifiers(
  240. int length, int number, llvm::SmallVectorImpl<llvm::StringRef>& dest)
  241. -> void {
  242. auto& [count, unique_idents] =
  243. unique_identifiers_by_length_.Insert(length, {}).value();
  244. // See if we need to grow our pool of unique identifiers with the requested
  245. // length.
  246. if (count < number) {
  247. // We'll need to insert exactly the requested new unique identifiers. All
  248. // our other inserts will find an existing entry.
  249. unique_idents.GrowForInsertCount(count - number);
  250. // Generate the needed number of identifiers.
  251. for ([[maybe_unused]] int _ : llvm::seq<int>(count, number)) {
  252. // Allocate stable storage for the identifier so we can form stable
  253. // `StringRef`s to it.
  254. auto ident_storage =
  255. llvm::MutableArrayRef(reinterpret_cast<char*>(storage_.Allocate(
  256. /*Size=*/length, /*Alignment=*/1)),
  257. length);
  258. // Repeatedly generate novel identifiers of this length until we find a
  259. // new unique one.
  260. for (;;) {
  261. GenerateRandomIdentifier(ident_storage);
  262. auto result =
  263. unique_idents.Insert(llvm::StringRef(ident_storage.data(), length));
  264. if (result.is_inserted()) {
  265. break;
  266. }
  267. }
  268. }
  269. count = number;
  270. }
  271. // Append all the identifiers directly out of the set. We make no guarantees
  272. // about the relative order so we just use the non-deterministic order of the
  273. // set and avoid additional storage.
  274. //
  275. // TODO: It's awkward the `ForEach` here can't early-exit. This just walks the
  276. // whole set which is harmless if inefficient. We should add early exiting
  277. // the loop support to `Set` and update this code.
  278. unique_idents.ForEach([&](llvm::StringRef ident) {
  279. if (number > 0) {
  280. dest.push_back(ident);
  281. --number;
  282. }
  283. });
  284. CARBON_CHECK(number == 0);
  285. }
  286. // An array of the counts that should be used for each identifier length to
  287. // produce our desired distribution.
  288. //
  289. // Note that the zero-based index corresponds to a 1-based length, so the count
  290. // for identifiers of length 1 is at index 0.
  291. static constexpr std::array<int, 64> IdentifierLengthCounts = [] {
  292. std::array<int, 64> ident_length_counts;
  293. // For non-uniform distribution, we simulate a distribution roughly based on
  294. // the observed histogram of identifier lengths, but smoothed a bit and
  295. // reduced to small counts so that we cycle through all the lengths
  296. // reasonably quickly. We want sampling of even 10% of NumTokens from this
  297. // in a round-robin form to not be skewed overly much. This still inherently
  298. // compresses the long tail as we'd rather have coverage even though it
  299. // distorts the distribution a bit.
  300. //
  301. // The distribution here comes from a script that analyzes source code run
  302. // over a few directories of LLVM. The script renders a visual ascii-art
  303. // histogram along with the data for each bucket, and that output is
  304. // included in comments above each bucket size below to help visualize the
  305. // rough shape we're aiming for.
  306. //
  307. // 1 characters [3976] ███████████████████████████████▊
  308. ident_length_counts[0] = 40;
  309. // 2 characters [3724] █████████████████████████████▊
  310. ident_length_counts[1] = 40;
  311. // 3 characters [4173] █████████████████████████████████▍
  312. ident_length_counts[2] = 40;
  313. // 4 characters [5000] ████████████████████████████████████████
  314. ident_length_counts[3] = 50;
  315. // 5 characters [1568] ████████████▌
  316. ident_length_counts[4] = 20;
  317. // 6 characters [2226] █████████████████▊
  318. ident_length_counts[5] = 20;
  319. // 7 characters [2380] ███████████████████
  320. ident_length_counts[6] = 20;
  321. // 8 characters [1786] ██████████████▎
  322. ident_length_counts[7] = 18;
  323. // 9 characters [1397] ███████████▏
  324. ident_length_counts[8] = 12;
  325. // 10 characters [ 739] █████▉
  326. ident_length_counts[9] = 12;
  327. // 11 characters [ 779] ██████▎
  328. ident_length_counts[10] = 12;
  329. // 12 characters [1344] ██████████▊
  330. ident_length_counts[11] = 12;
  331. // 13 characters [ 498] ████
  332. ident_length_counts[12] = 5;
  333. // 14 characters [ 284] ██▎
  334. ident_length_counts[13] = 3;
  335. // 15 characters [ 172] █▍
  336. // 16 characters [ 278] ██▎
  337. // 17 characters [ 191] █▌
  338. // 18 characters [ 207] █▋
  339. for (int i = 14; i < 18; ++i) {
  340. ident_length_counts[i] = 2;
  341. }
  342. // 19 - 63 characters are all <100 but non-zero, and we map them to 1 for
  343. // coverage despite slightly over weighting the tail.
  344. for (int i = 18; i < 64; ++i) {
  345. ident_length_counts[i] = 1;
  346. }
  347. return ident_length_counts;
  348. }();
  349. // A helper to sum elements of a range.
  350. template <typename T>
  351. static auto Sum(const T& range) -> int {
  352. return std::accumulate(range.begin(), range.end(), 0);
  353. }
  354. // A template function that implements the common logic of `GetIdentifiers` and
  355. // `GetUniqueIdentifiers`. Most parameters correspond to the parameters of those
  356. // functions. Additionally, an `AppendFunc` callable is provided to implement
  357. // the appending operation.
  358. //
  359. // The main functionality provided here is collecting the correct number of
  360. // identifiers from each of the lengths in the range [min_length, max_length]
  361. // and either in our default representative distribution or a uniform
  362. // distribution.
  363. auto SourceGen::GetIdentifiersImpl(int number, int min_length, int max_length,
  364. bool uniform,
  365. llvm::function_ref<AppendFn> append)
  366. -> llvm::SmallVector<llvm::StringRef> {
  367. CARBON_CHECK(min_length <= max_length);
  368. CARBON_CHECK(uniform || max_length <= 64)
  369. << "Cannot produce a meaningful non-uniform distribution of lengths "
  370. "longer than 64 as those are exceedingly rare in our observed data "
  371. "sets.";
  372. llvm::SmallVector<llvm::StringRef> idents;
  373. idents.reserve(number);
  374. // First, compute the total weight of the distribution so we know how many
  375. // identifiers we'll get each time we collect from it.
  376. int num_lengths = max_length - min_length + 1;
  377. auto length_counts =
  378. llvm::ArrayRef(IdentifierLengthCounts).slice(min_length - 1, num_lengths);
  379. int count_sum = uniform ? num_lengths : Sum(length_counts);
  380. CARBON_CHECK(count_sum >= 1);
  381. int number_rem = number % count_sum;
  382. // Finally, walk through each length in the distribution.
  383. for (int length : llvm::seq_inclusive(min_length, max_length)) {
  384. // Scale how many identifiers we want of this length if computing a
  385. // non-uniform distribution. For uniform, we always take one.
  386. int scale = uniform ? 1 : IdentifierLengthCounts[length - 1];
  387. // Now we can compute how many identifiers of this length to request.
  388. int length_count = (number / count_sum) * scale;
  389. if (number_rem > 0) {
  390. int rem_adjustment = std::min(scale, number_rem);
  391. length_count += rem_adjustment;
  392. number_rem -= rem_adjustment;
  393. }
  394. append(length, length_count, idents);
  395. }
  396. CARBON_CHECK(number_rem == 0)
  397. << "Unexpected number remaining: " << number_rem;
  398. CARBON_CHECK(static_cast<int>(idents.size()) == number)
  399. << "Ended up with " << idents.size()
  400. << " identifiers instead of the requested " << number;
  401. return idents;
  402. }
  403. // Returns a shuffled sequence of integers in the range [min, max].
  404. //
  405. // The order of the returned integers is random, but each integer in the range
  406. // appears the same number of times in the result, with the number of
  407. // appearances rounded up for lower numbers and rounded down for higher numbers
  408. // in order to exactly produce `number` results.
  409. auto SourceGen::GetShuffledInts(int number, int min, int max)
  410. -> llvm::SmallVector<int> {
  411. llvm::SmallVector<int> ints;
  412. ints.reserve(number);
  413. // Evenly distribute to each value between min and max.
  414. int num_values = max - min + 1;
  415. for (int i : llvm::seq_inclusive(min, max)) {
  416. int i_count = number / num_values;
  417. i_count += i < (min + (number % num_values));
  418. ints.append(i_count, i);
  419. }
  420. CARBON_CHECK(static_cast<int>(ints.size()) == number);
  421. std::shuffle(ints.begin(), ints.end(), rng_);
  422. return ints;
  423. }
  424. // Given a number of class definitions and the params with which to generate
  425. // them, builds the state that will be used while generating that many classes.
  426. //
  427. // We build the state first and across all the class definitions that will be
  428. // generated so that we can distribute random components across all the
  429. // definitions.
  430. auto SourceGen::GetClassGenState(int number, ClassParams params)
  431. -> ClassGenState {
  432. ClassGenState state;
  433. state.public_function_param_counts =
  434. GetShuffledInts(number * params.public_function_decls, 0,
  435. params.public_function_decl_params.max_params);
  436. state.public_method_param_counts =
  437. GetShuffledInts(number * params.public_method_decls, 0,
  438. params.public_method_decl_params.max_params);
  439. state.private_function_param_counts =
  440. GetShuffledInts(number * params.private_function_decls, 0,
  441. params.private_function_decl_params.max_params);
  442. state.private_method_param_counts =
  443. GetShuffledInts(number * params.private_method_decls, 0,
  444. params.private_method_decl_params.max_params);
  445. state.class_names = GetShuffledUniqueIdentifiers(number, /*min_length=*/5);
  446. int num_members =
  447. number * (params.public_function_decls + params.public_method_decls +
  448. params.private_function_decls + params.private_method_decls +
  449. params.private_field_decls);
  450. state.member_names = GetShuffledIdentifiers(num_members, /*min_length=*/4);
  451. int num_params = Sum(state.public_function_param_counts) +
  452. Sum(state.public_method_param_counts) +
  453. Sum(state.private_function_param_counts) +
  454. Sum(state.private_method_param_counts);
  455. state.param_names = GetShuffledIdentifiers(num_params);
  456. return state;
  457. }
  458. // A helper to pop series of unique identifiers off a sequence of random
  459. // identifiers that may have duplicates.
  460. //
  461. // This is particularly designed to work with the sequences of non-unique
  462. // identifiers produced by `GetShuffledIdentifiers` with the important property
  463. // that while popping off unique identifiers found in the shuffled list, we
  464. // don't change the distribution of identifier lengths.
  465. //
  466. // The uniqueness is only per-instance of the class, and so an instance can be
  467. // used to extract a series of names that share a scope.
  468. //
  469. // It works by scanning the sequence to extract each unique identifier found,
  470. // swapping it to the back and popping it off the list. This does shuffle the
  471. // order, but it isn't expected to do so in an interesting way.
  472. //
  473. // It also provides a fallback path in case there are no unique identifiers left
  474. // which computes fresh, random identifiers with the same length as the next one
  475. // in the sequence until a unique one is found.
  476. //
  477. // For simplicity of the fallback path, the lifetime of the identifiers produced
  478. // is bound to the lifetime of the popper instance, and not the generator as a
  479. // whole. If this is ever a problematic constraint, we can start copying
  480. // fallback identifiers into the generator's storage.
  481. class SourceGen::UniqueIdentifierPopper {
  482. public:
  483. explicit UniqueIdentifierPopper(SourceGen& gen,
  484. llvm::SmallVectorImpl<llvm::StringRef>& data)
  485. : gen_(&gen), data_(&data), it_(data_->rbegin()) {}
  486. // Pop the next unique identifier that can be found in the data, or synthesize
  487. // one with a valid length. Always consumes exactly one identifier from the
  488. // data.
  489. //
  490. // Note that the lifetime of the underlying identifier is that of the popper
  491. // and not the underlying data.
  492. auto Pop() -> llvm::StringRef {
  493. for (auto end = data_->rend(); it_ != end; ++it_) {
  494. auto insert = set_.Insert(*it_);
  495. if (!insert.is_inserted()) {
  496. continue;
  497. }
  498. if (it_ != data_->rbegin()) {
  499. std::swap(*data_->rbegin(), *it_);
  500. }
  501. CARBON_CHECK(insert.key() == data_->back());
  502. return data_->pop_back_val();
  503. }
  504. // Out of unique elements. Overwrite the back, preserving its length,
  505. // generating a new identifiers until we find a unique one and return that.
  506. // This ensures we continue to consume the structure and produce the same
  507. // size identifiers even in the fallback.
  508. int length = data_->pop_back_val().size();
  509. auto fallback_ident_storage =
  510. llvm::MutableArrayRef(reinterpret_cast<char*>(gen_->storage_.Allocate(
  511. /*Size=*/length, /*Alignment=*/1)),
  512. length);
  513. for (;;) {
  514. gen_->GenerateRandomIdentifier(fallback_ident_storage);
  515. auto fallback_id = llvm::StringRef(fallback_ident_storage.data(), length);
  516. if (set_.Insert(fallback_id).is_inserted()) {
  517. return fallback_id;
  518. }
  519. }
  520. }
  521. private:
  522. SourceGen* gen_;
  523. llvm::SmallVectorImpl<llvm::StringRef>* data_;
  524. llvm::SmallVectorImpl<llvm::StringRef>::reverse_iterator it_;
  525. Set<llvm::StringRef> set_;
  526. };
  527. // Generates a function declaration and writes it to the provided stream.
  528. //
  529. // The declaration can be configured with a function name, private modifier,
  530. // whether it is a method, the parameter count, an how indented it is.
  531. //
  532. // This is also provided a collection of identifiers to consume as parameter
  533. // names -- it will use a unique popper to extract unique parameter names from
  534. // this collection.
  535. auto SourceGen::GenerateFunctionDecl(
  536. llvm::StringRef name, bool is_private, bool is_method, int param_count,
  537. llvm::StringRef indent, llvm::SmallVectorImpl<llvm::StringRef>& param_names,
  538. llvm::raw_ostream& os) -> void {
  539. os << indent << "// TODO: make better comment text\n";
  540. if (!IsCpp()) {
  541. os << indent << (is_private ? "private " : "") << "fn " << name;
  542. if (is_method) {
  543. os << "[self: Self]";
  544. }
  545. } else {
  546. os << indent;
  547. if (!is_method) {
  548. os << "static ";
  549. }
  550. os << "auto " << name;
  551. }
  552. os << "(";
  553. if (param_count >
  554. (is_method ? NumSingleLineMethodParams : NumSingleLineFunctionParams)) {
  555. os << "\n" << indent << " ";
  556. }
  557. UniqueIdentifierPopper unique_param_names(*this, param_names);
  558. for (int i : llvm::seq(param_count)) {
  559. if (i > 0) {
  560. if ((i % MaxParamsPerLine) == 0) {
  561. os << ",\n" << indent << " ";
  562. } else {
  563. os << ", ";
  564. }
  565. }
  566. if (!IsCpp()) {
  567. os << unique_param_names.Pop() << ": i32";
  568. } else {
  569. os << "int " << unique_param_names.Pop();
  570. }
  571. }
  572. os << ")" << (IsCpp() ? " -> void" : "") << ";\n";
  573. }
  574. // Generate a class definition and write it to the provided stream.
  575. //
  576. // The structure of the definition is guided by the `params` provided, and it
  577. // consumes the provided state.
  578. auto SourceGen::GenerateClassDef(const ClassParams& params,
  579. ClassGenState& state, llvm::raw_ostream& os)
  580. -> void {
  581. os << "// TODO: make better comment text\n";
  582. os << "class " << state.class_names.pop_back_val() << " {\n";
  583. if (IsCpp()) {
  584. os << " public:\n";
  585. }
  586. UniqueIdentifierPopper unique_member_names(*this, state.member_names);
  587. llvm::ListSeparator line_sep("\n");
  588. for ([[maybe_unused]] int _ : llvm::seq(params.public_function_decls)) {
  589. os << line_sep;
  590. GenerateFunctionDecl(unique_member_names.Pop(), /*is_private=*/false,
  591. /*is_method=*/false,
  592. state.public_function_param_counts.pop_back_val(),
  593. /*indent=*/" ", state.param_names, os);
  594. }
  595. for ([[maybe_unused]] int _ : llvm::seq(params.public_method_decls)) {
  596. os << line_sep;
  597. GenerateFunctionDecl(unique_member_names.Pop(), /*is_private=*/false,
  598. /*is_method=*/true,
  599. state.public_method_param_counts.pop_back_val(),
  600. /*indent=*/" ", state.param_names, os);
  601. }
  602. if (IsCpp()) {
  603. os << "\n private:\n";
  604. // Reset the separator.
  605. line_sep = llvm::ListSeparator("\n");
  606. }
  607. for ([[maybe_unused]] int _ : llvm::seq(params.private_function_decls)) {
  608. os << line_sep;
  609. GenerateFunctionDecl(unique_member_names.Pop(), /*is_private=*/true,
  610. /*is_method=*/false,
  611. state.private_function_param_counts.pop_back_val(),
  612. /*indent=*/" ", state.param_names, os);
  613. }
  614. for ([[maybe_unused]] int _ : llvm::seq(params.private_method_decls)) {
  615. os << line_sep;
  616. GenerateFunctionDecl(unique_member_names.Pop(), /*is_private=*/true,
  617. /*is_method=*/true,
  618. state.private_method_param_counts.pop_back_val(),
  619. /*indent=*/" ", state.param_names, os);
  620. }
  621. os << line_sep;
  622. for ([[maybe_unused]] int _ : llvm::seq(params.private_field_decls)) {
  623. if (!IsCpp()) {
  624. os << " private var " << unique_member_names.Pop() << ": i32;\n";
  625. } else {
  626. os << " int " << unique_member_names.Pop() << ";\n";
  627. }
  628. }
  629. os << "}" << (IsCpp() ? ";" : "") << "\n";
  630. }
  631. } // namespace Carbon::Testing