rewriter.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "migrate_cpp/rewriter.h"
  5. #include "clang/Tooling/Tooling.h"
  6. #include "llvm/ADT/Twine.h"
  7. #include "llvm/Support/FormatVariadic.h"
  8. namespace Carbon {
  // Text emitted in place of any C++ construct for which no Carbon replacement
  // has been recorded.
  static constexpr char CppPlaceholder[] = "__cpp__{ ... }";
  10. auto OutputWriter::Write(clang::SourceLocation loc,
  11. const OutputSegment& segment) const -> bool {
  12. return std::visit(
  13. [&](auto& content) {
  14. using type = std::decay_t<decltype(content)>;
  15. auto [begin, end] = bounds;
  16. if constexpr (std::is_same_v<type, std::string>) {
  17. auto begin_offset = source_manager.getDecomposedLoc(loc).second;
  18. // Append the string replacement if the node being replaced falls
  19. // within `bounds`.
  20. if (begin <= begin_offset && begin_offset < end) {
  21. output.append(content);
  22. }
  23. } else if constexpr (std::is_same_v<type, clang::DynTypedNode> ||
  24. std::is_same_v<type, clang::TypeLoc>) {
  25. auto content_loc = content.getSourceRange().getBegin();
  26. auto begin_offset =
  27. source_manager.getDecomposedLoc(content_loc).second;
  28. // If the node we're considering a replacement for is already beyond
  29. // the region for which we want to make a replacement, exit early
  30. // declaring that we have completed replacements (by returning false).
  31. // Otherwise proceed. Note that we do not exit early or skip anything
  32. // if the node comes before the relevant region. This is because many
  33. // nodes in Clang's AST have a starting source location but a
  34. // meaningless end location, and while the start of the segment may
  35. // not be in the range, as we recurse, sub-segments may indeed end up
  36. // being printed.
  37. if (begin_offset >= end) {
  38. return false;
  39. }
  40. if (auto iter = map.find(content); iter == map.end()) {
  41. output.append(CppPlaceholder);
  42. } else {
  43. for (const auto& output_segment : iter->second) {
  44. if (!Write(content.getSourceRange().getBegin(), output_segment)) {
  45. return false;
  46. }
  47. }
  48. }
  49. } else {
  50. static_assert(std::is_void_v<type>,
  51. "Failed to handle a case in the `std::variant`.");
  52. }
  53. return true;
  54. },
  55. segment.content_);
  56. }
  57. auto MigrationConsumer::HandleTranslationUnit(clang::ASTContext& context)
  58. -> void {
  59. RewriteBuilder rewriter(context, segment_map_);
  60. rewriter.TraverseAST(context);
  61. auto translation_unit_node =
  62. clang::DynTypedNode::create(*context.getTranslationUnitDecl());
  63. auto iter = segment_map_.find(translation_unit_node);
  64. if (iter == segment_map_.end()) {
  65. result_.append(CppPlaceholder);
  66. } else {
  67. OutputWriter w{
  68. .map = segment_map_,
  69. .bounds = output_range_,
  70. .source_manager = context.getSourceManager(),
  71. .output = result_,
  72. };
  73. for (const auto& output_segment : iter->second) {
  74. w.Write(translation_unit_node.getSourceRange().getBegin(),
  75. output_segment);
  76. }
  77. }
  78. }
  79. auto RewriteBuilder::TextFor(clang::SourceLocation begin,
  80. clang::SourceLocation end) const
  81. -> llvm::StringRef {
  82. auto range = clang::CharSourceRange::getCharRange(begin, end);
  83. return clang::Lexer::getSourceText(range, context_.getSourceManager(),
  84. context_.getLangOpts());
  85. }
  86. auto RewriteBuilder::TextForTokenAt(clang::SourceLocation loc) const
  87. -> llvm::StringRef {
  88. auto& source_manager = context_.getSourceManager();
  89. auto [file_id, offset] = source_manager.getDecomposedLoc(loc);
  90. llvm::StringRef file = source_manager.getBufferData(file_id);
  91. clang::Lexer lexer(source_manager.getLocForStartOfFile(file_id),
  92. context_.getLangOpts(), file.begin(), file.data() + offset,
  93. file.end());
  94. clang::Token token;
  95. lexer.LexFromRawLexer(token);
  96. return TextFor(loc, loc.getLocWithOffset(token.getLength()));
  97. }
  98. // TODO: The output written in this member function needs to be
  99. // architecture-dependent. Moreover, even if the output is correct in the sense
  100. // that the types match and are interoperable between Carbon and C++, they may
  101. // not be semantically correct: If the C++ code specifies the type `long`, and
  102. // on the platform for which the migration is occurring `long` has 64-bits, we
  103. // may not want to use `i64` as the replacement: The C++ code may be intended to
  104. // operate in environments where `long` is only 32-bits wide. We need to develop
  105. // a strategy for determining builtin-type replacements that addresses these
  106. // issues.
  107. auto RewriteBuilder::VisitBuiltinTypeLoc(clang::BuiltinTypeLoc type_loc)
  108. -> bool {
  109. llvm::StringRef content;
  110. switch (type_loc.getTypePtr()->getKind()) {
  111. case clang::BuiltinType::Bool:
  112. content = "bool";
  113. break;
  114. case clang::BuiltinType::Char_U:
  115. content = "char";
  116. break;
  117. case clang::BuiltinType::UChar:
  118. content = "u8";
  119. break;
  120. case clang::BuiltinType::UShort:
  121. content = "u16";
  122. break;
  123. case clang::BuiltinType::UInt:
  124. content = "u32";
  125. break;
  126. case clang::BuiltinType::ULong:
  127. content = "u64";
  128. break;
  129. case clang::BuiltinType::ULongLong:
  130. content = "u64";
  131. break;
  132. case clang::BuiltinType::UInt128:
  133. content = "u128";
  134. break;
  135. case clang::BuiltinType::Char_S:
  136. content = "char";
  137. break;
  138. case clang::BuiltinType::SChar:
  139. content = "i8";
  140. break;
  141. case clang::BuiltinType::Short:
  142. content = "i16";
  143. break;
  144. case clang::BuiltinType::Int:
  145. content = "i32";
  146. break;
  147. case clang::BuiltinType::Long:
  148. content = "i64";
  149. break;
  150. case clang::BuiltinType::LongLong:
  151. content = "i64";
  152. break;
  153. case clang::BuiltinType::Int128:
  154. content = "i128";
  155. break;
  156. case clang::BuiltinType::Float:
  157. content = "f32";
  158. break;
  159. case clang::BuiltinType::Double:
  160. content = "f64";
  161. break;
  162. case clang::BuiltinType::Void:
  163. content = "()";
  164. break;
  165. default:
  166. // In this case we do not know what the output should be so we do not
  167. // write any.
  168. return true;
  169. }
  170. SetReplacement(type_loc, OutputSegment(content));
  171. return true;
  172. }
  173. auto RewriteBuilder::VisitCXXBoolLiteralExpr(clang::CXXBoolLiteralExpr* expr)
  174. -> bool {
  175. SetReplacement(expr, OutputSegment(expr->getValue() ? "true" : "false"));
  176. return true;
  177. }
  178. auto RewriteBuilder::VisitDeclRefExpr(clang::DeclRefExpr* expr) -> bool {
  179. SetReplacement(expr, OutputSegment(TextForTokenAt(expr->getBeginLoc())));
  180. return true;
  181. }
  182. auto RewriteBuilder::VisitDeclStmt(clang::DeclStmt* stmt) -> bool {
  183. std::vector<OutputSegment> segments;
  184. for (clang::Decl* decl : stmt->decls()) {
  185. segments.push_back(OutputSegment(decl));
  186. segments.push_back(OutputSegment(";\n"));
  187. }
  188. SetReplacement(stmt, std::move(segments));
  189. return true;
  190. }
  191. auto RewriteBuilder::VisitImplicitCastExpr(clang::ImplicitCastExpr* expr)
  192. -> bool {
  193. SetReplacement(expr, OutputSegment(expr->getSubExpr()));
  194. return true;
  195. }
  196. auto RewriteBuilder::VisitIntegerLiteral(clang::IntegerLiteral* expr) -> bool {
  197. // TODO: Replace suffixes.
  198. std::string text(TextForTokenAt(expr->getBeginLoc()));
  199. for (char& c : text) {
  200. // Carbon uses underscores for digit separators whereas C++ uses single
  201. // quotation marks. Convert all `'` to `_`.
  202. if (c == '\'') {
  203. c = '_';
  204. }
  205. }
  206. SetReplacement(expr, {OutputSegment(std::move(text))});
  207. return true;
  208. }
  209. auto RewriteBuilder::VisitParmVarDecl(clang::ParmVarDecl* decl) -> bool {
  210. llvm::StringRef name = decl->getName();
  211. std::vector<OutputSegment> segments = {
  212. OutputSegment(llvm::formatv("{0}: ", name.empty() ? "_" : name.str())),
  213. OutputSegment(decl->getTypeSourceInfo()->getTypeLoc()),
  214. };
  215. if (clang::Expr* init = decl->getInit()) {
  216. segments.push_back(OutputSegment(" = "));
  217. segments.push_back(OutputSegment(init));
  218. }
  219. SetReplacement(decl, std::move(segments));
  220. return true;
  221. }
  222. auto RewriteBuilder::VisitPointerTypeLoc(clang::PointerTypeLoc type_loc)
  223. -> bool {
  224. SetReplacement(type_loc,
  225. {OutputSegment(type_loc.getPointeeLoc()), OutputSegment("*")});
  226. return true;
  227. }
  228. auto RewriteBuilder::VisitReturnStmt(clang::ReturnStmt* stmt) -> bool {
  229. SetReplacement(
  230. stmt, {OutputSegment("return "), OutputSegment(stmt->getRetValue())});
  231. return true;
  232. }
  233. auto RewriteBuilder::VisitTranslationUnitDecl(clang::TranslationUnitDecl* decl)
  234. -> bool {
  235. std::vector<OutputSegment> segments;
  236. // Clang starts each translation unit with some initial `TypeDefDecl`s that
  237. // are not part of the written text. We want to skip past these initial
  238. // declarations, which we do by ignoring any node of type `TypeDefDecl` which
  239. // has an invalid source location.
  240. auto iter = decl->decls_begin();
  241. while (iter != decl->decls_end() && llvm::isa<clang::TypedefDecl>(*iter) &&
  242. (*iter)->getLocation().isInvalid()) {
  243. ++iter;
  244. }
  245. for (; iter != decl->decls_end(); ++iter) {
  246. clang::Decl* d = *iter;
  247. segments.push_back(OutputSegment(d));
  248. // Function definitions do not need semicolons.
  249. bool needs_semicolon = !(llvm::isa<clang::FunctionDecl>(d) &&
  250. llvm::cast<clang::FunctionDecl>(d)->hasBody());
  251. segments.push_back(OutputSegment(needs_semicolon ? ";\n" : "\n"));
  252. }
  253. SetReplacement(decl, std::move(segments));
  254. return true;
  255. }
  256. auto RewriteBuilder::VisitUnaryOperator(clang::UnaryOperator* expr) -> bool {
  257. switch (expr->getOpcode()) {
  258. case clang::UO_AddrOf:
  259. SetReplacement(expr,
  260. {OutputSegment("&"), OutputSegment(expr->getSubExpr())});
  261. break;
  262. default:
  263. // TODO: Finish implementing cases.
  264. break;
  265. }
  266. return true;
  267. }
  268. auto RewriteBuilder::TraverseFunctionDecl(clang::FunctionDecl* decl) -> bool {
  269. clang::TypeLoc return_type_loc = decl->getFunctionTypeLoc().getReturnLoc();
  270. if (!TraverseTypeLoc(return_type_loc)) {
  271. return false;
  272. }
  273. std::vector<OutputSegment> segments;
  274. segments.push_back(
  275. OutputSegment(llvm::formatv("fn {0}(", decl->getNameAsString())));
  276. size_t i = 0;
  277. for (; i + 1 < decl->getNumParams(); ++i) {
  278. clang::ParmVarDecl* param = decl->getParamDecl(i);
  279. if (!TraverseDecl(param)) {
  280. return false;
  281. }
  282. segments.push_back(OutputSegment(param));
  283. segments.push_back(OutputSegment(", "));
  284. }
  285. if (i + 1 == decl->getNumParams()) {
  286. clang::ParmVarDecl* param = decl->getParamDecl(i);
  287. if (!TraverseDecl(param)) {
  288. return false;
  289. }
  290. segments.push_back(OutputSegment(param));
  291. }
  292. segments.push_back(OutputSegment(") -> "));
  293. segments.push_back(OutputSegment(return_type_loc));
  294. if (decl->hasBody()) {
  295. segments.push_back(OutputSegment(" {\n"));
  296. auto* stmts = llvm::dyn_cast<clang::CompoundStmt>(decl->getBody());
  297. for (clang::Stmt* stmt : stmts->body()) {
  298. if (!TraverseStmt(stmt)) {
  299. return false;
  300. }
  301. segments.push_back(OutputSegment(stmt));
  302. segments.push_back(OutputSegment(";\n"));
  303. }
  304. segments.push_back(OutputSegment("}"));
  305. }
  306. SetReplacement(decl, std::move(segments));
  307. return true;
  308. }
  309. auto RewriteBuilder::TraverseVarDecl(clang::VarDecl* decl) -> bool {
  310. clang::TypeLoc loc = decl->getTypeSourceInfo()->getTypeLoc();
  311. if (!TraverseTypeLoc(loc)) {
  312. return false;
  313. }
  314. // TODO: Check storage class. Determine what happens for static local
  315. // variables.
  316. bool is_const = decl->getType().isConstQualified();
  317. std::vector<OutputSegment> segments = {
  318. OutputSegment(llvm::formatv("{0} {1}: ", is_const ? "let" : "var",
  319. decl->getNameAsString())),
  320. OutputSegment(decl->getTypeSourceInfo()->getTypeLoc()),
  321. };
  322. if (clang::Expr* init = decl->getInit()) {
  323. if (!TraverseStmt(init)) {
  324. return false;
  325. }
  326. segments.push_back(OutputSegment(" = "));
  327. segments.push_back(OutputSegment(init));
  328. }
  329. SetReplacement(decl, std::move(segments));
  330. return true;
  331. }
  332. } // namespace Carbon