rewriter.cpp 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "migrate_cpp/rewriter.h"
  5. #include "clang/Tooling/Tooling.h"
  6. #include "llvm/ADT/Twine.h"
  7. #include "llvm/Support/FormatVariadic.h"
  8. namespace Carbon {
  // Text emitted in place of any node for which no replacement has been
  // recorded in the segment map.
  static constexpr char CppPlaceholder[] = "__cpp__{ ... }";
  10. auto OutputWriter::Write(clang::SourceLocation loc,
  11. const OutputSegment& segment) const -> bool {
  12. return std::visit(
  13. [&](auto& content) {
  14. using type = std::decay_t<decltype(content)>;
  15. auto [begin, end] = bounds;
  16. if constexpr (std::is_same_v<type, std::string>) {
  17. auto begin_offset = source_manager.getDecomposedLoc(loc).second;
  18. // Append the string replacement if the node being replaced falls
  19. // within `bounds`.
  20. if (begin <= begin_offset && begin_offset < end) {
  21. output.append(content);
  22. }
  23. } else if constexpr (std::is_same_v<type, clang::DynTypedNode> ||
  24. std::is_same_v<type, clang::TypeLoc>) {
  25. auto content_loc = content.getSourceRange().getBegin();
  26. auto begin_offset =
  27. source_manager.getDecomposedLoc(content_loc).second;
  28. // If the node we're considering a replacement for is already beyond
  29. // the region for which we want to make a replacement, exit early
  30. // declaring that we have completed replacements (by returning false).
  31. // Otherwise proceed. Note that we do not exit early or skip anything
  32. // if the node comes before the relevant region. This is because many
  33. // nodes in Clang's AST have a starting source location but a
  34. // meaningless end location, and while the start of the segment may
  35. // not be in the range, as we recurse, sub-segments may indeed end up
  36. // being printed.
  37. if (begin_offset >= end) {
  38. return false;
  39. }
  40. if (auto iter = map.find(content); iter == map.end()) {
  41. output.append(CppPlaceholder);
  42. } else {
  43. for (const auto& output_segment : iter->second) {
  44. if (!Write(content.getSourceRange().getBegin(), output_segment)) {
  45. return false;
  46. }
  47. }
  48. }
  49. } else {
  50. static_assert(std::is_void_v<type>,
  51. "Failed to handle a case in the `std::variant`.");
  52. }
  53. return true;
  54. },
  55. segment.content_);
  56. }
  57. auto MigrationConsumer::HandleTranslationUnit(clang::ASTContext& context)
  58. -> void {
  59. RewriteBuilder rewriter(context, segment_map_);
  60. rewriter.TraverseAST(context);
  61. auto translation_unit_node =
  62. clang::DynTypedNode::create(*context.getTranslationUnitDecl());
  63. auto iter = segment_map_.find(translation_unit_node);
  64. if (iter == segment_map_.end()) {
  65. result_.append(CppPlaceholder);
  66. } else {
  67. OutputWriter w{
  68. .map = segment_map_,
  69. .bounds = output_range_,
  70. .source_manager = context.getSourceManager(),
  71. .output = result_,
  72. };
  73. for (const auto& output_segment : iter->second) {
  74. w.Write(translation_unit_node.getSourceRange().getBegin(),
  75. output_segment);
  76. }
  77. }
  78. }
  79. auto RewriteBuilder::TextFor(clang::SourceLocation begin,
  80. clang::SourceLocation end) const
  81. -> llvm::StringRef {
  82. auto range = clang::CharSourceRange::getCharRange(begin, end);
  83. return clang::Lexer::getSourceText(range, context_.getSourceManager(),
  84. context_.getLangOpts());
  85. }
  86. auto RewriteBuilder::TextForTokenAt(clang::SourceLocation loc) const
  87. -> llvm::StringRef {
  88. auto& source_manager = context_.getSourceManager();
  89. auto [file_id, offset] = source_manager.getDecomposedLoc(loc);
  90. llvm::StringRef file = source_manager.getBufferData(file_id);
  91. clang::Lexer lexer(source_manager.getLocForStartOfFile(file_id),
  92. context_.getLangOpts(), file.begin(), file.data() + offset,
  93. file.end());
  94. clang::Token token;
  95. lexer.LexFromRawLexer(token);
  96. return TextFor(loc, loc.getLocWithOffset(token.getLength()));
  97. }
  98. // TODO: The output written in this member function needs to be
  99. // architecture-dependent. Moreover, even if the output is correct in the sense
  100. // that the types match and are interoperable between Carbon and C++, they may
  101. // not be semantically correct: If the C++ code specifies the type `long`, and
  102. // on the platform for which the migration is occurring `long` has 64-bits, we
  103. // may not want to use `i64` as the replacement: The C++ code may be intended to
  104. // operate in environments where `long` is only 32-bits wide. We need to develop
  105. // a strategy for determining builtin-type replacements that addresses these
  106. // issues.
  107. auto RewriteBuilder::VisitBuiltinTypeLoc(clang::BuiltinTypeLoc type_loc)
  108. -> bool {
  109. llvm::StringRef content;
  110. switch (type_loc.getTypePtr()->getKind()) {
  111. case clang::BuiltinType::Bool:
  112. content = "bool";
  113. break;
  114. case clang::BuiltinType::Char_U:
  115. content = "char";
  116. break;
  117. case clang::BuiltinType::UChar:
  118. content = "u8";
  119. break;
  120. case clang::BuiltinType::UShort:
  121. content = "u16";
  122. break;
  123. case clang::BuiltinType::UInt:
  124. content = "u32";
  125. break;
  126. case clang::BuiltinType::ULong:
  127. content = "u64";
  128. break;
  129. case clang::BuiltinType::ULongLong:
  130. content = "u64";
  131. break;
  132. case clang::BuiltinType::UInt128:
  133. content = "u128";
  134. break;
  135. case clang::BuiltinType::Char_S:
  136. content = "char";
  137. break;
  138. case clang::BuiltinType::SChar:
  139. content = "i8";
  140. break;
  141. case clang::BuiltinType::Short:
  142. content = "i16";
  143. break;
  144. case clang::BuiltinType::Int:
  145. content = "i32";
  146. break;
  147. case clang::BuiltinType::Long:
  148. content = "i64";
  149. break;
  150. case clang::BuiltinType::LongLong:
  151. content = "i64";
  152. break;
  153. case clang::BuiltinType::Int128:
  154. content = "i128";
  155. break;
  156. case clang::BuiltinType::Float:
  157. content = "f32";
  158. break;
  159. case clang::BuiltinType::Double:
  160. content = "f64";
  161. break;
  162. default:
  163. // In this case we do not know what the output should be so we do not
  164. // write any.
  165. return true;
  166. }
  167. SetReplacement(type_loc, OutputSegment(content));
  168. return true;
  169. }
  170. auto RewriteBuilder::VisitCXXBoolLiteralExpr(clang::CXXBoolLiteralExpr* expr)
  171. -> bool {
  172. SetReplacement(expr, OutputSegment(expr->getValue() ? "true" : "false"));
  173. return true;
  174. }
  175. auto RewriteBuilder::VisitDeclRefExpr(clang::DeclRefExpr* expr) -> bool {
  176. SetReplacement(expr, OutputSegment(TextForTokenAt(expr->getBeginLoc())));
  177. return true;
  178. }
  179. auto RewriteBuilder::VisitDeclStmt(clang::DeclStmt* stmt) -> bool {
  180. std::vector<OutputSegment> segments;
  181. for (clang::Decl* decl : stmt->decls()) {
  182. segments.push_back(OutputSegment(decl));
  183. segments.push_back(OutputSegment(";\n"));
  184. }
  185. SetReplacement(stmt, std::move(segments));
  186. return true;
  187. }
  188. auto RewriteBuilder::VisitIntegerLiteral(clang::IntegerLiteral* expr) -> bool {
  189. // TODO: Replace suffixes.
  190. std::string text(TextForTokenAt(expr->getBeginLoc()));
  191. for (char& c : text) {
  192. // Carbon uses underscores for digit separators whereas C++ uses single
  193. // quotation marks. Convert all `'` to `_`.
  194. if (c == '\'') {
  195. c = '_';
  196. }
  197. }
  198. SetReplacement(expr, {OutputSegment(std::move(text))});
  199. return true;
  200. }
  201. auto RewriteBuilder::VisitPointerTypeLoc(clang::PointerTypeLoc type_loc)
  202. -> bool {
  203. SetReplacement(type_loc,
  204. {OutputSegment(type_loc.getPointeeLoc()), OutputSegment("*")});
  205. return true;
  206. }
  207. auto RewriteBuilder::VisitTranslationUnitDecl(clang::TranslationUnitDecl* decl)
  208. -> bool {
  209. std::vector<OutputSegment> segments;
  210. // Clang starts each translation unit with some initial `TypeDefDecl`s that
  211. // are not part of the written text. We want to skip past these initial
  212. // declarations, which we do by ignoring any node of type `TypeDefDecl` which
  213. // has an invalid source location.
  214. auto iter = decl->decls_begin();
  215. while (iter != decl->decls_end() && llvm::isa<clang::TypedefDecl>(*iter) &&
  216. (*iter)->getLocation().isInvalid()) {
  217. ++iter;
  218. }
  219. for (; iter != decl->decls_end(); ++iter) {
  220. clang::Decl* d = *iter;
  221. segments.push_back(OutputSegment(d));
  222. segments.push_back(OutputSegment(";\n"));
  223. }
  224. SetReplacement(decl, std::move(segments));
  225. return true;
  226. }
  227. auto RewriteBuilder::VisitUnaryOperator(clang::UnaryOperator* expr) -> bool {
  228. switch (expr->getOpcode()) {
  229. case clang::UO_AddrOf:
  230. SetReplacement(expr,
  231. {OutputSegment("&"), OutputSegment(expr->getSubExpr())});
  232. break;
  233. default:
  234. // TODO: Finish implementing cases.
  235. break;
  236. }
  237. return true;
  238. }
  239. auto RewriteBuilder::VisitVarDecl(clang::VarDecl* decl) -> bool {
  240. // TODO: Check storage class. Determine what happens for static local
  241. // variables.
  242. bool is_const = decl->getType().isConstQualified();
  243. std::vector<OutputSegment> segments = {
  244. OutputSegment(llvm::formatv("{0} {1}: ", is_const ? "let" : "var",
  245. decl->getNameAsString())),
  246. OutputSegment(decl->getTypeSourceInfo()->getTypeLoc()),
  247. };
  248. if (clang::Expr* init = decl->getInit()) {
  249. segments.push_back(OutputSegment(" = "));
  250. segments.push_back(OutputSegment(init));
  251. }
  252. SetReplacement(decl, std::move(segments));
  253. return true;
  254. }
  255. } // namespace Carbon