filesystem.cpp 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "common/filesystem.h"
  5. #include <fcntl.h>
  6. #include <unistd.h>
  7. #include "common/build_data.h"
  8. #include "llvm/Support/MathExtras.h"
  9. namespace Carbon::Filesystem {
  10. // Render an error number from `errno` to the provided stream using the richest
  11. // rendering available on the platform.
  12. static auto PrintErrorNumber(llvm::raw_ostream& out, int errnum) -> void {
  13. #if defined(_GNU_SOURCE) && \
  14. (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 32))
  15. // For sufficiently recent glibc versions, use GNU-specific routines to
  16. // compute the error name and description.
  17. llvm::StringRef name = strerrordesc_np(errnum);
  18. llvm::StringRef desc = strerrorname_np(errnum);
  19. out << llvm::formatv("{0}: {1}", name, desc);
  20. #elif defined(__APPLE__) || defined(_GNU_SOURCE) || defined(_POSIX_SOURCE)
  21. // Broadly portable fallback for Unix-like systems.
  22. char buffer[4096];
  23. #ifdef _GNU_SOURCE
  24. const char* str = strerror_r(errnum, buffer, sizeof(buffer));
  25. // The GNU version doesn't report a meta-error.
  26. int meta_error = 0;
  27. #else
  28. int meta_error = strerror_r(errnum, buffer, sizeof(buffer));
  29. const char* str = buffer;
  30. #endif
  31. if (meta_error == 0) {
  32. out << llvm::formatv("errno {0}: {1}", errnum, llvm::StringRef(str));
  33. } else {
  34. out << llvm::formatv(
  35. "error number {0}; encountered meta-error number {1} while rendering "
  36. "an error message",
  37. errnum, meta_error);
  38. }
  39. #else
  40. #error TODO: Implement this for other platforms.
  41. #endif
  42. }
  43. auto FdError::Print(llvm::raw_ostream& out) const -> void {
  44. // The `format_` member is a `StringLiteral` that is null terminated, so
  45. // `.data()` is safe here.
  46. // NOLINTNEXTLINE(bugprone-suspicious-stringview-data-usage)
  47. out << llvm::formatv(format_.data(), fd_) << " failed: ";
  48. PrintErrorNumber(out, unix_errnum());
  49. }
  50. auto PathError::Print(llvm::raw_ostream& out) const -> void {
  51. // The `format_` member is a `StringLiteral` that is null terminated, so
  52. // `.data()` is safe here.
  53. // NOLINTNEXTLINE(bugprone-suspicious-stringview-data-usage)
  54. out << llvm::formatv(format_.data(), path_, dir_fd_) << " failed: ";
  55. PrintErrorNumber(out, unix_errnum());
  56. }
  57. auto Internal::FileRefBase::ReadToString() -> ErrorOr<std::string, FdError> {
  58. std::string result;
  59. // Read a buffer at a time until we reach the end. We use the pipe buffer
  60. // length as our max buffer size as it is likely to be small but reasonable
  61. // for the OS, and in the case of pipes the same chunking in which the data
  62. // will arrive.
  63. //
  64. // TODO: Replace this with a smaller buffer and using `resize_and_overwrite`
  65. // to read into the string in-place for larger strings. Unclear if that will
  66. // be any faster, but it will be much more friendly to callers with
  67. // constrained stack sizes and use less memory overall.
  68. std::byte buffer[PIPE_BUF];
  69. for (;;) {
  70. auto read_result = ReadToBuffer(buffer);
  71. if (!read_result.ok()) {
  72. return std::move(read_result).error();
  73. }
  74. if (read_result->empty()) {
  75. // EOF
  76. break;
  77. }
  78. result.append(reinterpret_cast<const char*>(read_result->data()),
  79. read_result->size());
  80. }
  81. return result;
  82. }
  83. auto Internal::FileRefBase::WriteFromString(llvm::StringRef str)
  84. -> ErrorOr<Success, FdError> {
  85. auto bytes = llvm::ArrayRef<std::byte>(
  86. reinterpret_cast<const std::byte*>(str.data()), str.size());
  87. while (!bytes.empty()) {
  88. auto write_result = WriteFromBuffer(bytes);
  89. if (!write_result.ok()) {
  90. return std::move(write_result).error();
  91. }
  92. bytes = *write_result;
  93. }
  94. return Success();
  95. }
  96. auto DirRef::OpenDir(const std::filesystem::path& path,
  97. CreationOptions creation_options, ModeType creation_mode,
  98. OpenFlags open_flags) -> ErrorOr<Dir, PathError> {
  99. // If we potentially need to create a directory, we have to do that
  100. // separately as no systems support `O_CREAT | O_DIRECTORY`, even though
  101. // that would be (much) nicer.
  102. if (creation_options == CreateNew) {
  103. // If we are required to be the one that created the directory, disable
  104. // following the last symlink when we open that directory. The last symlink
  105. // is the only one that matters for security here because it is only valid
  106. // to create the last component. It is that directory component that we want
  107. // to ensure has not been replaced with a symlink by an adversarial
  108. // concurrent process.
  109. open_flags |= OpenFlags::NoFollow;
  110. }
  111. if (creation_options != OpenExisting) {
  112. CARBON_CHECK(creation_options != CreateAlways,
  113. "Invalid `creation_options` value of `CreateAlways`: there is "
  114. "no support for truncating directories, and so they cannot be "
  115. "created in an analogous way to files if they already exist.");
  116. if (mkdirat(dfd_, path.c_str(), creation_mode) != 0) {
  117. // Unless the error is just that the path already exists, and that is
  118. // allowed for the requested creation flags, report any error here as part
  119. // of opening just like we would if the error originated from `openat`
  120. // with `O_CREAT`.
  121. if (creation_options == CreateNew || errno != EEXIST) {
  122. return PathError(errno,
  123. "Calling `mkdirat` on '{0}' relative to '{1}' during "
  124. "DirRef::OpenDir",
  125. path, dfd_);
  126. }
  127. }
  128. }
  129. // Open this path as a directory. Note that this has to succeed, and when we
  130. // created the directory we require the last component to not be a symlink in
  131. // case it was _replaced_ with a symlink while running.
  132. int result_fd =
  133. openat(dfd_, path.c_str(), static_cast<int>(open_flags) | O_DIRECTORY);
  134. if (result_fd == -1) {
  135. // No need for `EINTR` handling here as if this is a FIFO it would be an
  136. // error with `O_DIRECTORY`.
  137. return PathError(
  138. errno,
  139. "Calling `openat` on '{0}' relative to '{1}' during DirRef::OpenDir",
  140. path, dfd_);
  141. }
  142. Dir result(result_fd);
  143. // If we were required to create the directory, we also need to verify that
  144. // the opened file descriptor continues to have the same permissions and the
  145. // correct owner as we couldn't do the creation atomically with the open. This
  146. // defends against an adversarial removal of the created directory and
  147. // creation of a new directory with the same name but either with wider
  148. // permissions such as all-write, or with a different owner.
  149. //
  150. // We don't defend against replacement with a directory of the same name, same
  151. // permissions, same owner, but different group. There is no good way to do
  152. // this defense given the complexity of group assignment, and there appears to
  153. // be no need. Achieving such a replacement without superuser power would
  154. // require a parent directory with `setgid` bit, and a group that gives the
  155. // attacker access -- but such a parent directory would make *any* creation
  156. // vulnerable without any need for a replacement, so we can't defend against
  157. // that here. The caller has ample tools to defend against this including
  158. // taking care with the parent directory and restricting the group permission
  159. // bits which we *do* verify.
  160. if (creation_options == CreateNew) {
  161. auto stat_result = result.Stat();
  162. if (!stat_result.ok()) {
  163. // Manually propagate this error so we can attach it back to the opened
  164. // path and relative directory.
  165. return PathError(stat_result.error().unix_errnum(),
  166. "DirRef::Stat after opening '{0}' relative to '{1}'",
  167. path, dfd_);
  168. }
  169. // Check that the owning UID is the current effective UID.
  170. if (stat_result->unix_uid() != geteuid()) {
  171. // Model this as `EPERM`, which is a bit awkward, but should be fine.
  172. return PathError(EPERM,
  173. "Unexpected UID change after creating '{0}' relative to "
  174. "'{1}' during DirRef::OpenDir",
  175. path, dfd_);
  176. }
  177. // Check that the permissions are a subset of the requested ones. They may
  178. // have been masked down by `umask`, but if there are *new* permissions,
  179. // that would be a security issue.
  180. if ((stat_result->permissions() & creation_mode) !=
  181. stat_result->permissions()) {
  182. // Model this with `EPERM` and a custom message.
  183. return PathError(EPERM,
  184. "Unexpected permissions after creating '{0}' relative "
  185. "to '{1}' during DirRef::OpenDir",
  186. path, dfd_);
  187. }
  188. }
  189. return result;
  190. }
  191. auto DirRef::ReadFileToString(const std::filesystem::path& path)
  192. -> ErrorOr<std::string, PathError> {
  193. CARBON_ASSIGN_OR_RETURN(ReadFile f, OpenReadOnly(path));
  194. auto result = f.ReadToString();
  195. if (result.ok()) {
  196. return *std::move(result);
  197. }
  198. return PathError(result.error().unix_errnum(),
  199. "Dir::ReadFileToString on '{0}' relative to '{1}'", path,
  200. dfd_);
  201. }
  202. auto DirRef::WriteFileFromString(const std::filesystem::path& path,
  203. llvm::StringRef content,
  204. CreationOptions creation_options)
  205. -> ErrorOr<Success, PathError> {
  206. CARBON_ASSIGN_OR_RETURN(WriteFile f, OpenWriteOnly(path, creation_options));
  207. auto write_result = f.WriteFromString(content);
  208. if (!write_result.ok()) {
  209. return PathError(
  210. write_result.error().unix_errnum(),
  211. "Write error in Dir::WriteFileFromString on '{0}' relative to '{1}'",
  212. path, dfd_);
  213. }
  214. auto close_result = std::move(f).Close();
  215. if (!close_result.ok()) {
  216. return PathError(
  217. close_result.error().unix_errnum(),
  218. "Close error in Dir::WriteFileFromString on '{0}' relative to '{1}'",
  219. path, dfd_);
  220. }
  221. return Success();
  222. }
  223. auto DirRef::CreateDirectories(const std::filesystem::path& path,
  224. ModeType creation_mode)
  225. -> ErrorOr<Dir, PathError> {
  226. // Avoid having to handle an empty path by immediately rejecting it as
  227. // invalid.
  228. if (path.empty()) {
  229. return PathError(EINVAL,
  230. "DirRef::CreateDirectories on '{0}' relative to '{1}'",
  231. path, dfd_);
  232. }
  233. // Try directly opening the directory and use that if successful. This is an
  234. // important hot path case of users essentially doing an "open-always" form of
  235. // creating multiple steps of directories.
  236. auto open_result = OpenDir(path, OpenExisting);
  237. if (open_result.ok()) {
  238. return std::move(*open_result);
  239. } else if (!open_result.error().no_entity()) {
  240. return std::move(open_result).error();
  241. }
  242. // Walk from the full path towards this directory (or the root) to find the
  243. // first existing directory. This is faster than walking down as no file
  244. // descriptors have to be allocated for any intervening directories, etc. We
  245. // keep the path components that are missing as we pop them off for easy
  246. // traversal back down.
  247. std::optional<Dir> work_dir;
  248. // Paths typically consist of relatively few components
  249. // and so we can use a bit of stack and avoid allocating and moving the paths
  250. // in common cases. We use `8` as an arbitrary but likely good for all of the
  251. // hottest cases.
  252. llvm::SmallVector<std::filesystem::path, 8> missing_components;
  253. missing_components.push_back(path.filename());
  254. for (std::filesystem::path parent_path = path.parent_path();
  255. !parent_path.empty(); parent_path = parent_path.parent_path()) {
  256. auto open_result = OpenDir(parent_path, OpenExisting);
  257. if (open_result.ok()) {
  258. work_dir = std::move(*open_result);
  259. break;
  260. }
  261. missing_components.push_back(parent_path.filename());
  262. }
  263. CARBON_CHECK(!missing_components.empty());
  264. // If we haven't yet opened an intermediate directory, start by creating one
  265. // relative to this directory. We can't do this as part of the loop below as
  266. // `this` and the newly opened directory have different types.
  267. if (!work_dir) {
  268. std::filesystem::path component = missing_components.pop_back_val();
  269. CARBON_ASSIGN_OR_RETURN(
  270. Dir component_dir,
  271. OpenDir(component, CreationOptions::OpenAlways, creation_mode));
  272. // Move this component into our temporary directory slot.
  273. work_dir = std::move(component_dir);
  274. }
  275. // Now walk through the remaining components opening and creating each
  276. // relative to the previous.
  277. while (!missing_components.empty()) {
  278. std::filesystem::path component = missing_components.pop_back_val();
  279. CARBON_ASSIGN_OR_RETURN(
  280. Dir component_dir,
  281. work_dir->OpenDir(component, CreationOptions::OpenAlways,
  282. creation_mode));
  283. // Close the current temporary directory and move the new component
  284. // directory object into its place.
  285. work_dir = std::move(component_dir);
  286. }
  287. CARBON_CHECK(work_dir,
  288. "Should always have created at least one directory for a "
  289. "non-empty path!");
  290. return std::move(work_dir).value();
  291. }
  292. auto DirRef::Rmtree(const std::filesystem::path& path)
  293. -> ErrorOr<Success, PathError> {
  294. struct DirAndIterator {
  295. DirRef::Reader dir;
  296. ssize_t dir_entry_start;
  297. };
  298. llvm::SmallVector<DirAndIterator> dir_stack;
  299. llvm::SmallVector<std::filesystem::path> dir_entries;
  300. llvm::SmallVector<std::filesystem::path> unknown_entries;
  301. dir_entries.push_back(path);
  302. for (;;) {
  303. // When we bottom out, we're removing the initial tree path and doing so
  304. // relative to `this` directory.
  305. DirRef current = dir_stack.empty() ? *this : dir_stack.back().dir;
  306. ssize_t dir_entry_start =
  307. dir_stack.empty() ? 0 : dir_stack.back().dir_entry_start;
  308. // If we've finished all the child directories of the current entry in the
  309. // stack, pop it off and continue.
  310. if (dir_entry_start == static_cast<ssize_t>(dir_entries.size())) {
  311. dir_stack.pop_back();
  312. continue;
  313. }
  314. CARBON_CHECK(dir_entry_start < static_cast<ssize_t>(dir_entries.size()));
  315. // Take the last entry under the current directory and try removing it.
  316. const std::filesystem::path& entry_path = dir_entries.back();
  317. auto rmdir_result = current.Rmdir(entry_path);
  318. if (rmdir_result.ok() || rmdir_result.error().no_entity()) {
  319. // Removed here or elsewhere already, so pop the entry.
  320. dir_entries.pop_back();
  321. if (dir_entries.empty()) {
  322. // The last entry is the input path with an empty stack, so we've
  323. // finished at this point.
  324. CARBON_CHECK(dir_stack.empty());
  325. return Success();
  326. }
  327. continue;
  328. }
  329. // If we get any error other than not-empty, just return that.
  330. if (!rmdir_result.error().not_empty()) {
  331. return std::move(rmdir_result).error();
  332. }
  333. // Recurse into the subdirectory since it isn't empty, opening it, getting a
  334. // reader, and pushing it onto our stack.
  335. CARBON_ASSIGN_OR_RETURN(Dir subdir, current.OpenDir(entry_path));
  336. auto read_result = std::move(subdir).TakeAndRead();
  337. if (!read_result.ok()) {
  338. return PathError(
  339. read_result.error().unix_errnum(),
  340. "Dir::Read on '{0}' relative to '{1}' during RmdirRecursively",
  341. entry_path, current.dfd_);
  342. }
  343. dir_stack.push_back(
  344. {*std::move(read_result), static_cast<ssize_t>(dir_entries.size())});
  345. // Now read the directory entries. It would be nice to be able to directly
  346. // remove the files and empty directories as we find them when reading, and
  347. // the POSIX spec appears to require that to work, but testing shows at
  348. // least some Linux environments don't work reliably in this case and will
  349. // fail to visit some entries entirely. As a consequence, we walk the entire
  350. // directory and collect the entries into data structures before beginning
  351. // to remove them.
  352. DirRef::Reader& subdir_reader = dir_stack.back().dir;
  353. for (const auto& entry : subdir_reader) {
  354. llvm::StringRef name = entry.name();
  355. if (name == "." || name == "..") {
  356. continue;
  357. }
  358. if (entry.is_known_dir()) {
  359. dir_entries.push_back(name.str());
  360. } else {
  361. // We end up here for entries known to be regular files, other kinds of
  362. // non-directory entries, or when the entry kind isn't known.
  363. //
  364. // Unless we *know* the entry is a directory, we put it into the unknown
  365. // entries. For these, we unlink them first in case they are
  366. // non-directory entries and use the failure of that to move any
  367. // directories that end up here to the directory entries list.
  368. unknown_entries.push_back(name.str());
  369. }
  370. }
  371. // We can immediately try to unlink all the unknown entries, which will
  372. // include any regular files, and use an error on directories that were
  373. // unknown above to switch them to the `dir_entries` list.
  374. while (!unknown_entries.empty()) {
  375. std::filesystem::path name = unknown_entries.pop_back_val();
  376. auto unlink_result = subdir_reader.Unlink(name);
  377. if (unlink_result.ok() || unlink_result.error().no_entity()) {
  378. continue;
  379. } else if (!unlink_result.error().is_dir()) {
  380. return std::move(unlink_result).error();
  381. }
  382. dir_entries.push_back(std::move(name));
  383. }
  384. // We'll handle the directory entries we've queued here in the next
  385. // iteration, removing them or recursing as needed.
  386. }
  387. }
  388. auto DirRef::ReadlinkSlow(const std::filesystem::path& path)
  389. -> ErrorOr<std::string, PathError> {
  390. constexpr ssize_t MinBufferSize =
  391. #ifdef PATH_MAX
  392. PATH_MAX
  393. #else
  394. 1024
  395. #endif
  396. ;
  397. // Read directly into a string to avoid allocating two large buffers.
  398. std::string large_buffer;
  399. // Stat the symlink to get an initial guess at the size.
  400. CARBON_ASSIGN_OR_RETURN(FileStatus status, Lstat(path));
  401. // We try to use the size from the `lstat` unless it is empty, in which case
  402. // we try to use our minimum buffer size which is `PATH_MAX` or a constant
  403. // value. We have a fallback to dynamically discover an adequate buffer size
  404. // below that will handle any inaccuracy.
  405. ssize_t buffer_size = status.size();
  406. if (buffer_size == 0) {
  407. buffer_size = MinBufferSize;
  408. }
  409. large_buffer.resize(status.size());
  410. ssize_t result =
  411. readlinkat(dfd_, path.c_str(), large_buffer.data(), large_buffer.size());
  412. if (result == -1) {
  413. return PathError(errno, "Readlink on '{0}' relative to '{1}'", path, dfd_);
  414. }
  415. // Now the really bad fallback case: if there are racing writes to the
  416. // symlink, the guessed size may not have been large enough. As a last-ditch
  417. // effort, begin doubling (from the next power of two >= our min buffer size)
  418. // the length until it fits. We cap this at 10 MiB to prevent egregious file
  419. // system contents (or some bug somewhere) from exhausting memory.
  420. constexpr ssize_t MaxBufferSize = 10 << 20;
  421. while (result == static_cast<ssize_t>(large_buffer.size())) {
  422. int64_t next_buffer_size = std::max<ssize_t>(
  423. MinBufferSize, llvm::NextPowerOf2(large_buffer.size()));
  424. if (next_buffer_size > MaxBufferSize) {
  425. return PathError(ENOMEM, "Readlink on '{0}' relative to '{1}'", path,
  426. dfd_);
  427. }
  428. large_buffer.resize(next_buffer_size);
  429. result = readlinkat(dfd_, path.c_str(), large_buffer.data(),
  430. large_buffer.size());
  431. if (result == -1) {
  432. return PathError(errno, "Readlink on '{0}' relative to '{1}'", path,
  433. dfd_);
  434. }
  435. }
  436. // Fix-up the size of the string and return it.
  437. large_buffer.resize(result);
  438. return large_buffer;
  439. }
  440. auto MakeTmpDir() -> ErrorOr<RemovingDir, Error> {
  441. std::filesystem::path tmpdir_path = "/tmp";
  442. // We use both `TEST_TMPDIR` and `TMPDIR`. The `TEST_TMPDIR` is set by Bazel
  443. // and preferred to keep tests using the expected output tree rather than
  444. // the system temporary directory.
  445. for (const char* tmpdir_env_name : {"TEST_TMPDIR", "TMPDIR"}) {
  446. const char* tmpdir_env_cstr = getenv(tmpdir_env_name);
  447. if (tmpdir_env_cstr == nullptr) {
  448. continue;
  449. }
  450. std::filesystem::path tmpdir_env = tmpdir_env_cstr;
  451. if (!tmpdir_env.is_absolute()) {
  452. continue;
  453. }
  454. tmpdir_path = std::move(tmpdir_env);
  455. break;
  456. }
  457. std::filesystem::path target = BuildData::BuildTarget.str();
  458. tmpdir_path /= target.filename();
  459. tmpdir_path += ".XXXXXX";
  460. std::string tmpdir_path_buffer = tmpdir_path.native();
  461. char* result = mkdtemp(tmpdir_path_buffer.data());
  462. if (result == nullptr) {
  463. RawStringOstream os;
  464. os << llvm::formatv("Calling mkdtemp on '{0}' failed: ",
  465. tmpdir_path.native());
  466. PrintErrorNumber(os, errno);
  467. return Error(os.TakeStr());
  468. }
  469. CARBON_CHECK(result == tmpdir_path_buffer.data(),
  470. "`mkdtemp` used a modified path");
  471. tmpdir_path = std::move(tmpdir_path_buffer);
  472. // Because `mkdtemp` doesn't return an open directory atomically, open the
  473. // created directory and perform safety checks similar to `OpenDir` when
  474. // creating a new directory.
  475. CARBON_ASSIGN_OR_RETURN(
  476. Dir tmp, Cwd().OpenDir(tmpdir_path, OpenExisting, /*creation_mode=*/0,
  477. OpenFlags::NoFollow));
  478. // Make sure we try to remove the directory from here on out.
  479. RemovingDir result_dir(std::move(tmp), tmpdir_path);
  480. // It's a bit awkward to report `fstat` errors as `Error`s, but we
  481. // don't have much choice. The stat failing here would be very weird.
  482. CARBON_ASSIGN_OR_RETURN(FileStatus stat, result_dir.Stat());
  483. // The permissions must be exactly 0700 for a temporary directory, and the UID
  484. // should be ours.
  485. if (stat.permissions() != 0700 && stat.unix_uid() != geteuid()) {
  486. return Error(
  487. llvm::formatv("Found incorrect permissions or UID on tmpdir '{0}'",
  488. tmpdir_path.native())
  489. .str());
  490. }
  491. return result_dir;
  492. }
  493. } // namespace Carbon::Filesystem