filesystem.cpp 29 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "common/filesystem.h"
  5. #include <fcntl.h>
  6. #include <time.h>
  7. #include <unistd.h>
  8. #include <chrono>
  9. #include "common/build_data.h"
  10. #include "llvm/Support/MathExtras.h"
  11. namespace Carbon::Filesystem {
  12. // Render an error number from `errno` to the provided stream using the richest
  13. // rendering available on the platform.
  14. static auto PrintErrorNumber(llvm::raw_ostream& out, int errnum) -> void {
  15. #if defined(_GNU_SOURCE) && \
  16. (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 32))
  17. // For sufficiently recent glibc versions, use GNU-specific routines to
  18. // compute the error name and description.
  19. llvm::StringRef name = strerrordesc_np(errnum);
  20. llvm::StringRef desc = strerrorname_np(errnum);
  21. out << llvm::formatv("{0}: {1}", name, desc);
  22. #elif defined(__APPLE__) || defined(_GNU_SOURCE) || defined(_POSIX_SOURCE)
  23. // Broadly portable fallback for Unix-like systems.
  24. char buffer[4096];
  25. #ifdef _GNU_SOURCE
  26. const char* str = strerror_r(errnum, buffer, sizeof(buffer));
  27. // The GNU version doesn't report a meta-error.
  28. int meta_error = 0;
  29. #else
  30. int meta_error = strerror_r(errnum, buffer, sizeof(buffer));
  31. const char* str = buffer;
  32. #endif
  33. if (meta_error == 0) {
  34. out << llvm::formatv("errno {0}: {1}", errnum, llvm::StringRef(str));
  35. } else {
  36. out << llvm::formatv(
  37. "error number {0}; encountered meta-error number {1} while rendering "
  38. "an error message",
  39. errnum, meta_error);
  40. }
  41. #else
  42. #error TODO: Implement this for other platforms.
  43. #endif
  44. }
  45. auto FdError::Print(llvm::raw_ostream& out) const -> void {
  46. // The `format_` member is a `StringLiteral` that is null terminated, so
  47. // `.data()` is safe here.
  48. // NOLINTNEXTLINE(bugprone-suspicious-stringview-data-usage)
  49. out << llvm::formatv(format_.data(), fd_) << " failed: ";
  50. PrintErrorNumber(out, unix_errnum());
  51. }
  52. auto PathError::Print(llvm::raw_ostream& out) const -> void {
  53. // The `format_` member is a `StringLiteral` that is null terminated, so
  54. // `.data()` is safe here.
  55. // NOLINTNEXTLINE(bugprone-suspicious-stringview-data-usage)
  56. out << llvm::formatv(format_.data(), path_, dir_fd_) << " failed: ";
  57. PrintErrorNumber(out, unix_errnum());
  58. }
  59. auto Internal::FileRefBase::ReadFileToString()
  60. -> ErrorOr<std::string, FdError> {
  61. std::string result;
  62. // Read a buffer at a time until we reach the end. We use the pipe buffer
  63. // length as our max buffer size as it is likely to be small but reasonable
  64. // for the OS, and in the case of pipes the same chunking in which the data
  65. // will arrive.
  66. //
  67. // TODO: Replace this with a smaller buffer and using `resize_and_overwrite`
  68. // to read into the string in-place for larger strings. Unclear if that will
  69. // be any faster, but it will be much more friendly to callers with
  70. // constrained stack sizes and use less memory overall.
  71. std::byte buffer[PIPE_BUF];
  72. CARBON_RETURN_IF_ERROR(SeekFromBeginning(0));
  73. for (;;) {
  74. auto read_result = ReadToBuffer(buffer);
  75. if (!read_result.ok()) {
  76. return std::move(read_result).error();
  77. }
  78. if (read_result->empty()) {
  79. // EOF
  80. break;
  81. }
  82. result.append(reinterpret_cast<const char*>(read_result->data()),
  83. read_result->size());
  84. }
  85. return result;
  86. }
  87. auto Internal::FileRefBase::WriteFileFromString(llvm::StringRef str)
  88. -> ErrorOr<Success, FdError> {
  89. CARBON_RETURN_IF_ERROR(SeekFromBeginning(0));
  90. auto bytes = llvm::ArrayRef<std::byte>(
  91. reinterpret_cast<const std::byte*>(str.data()), str.size());
  92. while (!bytes.empty()) {
  93. auto write_result = WriteFromBuffer(bytes);
  94. if (!write_result.ok()) {
  95. return std::move(write_result).error();
  96. }
  97. bytes = *write_result;
  98. }
  99. CARBON_RETURN_IF_ERROR(Truncate(str.size()));
  100. return Success();
  101. }
  102. // A macOS specific sleep routine that builds on more standard utilities. This
  103. // is technically a portable implementation so we always compile it but only use
  104. // it on macOS where the more efficient direct use of `clock_nanosleep` isn't
  105. // available.
  106. [[maybe_unused]]
  107. static auto SleepMacos(Duration sleep) -> void {
  108. TimePoint stop = Clock::now() + sleep;
  109. timespec sleep_ts = Internal::DurationToTimespec(sleep);
  110. for (;;) {
  111. timespec rem_sleep_ts = {};
  112. int result = nanosleep(&sleep_ts, &rem_sleep_ts);
  113. if (result == 0) {
  114. return;
  115. }
  116. // Continue sleeping if we get interrupted by a resumable signal. For
  117. // everything else report it.
  118. if (errno != EINTR) {
  119. int errnum = errno;
  120. RawStringOstream error_os;
  121. PrintErrorNumber(error_os, errnum);
  122. CARBON_FATAL("Unexpected error while sleeping: {0}", error_os.TakeStr());
  123. }
  124. // Update to the remaining sleep time for the next attempt at sleeping.
  125. sleep_ts = rem_sleep_ts;
  126. // Also check if the clock has passed our stop time as a fallback to avoid
  127. // too much clock skew.
  128. if (Clock::now() > stop) {
  129. return;
  130. }
  131. }
  132. }
  133. static auto Sleep(Duration sleep) -> void {
  134. // For every platform but macOS we can sleep directly on an absolute time.
  135. #if __APPLE__
  136. // On Apple platforms, dispatch to a specialized routine.
  137. SleepMacos(sleep);
  138. #else
  139. // We use `clock_gettime` instead of the filesystem `Clock` or some other
  140. // `std::chrono` clock because we want to use the exact same clock that we'll
  141. // use for sleeping below, and we'll need the time in a `timespec` for that
  142. // call anyways. We do use a monotonic clock to try and avoid sleeps being
  143. // interrupted by clock changes.
  144. timespec ts = {};
  145. int result = clock_gettime(CLOCK_MONOTONIC, &ts);
  146. CARBON_CHECK(result == 0, "Error getting the time: {0}", strerror(errno));
  147. // Now convert the timespec to a duration that we can safely do arithmetic on.
  148. // Since the sleep interval is in nanoseconds it is tempting to directly do
  149. // arithmetic here, but this has a subtle pitfall near the boundary between
  150. // the nanosecond component and the second component.
  151. //
  152. // Note that our `Duration` uses `__int128` to avoid worrying about running
  153. // out of precision to represent the final deadline.
  154. Duration stop_time = std::chrono::seconds(ts.tv_sec);
  155. stop_time += std::chrono::nanoseconds(ts.tv_nsec);
  156. stop_time += sleep;
  157. // Now convert back to timespec.
  158. ts = Internal::DurationToTimespec(stop_time);
  159. do {
  160. result = clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &ts, nullptr);
  161. // Continue sleeping if we get interrupted by a resumable signal. Because
  162. // we're using a monotonic clock and an absolute deadline time we will
  163. // eventually progress past that deadline.
  164. } while (result != 0 && (errno == EINTR));
  165. if (result != 0) {
  166. int errnum = errno;
  167. RawStringOstream error_os;
  168. PrintErrorNumber(error_os, errnum);
  169. CARBON_FATAL("Unexpected error while sleeping: {0}", error_os.TakeStr());
  170. }
  171. #endif
  172. }
  173. auto Internal::FileRefBase::TryLock(FileLock::Kind kind, Duration deadline,
  174. Duration poll_interval)
  175. -> ErrorOr<FileLock, FdError> {
  176. CARBON_CHECK(poll_interval <= deadline);
  177. if (deadline != Duration(0) && poll_interval == Duration(0)) {
  178. // If the caller didn't provide a poll interval but did provide a deadline,
  179. // pick a poll interval to roughly be 1/1000th of the deadline but at least
  180. // 1 microsecond. We don't support polling faster than 1 microsecond given
  181. // how expensive file locking is.
  182. poll_interval =
  183. std::max(Duration(std::chrono::microseconds(1)), deadline / 1000);
  184. }
  185. if (deadline != Duration(0)) {
  186. CARBON_CHECK(
  187. deadline >= std::chrono::microseconds(10),
  188. "A deadline for a file lock shorter than 10 microseconds is not "
  189. "supported, callers can implement their own polling logic.");
  190. CARBON_CHECK(poll_interval >= std::chrono::microseconds(1),
  191. "Polling for a file lock faster than every microsecond is not "
  192. "supported, callers can implement their own polling logic.");
  193. }
  194. auto stop = Clock::now() + deadline;
  195. for (;;) {
  196. int result = flock(fd_, static_cast<int>(kind) | LOCK_NB);
  197. if (result == 0) {
  198. return FileLock(fd_);
  199. }
  200. // Return an error if this is something other than blocking for the lock to
  201. // be available, or we didn't get a deadline for continuing to try and
  202. // acquire the lock, or we've reached our deadline.
  203. if (errno != EWOULDBLOCK || deadline == Duration(0) ||
  204. Clock::now() >= stop) {
  205. return FdError(errno, "File::TryLock on '{0}'", fd_);
  206. }
  207. // The caller requested attempting to wait up to a deadline to acquire the
  208. // lock with a specific poll interval. Try to sleep for that poll interval
  209. // before trying the lock again.
  210. Sleep(poll_interval);
  211. }
  212. }
  213. auto DirRef::AppendEntriesIf(
  214. llvm::SmallVectorImpl<std::filesystem::path>& entries,
  215. llvm::function_ref<auto(llvm::StringRef name)->bool> predicate)
  216. -> ErrorOr<Success, FdError> {
  217. CARBON_ASSIGN_OR_RETURN(Reader reader, Read());
  218. for (const Entry& entry : reader) {
  219. llvm::StringRef name = entry.name();
  220. if (name == "." || name == "..") {
  221. continue;
  222. }
  223. if (predicate && !predicate(name)) {
  224. continue;
  225. }
  226. entries.push_back(name.str());
  227. }
  228. return Success();
  229. }
  230. auto DirRef::AppendEntriesIf(
  231. llvm::SmallVectorImpl<std::filesystem::path>& dir_entries,
  232. llvm::SmallVectorImpl<std::filesystem::path>& non_dir_entries,
  233. llvm::function_ref<auto(llvm::StringRef name)->bool> predicate)
  234. -> ErrorOr<Success, FdError> {
  235. CARBON_ASSIGN_OR_RETURN(Reader reader, Read());
  236. for (const Entry& entry : reader) {
  237. llvm::StringRef name = entry.name();
  238. if (name == "." || name == "..") {
  239. continue;
  240. }
  241. if (predicate && !predicate(name)) {
  242. continue;
  243. }
  244. std::filesystem::path name_path = name.str();
  245. if (entry.is_known_dir()) {
  246. dir_entries.push_back(std::move(name_path));
  247. continue;
  248. }
  249. if (!entry.is_unknown_type()) {
  250. non_dir_entries.push_back(std::move(name_path));
  251. continue;
  252. }
  253. auto stat_result = Lstat(name_path);
  254. if (!stat_result.ok()) {
  255. return FdError(stat_result.error().unix_errnum(),
  256. "Dir::AppendEntriesIf on '{0}' failed while stat-ing "
  257. "entries to determine which are directories",
  258. dfd_);
  259. }
  260. if (stat_result->is_dir()) {
  261. dir_entries.push_back(std::move(name_path));
  262. } else {
  263. non_dir_entries.push_back(std::move(name_path));
  264. }
  265. }
  266. return Success();
  267. }
  268. auto DirRef::OpenDir(const std::filesystem::path& path,
  269. CreationOptions creation_options, ModeType creation_mode,
  270. OpenFlags open_flags) -> ErrorOr<Dir, PathError> {
  271. // If we potentially need to create a directory, we have to do that
  272. // separately as no systems support `O_CREAT | O_DIRECTORY`, even though
  273. // that would be (much) nicer.
  274. if (creation_options == CreateNew) {
  275. // If we are required to be the one that created the directory, disable
  276. // following the last symlink when we open that directory. The last symlink
  277. // is the only one that matters for security here because it is only valid
  278. // to create the last component. It is that directory component that we want
  279. // to ensure has not been replaced with a symlink by an adversarial
  280. // concurrent process.
  281. open_flags |= OpenFlags::NoFollow;
  282. }
  283. if (creation_options != OpenExisting) {
  284. CARBON_CHECK(creation_options != CreateAlways,
  285. "Invalid `creation_options` value of `CreateAlways`: there is "
  286. "no support for truncating directories, and so they cannot be "
  287. "created in an analogous way to files if they already exist.");
  288. if (mkdirat(dfd_, path.c_str(), creation_mode) != 0) {
  289. // Unless the error is just that the path already exists, and that is
  290. // allowed for the requested creation flags, report any error here as part
  291. // of opening just like we would if the error originated from `openat`
  292. // with `O_CREAT`.
  293. if (creation_options == CreateNew || errno != EEXIST) {
  294. return PathError(errno,
  295. "Calling `mkdirat` on '{0}' relative to '{1}' during "
  296. "DirRef::OpenDir",
  297. path, dfd_);
  298. }
  299. }
  300. }
  301. // Open this path as a directory. Note that this has to succeed, and when we
  302. // created the directory we require the last component to not be a symlink in
  303. // case it was _replaced_ with a symlink while running.
  304. int result_fd =
  305. openat(dfd_, path.c_str(), static_cast<int>(open_flags) | O_DIRECTORY);
  306. if (result_fd == -1) {
  307. // No need for `EINTR` handling here as if this is a FIFO it would be an
  308. // error with `O_DIRECTORY`.
  309. return PathError(
  310. errno,
  311. "Calling `openat` on '{0}' relative to '{1}' during DirRef::OpenDir",
  312. path, dfd_);
  313. }
  314. Dir result(result_fd);
  315. // If we were required to create the directory, we also need to verify that
  316. // the opened file descriptor continues to have the same permissions and the
  317. // correct owner as we couldn't do the creation atomically with the open. This
  318. // defends against an adversarial removal of the created directory and
  319. // creation of a new directory with the same name but either with wider
  320. // permissions such as all-write, or with a different owner.
  321. //
  322. // We don't defend against replacement with a directory of the same name, same
  323. // permissions, same owner, but different group. There is no good way to do
  324. // this defense given the complexity of group assignment, and there appears to
  325. // be no need. Achieving such a replacement without superuser power would
  326. // require a parent directory with `setgid` bit, and a group that gives the
  327. // attacker access -- but such a parent directory would make *any* creation
  328. // vulnerable without any need for a replacement, so we can't defend against
  329. // that here. The caller has ample tools to defend against this including
  330. // taking care with the parent directory and restricting the group permission
  331. // bits which we *do* verify.
  332. if (creation_options == CreateNew) {
  333. auto stat_result = result.Stat();
  334. if (!stat_result.ok()) {
  335. // Manually propagate this error so we can attach it back to the opened
  336. // path and relative directory.
  337. return PathError(stat_result.error().unix_errnum(),
  338. "DirRef::Stat after opening '{0}' relative to '{1}'",
  339. path, dfd_);
  340. }
  341. // Check that the owning UID is the current effective UID.
  342. if (stat_result->unix_uid() != geteuid()) {
  343. // Model this as `EPERM`, which is a bit awkward, but should be fine.
  344. return PathError(EPERM,
  345. "Unexpected UID change after creating '{0}' relative to "
  346. "'{1}' during DirRef::OpenDir",
  347. path, dfd_);
  348. }
  349. // Check that the permissions are a subset of the requested ones. They may
  350. // have been masked down by `umask`, but if there are *new* permissions,
  351. // that would be a security issue.
  352. if ((stat_result->permissions() & creation_mode) !=
  353. stat_result->permissions()) {
  354. // Model this with `EPERM` and a custom message.
  355. return PathError(EPERM,
  356. "Unexpected permissions after creating '{0}' relative "
  357. "to '{1}' during DirRef::OpenDir",
  358. path, dfd_);
  359. }
  360. }
  361. return result;
  362. }
  363. auto DirRef::ReadFileToString(const std::filesystem::path& path)
  364. -> ErrorOr<std::string, PathError> {
  365. CARBON_ASSIGN_OR_RETURN(ReadFile f, OpenReadOnly(path));
  366. auto result = f.ReadFileToString();
  367. if (result.ok()) {
  368. return *std::move(result);
  369. }
  370. return PathError(result.error().unix_errnum(),
  371. "Dir::ReadFileToString on '{0}' relative to '{1}'", path,
  372. dfd_);
  373. }
  374. auto DirRef::WriteFileFromString(const std::filesystem::path& path,
  375. llvm::StringRef content,
  376. CreationOptions creation_options)
  377. -> ErrorOr<Success, PathError> {
  378. CARBON_ASSIGN_OR_RETURN(WriteFile f, OpenWriteOnly(path, creation_options));
  379. auto write_result = f.WriteFileFromString(content);
  380. // Immediately close the file as even if there was a write error we don't want
  381. // to leave the file open.
  382. auto close_result = std::move(f).Close();
  383. // Now report the first error encountered or return success.
  384. if (!write_result.ok()) {
  385. return PathError(
  386. write_result.error().unix_errnum(),
  387. "Write error in Dir::WriteFileFromString on '{0}' relative to '{1}'",
  388. path, dfd_);
  389. }
  390. if (!close_result.ok()) {
  391. return PathError(
  392. close_result.error().unix_errnum(),
  393. "Close error in Dir::WriteFileFromString on '{0}' relative to '{1}'",
  394. path, dfd_);
  395. }
  396. return Success();
  397. }
  398. auto DirRef::CreateDirectories(const std::filesystem::path& path,
  399. ModeType creation_mode)
  400. -> ErrorOr<Dir, PathError> {
  401. // Avoid having to handle an empty path by immediately rejecting it as
  402. // invalid.
  403. if (path.empty()) {
  404. return PathError(EINVAL,
  405. "DirRef::CreateDirectories on '{0}' relative to '{1}'",
  406. path, dfd_);
  407. }
  408. // Try directly opening the directory and use that if successful. This is an
  409. // important hot path case of users essentially doing an "open-always" form of
  410. // creating multiple steps of directories.
  411. auto open_result = OpenDir(path, OpenExisting);
  412. if (open_result.ok()) {
  413. return std::move(*open_result);
  414. } else if (!open_result.error().no_entity()) {
  415. return std::move(open_result).error();
  416. }
  417. // Walk from the full path towards this directory (or the root) to find the
  418. // first existing directory. This is faster than walking down as no file
  419. // descriptors have to be allocated for any intervening directories, etc. We
  420. // keep the path components that are missing as we pop them off for easy
  421. // traversal back down.
  422. std::optional<Dir> work_dir;
  423. // Paths typically consist of relatively few components
  424. // and so we can use a bit of stack and avoid allocating and moving the paths
  425. // in common cases. We use `8` as an arbitrary but likely good for all of the
  426. // hottest cases.
  427. llvm::SmallVector<std::filesystem::path, 8> missing_components;
  428. missing_components.push_back(path.filename());
  429. for (std::filesystem::path parent_path = path.parent_path();
  430. !parent_path.empty(); parent_path = parent_path.parent_path()) {
  431. auto open_result = OpenDir(parent_path, OpenExisting);
  432. if (open_result.ok()) {
  433. work_dir = std::move(*open_result);
  434. break;
  435. }
  436. missing_components.push_back(parent_path.filename());
  437. }
  438. CARBON_CHECK(!missing_components.empty());
  439. // If we haven't yet opened an intermediate directory, start by creating one
  440. // relative to this directory. We can't do this as part of the loop below as
  441. // `this` and the newly opened directory have different types.
  442. if (!work_dir) {
  443. std::filesystem::path component = missing_components.pop_back_val();
  444. CARBON_ASSIGN_OR_RETURN(
  445. Dir component_dir,
  446. OpenDir(component, CreationOptions::OpenAlways, creation_mode));
  447. // Move this component into our temporary directory slot.
  448. work_dir = std::move(component_dir);
  449. }
  450. // Now walk through the remaining components opening and creating each
  451. // relative to the previous.
  452. while (!missing_components.empty()) {
  453. std::filesystem::path component = missing_components.pop_back_val();
  454. CARBON_ASSIGN_OR_RETURN(
  455. Dir component_dir,
  456. work_dir->OpenDir(component, CreationOptions::OpenAlways,
  457. creation_mode));
  458. // Close the current temporary directory and move the new component
  459. // directory object into its place.
  460. work_dir = std::move(component_dir);
  461. }
  462. CARBON_CHECK(work_dir,
  463. "Should always have created at least one directory for a "
  464. "non-empty path!");
  465. return std::move(work_dir).value();
  466. }
  467. auto DirRef::Rmtree(const std::filesystem::path& path)
  468. -> ErrorOr<Success, PathError> {
  469. struct DirAndIterator {
  470. DirRef::Reader dir;
  471. ssize_t dir_entry_start;
  472. };
  473. llvm::SmallVector<DirAndIterator> dir_stack;
  474. llvm::SmallVector<std::filesystem::path> dir_entries;
  475. llvm::SmallVector<std::filesystem::path> unknown_entries;
  476. dir_entries.push_back(path);
  477. for (;;) {
  478. // When we bottom out, we're removing the initial tree path and doing so
  479. // relative to `this` directory.
  480. DirRef current = dir_stack.empty() ? *this : dir_stack.back().dir;
  481. ssize_t dir_entry_start =
  482. dir_stack.empty() ? 0 : dir_stack.back().dir_entry_start;
  483. // If we've finished all the child directories of the current entry in the
  484. // stack, pop it off and continue.
  485. if (dir_entry_start == static_cast<ssize_t>(dir_entries.size())) {
  486. dir_stack.pop_back();
  487. continue;
  488. }
  489. CARBON_CHECK(dir_entry_start < static_cast<ssize_t>(dir_entries.size()));
  490. // Take the last entry under the current directory and try removing it.
  491. const std::filesystem::path& entry_path = dir_entries.back();
  492. auto rmdir_result = current.Rmdir(entry_path);
  493. if (rmdir_result.ok() || rmdir_result.error().no_entity()) {
  494. // Removed here or elsewhere already, so pop the entry.
  495. dir_entries.pop_back();
  496. if (dir_entries.empty()) {
  497. // The last entry is the input path with an empty stack, so we've
  498. // finished at this point.
  499. CARBON_CHECK(dir_stack.empty());
  500. return Success();
  501. }
  502. continue;
  503. }
  504. // If we get any error other than not-empty, just return that.
  505. if (!rmdir_result.error().not_empty()) {
  506. return std::move(rmdir_result).error();
  507. }
  508. // Recurse into the subdirectory since it isn't empty, opening it, getting a
  509. // reader, and pushing it onto our stack.
  510. CARBON_ASSIGN_OR_RETURN(Dir subdir, current.OpenDir(entry_path));
  511. auto read_result = std::move(subdir).TakeAndRead();
  512. if (!read_result.ok()) {
  513. return PathError(
  514. read_result.error().unix_errnum(),
  515. "Dir::Read on '{0}' relative to '{1}' during RmdirRecursively",
  516. entry_path, current.dfd_);
  517. }
  518. dir_stack.push_back(
  519. {*std::move(read_result), static_cast<ssize_t>(dir_entries.size())});
  520. // Now read the directory entries. It would be nice to be able to directly
  521. // remove the files and empty directories as we find them when reading, and
  522. // the POSIX spec appears to require that to work, but testing shows at
  523. // least some Linux environments don't work reliably in this case and will
  524. // fail to visit some entries entirely. As a consequence, we walk the entire
  525. // directory and collect the entries into data structures before beginning
  526. // to remove them.
  527. DirRef::Reader& subdir_reader = dir_stack.back().dir;
  528. for (const auto& entry : subdir_reader) {
  529. llvm::StringRef name = entry.name();
  530. if (name == "." || name == "..") {
  531. continue;
  532. }
  533. if (entry.is_known_dir()) {
  534. dir_entries.push_back(name.str());
  535. } else {
  536. // We end up here for entries known to be regular files, other kinds of
  537. // non-directory entries, or when the entry kind isn't known.
  538. //
  539. // Unless we *know* the entry is a directory, we put it into the unknown
  540. // entries. For these, we unlink them first in case they are
  541. // non-directory entries and use the failure of that to move any
  542. // directories that end up here to the directory entries list.
  543. unknown_entries.push_back(name.str());
  544. }
  545. }
  546. // We can immediately try to unlink all the unknown entries, which will
  547. // include any regular files, and use an error on directories that were
  548. // unknown above to switch them to the `dir_entries` list.
  549. while (!unknown_entries.empty()) {
  550. std::filesystem::path name = unknown_entries.pop_back_val();
  551. auto unlink_result = subdir_reader.Unlink(name);
  552. if (unlink_result.ok() || unlink_result.error().no_entity()) {
  553. continue;
  554. } else if (!unlink_result.error().is_dir()) {
  555. return std::move(unlink_result).error();
  556. }
  557. dir_entries.push_back(std::move(name));
  558. }
  559. // We'll handle the directory entries we've queued here in the next
  560. // iteration, removing them or recursing as needed.
  561. }
  562. }
  563. auto DirRef::ReadlinkSlow(const std::filesystem::path& path)
  564. -> ErrorOr<std::string, PathError> {
  565. constexpr ssize_t MinBufferSize =
  566. #ifdef PATH_MAX
  567. PATH_MAX
  568. #else
  569. 1024
  570. #endif
  571. ;
  572. // Read directly into a string to avoid allocating two large buffers.
  573. std::string large_buffer;
  574. // Stat the symlink to get an initial guess at the size.
  575. CARBON_ASSIGN_OR_RETURN(FileStatus status, Lstat(path));
  576. // We try to use the size from the `lstat` unless it is empty, in which case
  577. // we try to use our minimum buffer size which is `PATH_MAX` or a constant
  578. // value. We have a fallback to dynamically discover an adequate buffer size
  579. // below that will handle any inaccuracy.
  580. ssize_t buffer_size = status.size();
  581. if (buffer_size == 0) {
  582. buffer_size = MinBufferSize;
  583. }
  584. large_buffer.resize(status.size());
  585. ssize_t result =
  586. readlinkat(dfd_, path.c_str(), large_buffer.data(), large_buffer.size());
  587. if (result == -1) {
  588. return PathError(errno, "Readlink on '{0}' relative to '{1}'", path, dfd_);
  589. }
  590. // Now the really bad fallback case: if there are racing writes to the
  591. // symlink, the guessed size may not have been large enough. As a last-ditch
  592. // effort, begin doubling (from the next power of two >= our min buffer size)
  593. // the length until it fits. We cap this at 10 MiB to prevent egregious file
  594. // system contents (or some bug somewhere) from exhausting memory.
  595. constexpr ssize_t MaxBufferSize = 10 << 20;
  596. while (result == static_cast<ssize_t>(large_buffer.size())) {
  597. int64_t next_buffer_size = std::max<ssize_t>(
  598. MinBufferSize, llvm::NextPowerOf2(large_buffer.size()));
  599. if (next_buffer_size > MaxBufferSize) {
  600. return PathError(ENOMEM, "Readlink on '{0}' relative to '{1}'", path,
  601. dfd_);
  602. }
  603. large_buffer.resize(next_buffer_size);
  604. result = readlinkat(dfd_, path.c_str(), large_buffer.data(),
  605. large_buffer.size());
  606. if (result == -1) {
  607. return PathError(errno, "Readlink on '{0}' relative to '{1}'", path,
  608. dfd_);
  609. }
  610. }
  611. // Fix-up the size of the string and return it.
  612. large_buffer.resize(result);
  613. return large_buffer;
  614. }
  615. auto MakeTmpDir() -> ErrorOr<RemovingDir, Error> {
  616. std::filesystem::path tmpdir_path = "/tmp";
  617. // We use both `TEST_TMPDIR` and `TMPDIR`. The `TEST_TMPDIR` is set by Bazel
  618. // and preferred to keep tests using the expected output tree rather than
  619. // the system temporary directory.
  620. for (const char* tmpdir_env_name : {"TEST_TMPDIR", "TMPDIR"}) {
  621. const char* tmpdir_env_cstr = getenv(tmpdir_env_name);
  622. if (tmpdir_env_cstr == nullptr) {
  623. continue;
  624. }
  625. std::filesystem::path tmpdir_env = tmpdir_env_cstr;
  626. if (!tmpdir_env.is_absolute()) {
  627. continue;
  628. }
  629. tmpdir_path = std::move(tmpdir_env);
  630. break;
  631. }
  632. std::filesystem::path target = BuildData::BuildTarget.str();
  633. tmpdir_path /= target.filename();
  634. return MakeTmpDirWithPrefix(std::move(tmpdir_path));
  635. }
  636. auto MakeTmpDirWithPrefix(std::filesystem::path prefix)
  637. -> ErrorOr<RemovingDir, Error> {
  638. std::filesystem::path tmpdir_path = std::move(prefix);
  639. tmpdir_path += ".XXXXXX";
  640. std::string tmpdir_path_buffer = tmpdir_path.native();
  641. char* result = mkdtemp(tmpdir_path_buffer.data());
  642. if (result == nullptr) {
  643. RawStringOstream os;
  644. os << llvm::formatv("Calling mkdtemp on '{0}' failed: ",
  645. tmpdir_path.native());
  646. PrintErrorNumber(os, errno);
  647. return Error(os.TakeStr());
  648. }
  649. CARBON_CHECK(result == tmpdir_path_buffer.data(),
  650. "`mkdtemp` used a modified path");
  651. tmpdir_path = std::move(tmpdir_path_buffer);
  652. // Because `mkdtemp` doesn't return an open directory atomically, open the
  653. // created directory and perform safety checks similar to `OpenDir` when
  654. // creating a new directory.
  655. CARBON_ASSIGN_OR_RETURN(
  656. Dir tmp, Cwd().OpenDir(tmpdir_path, OpenExisting, /*creation_mode=*/0,
  657. OpenFlags::NoFollow));
  658. // Make sure we try to remove the directory from here on out.
  659. RemovingDir result_dir(std::move(tmp), tmpdir_path);
  660. // It's a bit awkward to report `fstat` errors as `Error`s, but we
  661. // don't have much choice. The stat failing here would be very weird.
  662. CARBON_ASSIGN_OR_RETURN(FileStatus stat, result_dir.Stat());
  663. // The permissions must be exactly 0700 for a temporary directory, and the UID
  664. // should be ours.
  665. if (stat.permissions() != 0700 && stat.unix_uid() != geteuid()) {
  666. return Error(
  667. llvm::formatv("Found incorrect permissions or UID on tmpdir '{0}'",
  668. tmpdir_path.native())
  669. .str());
  670. }
  671. return result_dir;
  672. }
  673. } // namespace Carbon::Filesystem