filesystem.cpp 29 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "common/filesystem.h"
  5. #include <fcntl.h>
  6. #include <time.h>
  7. #include <unistd.h>
  8. #include <chrono>
  9. #include "common/build_data.h"
  10. #include "llvm/Support/MathExtras.h"
  11. namespace Carbon::Filesystem {
  12. // Render an error number from `errno` to the provided stream using the richest
  13. // rendering available on the platform.
  14. static auto PrintErrorNumber(llvm::raw_ostream& out, int errnum) -> void {
  15. #if defined(_GNU_SOURCE) && \
  16. (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 32))
  17. // For sufficiently recent glibc versions, use GNU-specific routines to
  18. // compute the error name and description.
  19. llvm::StringRef name = strerrordesc_np(errnum);
  20. llvm::StringRef desc = strerrorname_np(errnum);
  21. out << llvm::formatv("{0}: {1}", name, desc);
  22. #elif defined(__APPLE__) || defined(_GNU_SOURCE) || defined(_POSIX_SOURCE)
  23. // Broadly portable fallback for Unix-like systems.
  24. char buffer[4096];
  25. #ifdef _GNU_SOURCE
  26. const char* str = strerror_r(errnum, buffer, sizeof(buffer));
  27. // The GNU version doesn't report a meta-error.
  28. int meta_error = 0;
  29. #else
  30. int meta_error = strerror_r(errnum, buffer, sizeof(buffer));
  31. const char* str = buffer;
  32. #endif
  33. if (meta_error == 0) {
  34. out << llvm::formatv("errno {0}: {1}", errnum, llvm::StringRef(str));
  35. } else {
  36. out << llvm::formatv(
  37. "error number {0}; encountered meta-error number {1} while rendering "
  38. "an error message",
  39. errnum, meta_error);
  40. }
  41. #else
  42. #error TODO: Implement this for other platforms.
  43. #endif
  44. }
  45. auto FdError::Print(llvm::raw_ostream& out) const -> void {
  46. // The `format_` member is a `StringLiteral` that is null terminated, so
  47. // `.data()` is safe here.
  48. // NOLINTNEXTLINE(bugprone-suspicious-stringview-data-usage)
  49. out << llvm::formatv(format_.data(), fd_) << " failed: ";
  50. PrintErrorNumber(out, unix_errnum());
  51. }
  52. auto PathError::Print(llvm::raw_ostream& out) const -> void {
  53. // The `format_` member is a `StringLiteral` that is null terminated, so
  54. // `.data()` is safe here.
  55. // NOLINTNEXTLINE(bugprone-suspicious-stringview-data-usage)
  56. out << llvm::formatv(format_.data(), path_,
  57. dir_fd_ == AT_FDCWD ? std::string("AT_FDCWD")
  58. : std::to_string(dir_fd_))
  59. << " failed: ";
  60. PrintErrorNumber(out, unix_errnum());
  61. }
  62. auto Internal::FileRefBase::ReadFileToString()
  63. -> ErrorOr<std::string, FdError> {
  64. std::string result;
  65. // Read a buffer at a time until we reach the end. We use the pipe buffer
  66. // length as our max buffer size as it is likely to be small but reasonable
  67. // for the OS, and in the case of pipes the same chunking in which the data
  68. // will arrive.
  69. //
  70. // TODO: Replace this with a smaller buffer and using `resize_and_overwrite`
  71. // to read into the string in-place for larger strings. Unclear if that will
  72. // be any faster, but it will be much more friendly to callers with
  73. // constrained stack sizes and use less memory overall.
  74. std::byte buffer[PIPE_BUF];
  75. CARBON_RETURN_IF_ERROR(SeekFromBeginning(0));
  76. for (;;) {
  77. auto read_result = ReadToBuffer(buffer);
  78. if (!read_result.ok()) {
  79. return std::move(read_result).error();
  80. }
  81. if (read_result->empty()) {
  82. // EOF
  83. break;
  84. }
  85. result.append(reinterpret_cast<const char*>(read_result->data()),
  86. read_result->size());
  87. }
  88. return result;
  89. }
  90. auto Internal::FileRefBase::WriteFileFromString(llvm::StringRef str)
  91. -> ErrorOr<Success, FdError> {
  92. CARBON_RETURN_IF_ERROR(SeekFromBeginning(0));
  93. auto bytes = llvm::ArrayRef<std::byte>(
  94. reinterpret_cast<const std::byte*>(str.data()), str.size());
  95. while (!bytes.empty()) {
  96. auto write_result = WriteFromBuffer(bytes);
  97. if (!write_result.ok()) {
  98. return std::move(write_result).error();
  99. }
  100. bytes = *write_result;
  101. }
  102. CARBON_RETURN_IF_ERROR(Truncate(str.size()));
  103. return Success();
  104. }
  105. // A macOS specific sleep routine that builds on more standard utilities. This
  106. // is technically a portable implementation so we always compile it but only use
  107. // it on macOS where the more efficient direct use of `clock_nanosleep` isn't
  108. // available.
  109. [[maybe_unused]]
  110. static auto SleepMacos(Duration sleep) -> void {
  111. TimePoint stop = Clock::now() + sleep;
  112. timespec sleep_ts = Internal::DurationToTimespec(sleep);
  113. for (;;) {
  114. timespec rem_sleep_ts = {};
  115. int result = nanosleep(&sleep_ts, &rem_sleep_ts);
  116. if (result == 0) {
  117. return;
  118. }
  119. // Continue sleeping if we get interrupted by a resumable signal. For
  120. // everything else report it.
  121. if (errno != EINTR) {
  122. int errnum = errno;
  123. RawStringOstream error_os;
  124. PrintErrorNumber(error_os, errnum);
  125. CARBON_FATAL("Unexpected error while sleeping: {0}", error_os.TakeStr());
  126. }
  127. // Update to the remaining sleep time for the next attempt at sleeping.
  128. sleep_ts = rem_sleep_ts;
  129. // Also check if the clock has passed our stop time as a fallback to avoid
  130. // too much clock skew.
  131. if (Clock::now() > stop) {
  132. return;
  133. }
  134. }
  135. }
  136. static auto Sleep(Duration sleep) -> void {
  137. // For every platform but macOS we can sleep directly on an absolute time.
  138. #if __APPLE__
  139. // On Apple platforms, dispatch to a specialized routine.
  140. SleepMacos(sleep);
  141. #else
  142. // We use `clock_gettime` instead of the filesystem `Clock` or some other
  143. // `std::chrono` clock because we want to use the exact same clock that we'll
  144. // use for sleeping below, and we'll need the time in a `timespec` for that
  145. // call anyways. We do use a monotonic clock to try and avoid sleeps being
  146. // interrupted by clock changes.
  147. timespec ts = {};
  148. int result = clock_gettime(CLOCK_MONOTONIC, &ts);
  149. CARBON_CHECK(result == 0, "Error getting the time: {0}", strerror(errno));
  150. // Now convert the timespec to a duration that we can safely do arithmetic on.
  151. // Since the sleep interval is in nanoseconds it is tempting to directly do
  152. // arithmetic here, but this has a subtle pitfall near the boundary between
  153. // the nanosecond component and the second component.
  154. //
  155. // Note that our `Duration` uses `__int128` to avoid worrying about running
  156. // out of precision to represent the final deadline.
  157. Duration stop_time = std::chrono::seconds(ts.tv_sec);
  158. stop_time += std::chrono::nanoseconds(ts.tv_nsec);
  159. stop_time += sleep;
  160. // Now convert back to timespec.
  161. ts = Internal::DurationToTimespec(stop_time);
  162. do {
  163. result = clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &ts, nullptr);
  164. // Continue sleeping if we get interrupted by a resumable signal. Because
  165. // we're using a monotonic clock and an absolute deadline time we will
  166. // eventually progress past that deadline.
  167. } while (result != 0 && (errno == EINTR));
  168. if (result != 0) {
  169. int errnum = errno;
  170. RawStringOstream error_os;
  171. PrintErrorNumber(error_os, errnum);
  172. CARBON_FATAL("Unexpected error while sleeping: {0}", error_os.TakeStr());
  173. }
  174. #endif
  175. }
  176. auto Internal::FileRefBase::TryLock(FileLock::Kind kind, Duration deadline,
  177. Duration poll_interval)
  178. -> ErrorOr<FileLock, FdError> {
  179. CARBON_CHECK(poll_interval <= deadline);
  180. if (deadline != Duration(0) && poll_interval == Duration(0)) {
  181. // If the caller didn't provide a poll interval but did provide a deadline,
  182. // pick a poll interval to roughly be 1/1000th of the deadline but at least
  183. // 1 microsecond. We don't support polling faster than 1 microsecond given
  184. // how expensive file locking is.
  185. poll_interval =
  186. std::max(Duration(std::chrono::microseconds(1)), deadline / 1000);
  187. }
  188. if (deadline != Duration(0)) {
  189. CARBON_CHECK(
  190. deadline >= std::chrono::microseconds(10),
  191. "A deadline for a file lock shorter than 10 microseconds is not "
  192. "supported, callers can implement their own polling logic.");
  193. CARBON_CHECK(poll_interval >= std::chrono::microseconds(1),
  194. "Polling for a file lock faster than every microsecond is not "
  195. "supported, callers can implement their own polling logic.");
  196. }
  197. auto stop = Clock::now() + deadline;
  198. for (;;) {
  199. int result = flock(fd_, static_cast<int>(kind) | LOCK_NB);
  200. if (result == 0) {
  201. return FileLock(fd_);
  202. }
  203. // Return an error if this is something other than blocking for the lock to
  204. // be available, or we didn't get a deadline for continuing to try and
  205. // acquire the lock, or we've reached our deadline.
  206. if (errno != EWOULDBLOCK || deadline == Duration(0) ||
  207. Clock::now() >= stop) {
  208. return FdError(errno, "File::TryLock on '{0}'", fd_);
  209. }
  210. // The caller requested attempting to wait up to a deadline to acquire the
  211. // lock with a specific poll interval. Try to sleep for that poll interval
  212. // before trying the lock again.
  213. Sleep(poll_interval);
  214. }
  215. }
  216. auto DirRef::AppendEntriesIf(
  217. llvm::SmallVectorImpl<std::filesystem::path>& entries,
  218. llvm::function_ref<auto(llvm::StringRef name)->bool> predicate)
  219. -> ErrorOr<Success, FdError> {
  220. CARBON_ASSIGN_OR_RETURN(Reader reader, Read());
  221. for (const Entry& entry : reader) {
  222. llvm::StringRef name = entry.name();
  223. if (name == "." || name == "..") {
  224. continue;
  225. }
  226. if (predicate && !predicate(name)) {
  227. continue;
  228. }
  229. entries.push_back(name.str());
  230. }
  231. return Success();
  232. }
  233. auto DirRef::AppendEntriesIf(
  234. llvm::SmallVectorImpl<std::filesystem::path>& dir_entries,
  235. llvm::SmallVectorImpl<std::filesystem::path>& non_dir_entries,
  236. llvm::function_ref<auto(llvm::StringRef name)->bool> predicate)
  237. -> ErrorOr<Success, FdError> {
  238. CARBON_ASSIGN_OR_RETURN(Reader reader, Read());
  239. for (const Entry& entry : reader) {
  240. llvm::StringRef name = entry.name();
  241. if (name == "." || name == "..") {
  242. continue;
  243. }
  244. if (predicate && !predicate(name)) {
  245. continue;
  246. }
  247. std::filesystem::path name_path = name.str();
  248. if (entry.is_known_dir()) {
  249. dir_entries.push_back(std::move(name_path));
  250. continue;
  251. }
  252. if (!entry.is_unknown_type()) {
  253. non_dir_entries.push_back(std::move(name_path));
  254. continue;
  255. }
  256. auto stat_result = Lstat(name_path);
  257. if (!stat_result.ok()) {
  258. return FdError(stat_result.error().unix_errnum(),
  259. "Dir::AppendEntriesIf on '{0}' failed while stat-ing "
  260. "entries to determine which are directories",
  261. dfd_);
  262. }
  263. if (stat_result->is_dir()) {
  264. dir_entries.push_back(std::move(name_path));
  265. } else {
  266. non_dir_entries.push_back(std::move(name_path));
  267. }
  268. }
  269. return Success();
  270. }
  271. auto DirRef::OpenDir(const std::filesystem::path& path,
  272. CreationOptions creation_options, ModeType creation_mode,
  273. OpenFlags open_flags) -> ErrorOr<Dir, PathError> {
  274. // If we potentially need to create a directory, we have to do that
  275. // separately as no systems support `O_CREAT | O_DIRECTORY`, even though
  276. // that would be (much) nicer.
  277. if (creation_options == CreateNew) {
  278. // If we are required to be the one that created the directory, disable
  279. // following the last symlink when we open that directory. The last symlink
  280. // is the only one that matters for security here because it is only valid
  281. // to create the last component. It is that directory component that we want
  282. // to ensure has not been replaced with a symlink by an adversarial
  283. // concurrent process.
  284. open_flags |= OpenFlags::NoFollow;
  285. }
  286. if (creation_options != OpenExisting) {
  287. CARBON_CHECK(creation_options != CreateAlways,
  288. "Invalid `creation_options` value of `CreateAlways`: there is "
  289. "no support for truncating directories, and so they cannot be "
  290. "created in an analogous way to files if they already exist.");
  291. if (mkdirat(dfd_, path.c_str(), creation_mode) != 0) {
  292. // Unless the error is just that the path already exists, and that is
  293. // allowed for the requested creation flags, report any error here as part
  294. // of opening just like we would if the error originated from `openat`
  295. // with `O_CREAT`.
  296. if (creation_options == CreateNew || errno != EEXIST) {
  297. return PathError(errno,
  298. "Calling `mkdirat` on '{0}' relative to '{1}' during "
  299. "DirRef::OpenDir",
  300. path, dfd_);
  301. }
  302. }
  303. }
  304. // Open this path as a directory. Note that this has to succeed, and when we
  305. // created the directory we require the last component to not be a symlink in
  306. // case it was _replaced_ with a symlink while running.
  307. int result_fd =
  308. openat(dfd_, path.c_str(), static_cast<int>(open_flags) | O_DIRECTORY);
  309. if (result_fd == -1) {
  310. // No need for `EINTR` handling here as if this is a FIFO it would be an
  311. // error with `O_DIRECTORY`.
  312. return PathError(
  313. errno,
  314. "Calling `openat` on '{0}' relative to '{1}' during DirRef::OpenDir",
  315. path, dfd_);
  316. }
  317. Dir result(result_fd);
  318. // If we were required to create the directory, we also need to verify that
  319. // the opened file descriptor continues to have the same permissions and the
  320. // correct owner as we couldn't do the creation atomically with the open. This
  321. // defends against an adversarial removal of the created directory and
  322. // creation of a new directory with the same name but either with wider
  323. // permissions such as all-write, or with a different owner.
  324. //
  325. // We don't defend against replacement with a directory of the same name, same
  326. // permissions, same owner, but different group. There is no good way to do
  327. // this defense given the complexity of group assignment, and there appears to
  328. // be no need. Achieving such a replacement without superuser power would
  329. // require a parent directory with `setgid` bit, and a group that gives the
  330. // attacker access -- but such a parent directory would make *any* creation
  331. // vulnerable without any need for a replacement, so we can't defend against
  332. // that here. The caller has ample tools to defend against this including
  333. // taking care with the parent directory and restricting the group permission
  334. // bits which we *do* verify.
  335. if (creation_options == CreateNew) {
  336. auto stat_result = result.Stat();
  337. if (!stat_result.ok()) {
  338. // Manually propagate this error so we can attach it back to the opened
  339. // path and relative directory.
  340. return PathError(stat_result.error().unix_errnum(),
  341. "DirRef::Stat after opening '{0}' relative to '{1}'",
  342. path, dfd_);
  343. }
  344. // Check that the owning UID is the current effective UID.
  345. if (stat_result->unix_uid() != geteuid()) {
  346. // Model this as `EPERM`, which is a bit awkward, but should be fine.
  347. return PathError(EPERM,
  348. "Unexpected UID change after creating '{0}' relative to "
  349. "'{1}' during DirRef::OpenDir",
  350. path, dfd_);
  351. }
  352. // Check that the permissions are a subset of the requested ones. They may
  353. // have been masked down by `umask`, but if there are *new* permissions,
  354. // that would be a security issue.
  355. if ((stat_result->permissions() & creation_mode) !=
  356. stat_result->permissions()) {
  357. // Model this with `EPERM` and a custom message.
  358. return PathError(EPERM,
  359. "Unexpected permissions after creating '{0}' relative "
  360. "to '{1}' during DirRef::OpenDir",
  361. path, dfd_);
  362. }
  363. }
  364. return result;
  365. }
  366. auto DirRef::ReadFileToString(const std::filesystem::path& path)
  367. -> ErrorOr<std::string, PathError> {
  368. CARBON_ASSIGN_OR_RETURN(ReadFile f, OpenReadOnly(path));
  369. auto result = f.ReadFileToString();
  370. if (result.ok()) {
  371. return *std::move(result);
  372. }
  373. return PathError(result.error().unix_errnum(),
  374. "Dir::ReadFileToString on '{0}' relative to '{1}'", path,
  375. dfd_);
  376. }
  377. auto DirRef::WriteFileFromString(const std::filesystem::path& path,
  378. llvm::StringRef content,
  379. CreationOptions creation_options)
  380. -> ErrorOr<Success, PathError> {
  381. CARBON_ASSIGN_OR_RETURN(WriteFile f, OpenWriteOnly(path, creation_options));
  382. auto write_result = f.WriteFileFromString(content);
  383. // Immediately close the file as even if there was a write error we don't want
  384. // to leave the file open.
  385. auto close_result = std::move(f).Close();
  386. // Now report the first error encountered or return success.
  387. if (!write_result.ok()) {
  388. return PathError(
  389. write_result.error().unix_errnum(),
  390. "Write error in Dir::WriteFileFromString on '{0}' relative to '{1}'",
  391. path, dfd_);
  392. }
  393. if (!close_result.ok()) {
  394. return PathError(
  395. close_result.error().unix_errnum(),
  396. "Close error in Dir::WriteFileFromString on '{0}' relative to '{1}'",
  397. path, dfd_);
  398. }
  399. return Success();
  400. }
  401. auto DirRef::CreateDirectories(const std::filesystem::path& path,
  402. ModeType creation_mode)
  403. -> ErrorOr<Dir, PathError> {
  404. // Avoid having to handle an empty path by immediately rejecting it as
  405. // invalid.
  406. if (path.empty()) {
  407. return PathError(EINVAL,
  408. "DirRef::CreateDirectories on '{0}' relative to '{1}'",
  409. path, dfd_);
  410. }
  411. // Try directly opening the directory and use that if successful. This is an
  412. // important hot path case of users essentially doing an "open-always" form of
  413. // creating multiple steps of directories.
  414. auto open_result = OpenDir(path, OpenExisting);
  415. if (open_result.ok()) {
  416. return std::move(*open_result);
  417. } else if (!open_result.error().no_entity()) {
  418. return std::move(open_result).error();
  419. }
  420. // Walk from the full path towards this directory (or the root) to find the
  421. // first existing directory. This is faster than walking down as no file
  422. // descriptors have to be allocated for any intervening directories, etc. We
  423. // keep the path components that are missing as we pop them off for easy
  424. // traversal back down.
  425. std::optional<Dir> work_dir;
  426. // Paths typically consist of relatively few components
  427. // and so we can use a bit of stack and avoid allocating and moving the paths
  428. // in common cases. We use `8` as an arbitrary but likely good for all of the
  429. // hottest cases.
  430. llvm::SmallVector<std::filesystem::path, 8> missing_components;
  431. missing_components.push_back(path.filename());
  432. for (std::filesystem::path parent_path = path.parent_path();
  433. !parent_path.empty(); parent_path = parent_path.parent_path()) {
  434. auto open_result = OpenDir(parent_path, OpenExisting);
  435. if (open_result.ok()) {
  436. work_dir = std::move(*open_result);
  437. break;
  438. }
  439. missing_components.push_back(parent_path.filename());
  440. }
  441. CARBON_CHECK(!missing_components.empty());
  442. // If we haven't yet opened an intermediate directory, start by creating one
  443. // relative to this directory. We can't do this as part of the loop below as
  444. // `this` and the newly opened directory have different types.
  445. if (!work_dir) {
  446. std::filesystem::path component = missing_components.pop_back_val();
  447. CARBON_ASSIGN_OR_RETURN(
  448. Dir component_dir,
  449. OpenDir(component, CreationOptions::OpenAlways, creation_mode));
  450. // Move this component into our temporary directory slot.
  451. work_dir = std::move(component_dir);
  452. }
  453. // Now walk through the remaining components opening and creating each
  454. // relative to the previous.
  455. while (!missing_components.empty()) {
  456. std::filesystem::path component = missing_components.pop_back_val();
  457. CARBON_ASSIGN_OR_RETURN(
  458. Dir component_dir,
  459. work_dir->OpenDir(component, CreationOptions::OpenAlways,
  460. creation_mode));
  461. // Close the current temporary directory and move the new component
  462. // directory object into its place.
  463. work_dir = std::move(component_dir);
  464. }
  465. CARBON_CHECK(work_dir,
  466. "Should always have created at least one directory for a "
  467. "non-empty path!");
  468. return std::move(work_dir).value();
  469. }
  470. auto DirRef::Rmtree(const std::filesystem::path& path)
  471. -> ErrorOr<Success, PathError> {
  472. struct DirAndIterator {
  473. DirRef::Reader dir;
  474. ssize_t dir_entry_start;
  475. };
  476. llvm::SmallVector<DirAndIterator> dir_stack;
  477. llvm::SmallVector<std::filesystem::path> dir_entries;
  478. llvm::SmallVector<std::filesystem::path> unknown_entries;
  479. dir_entries.push_back(path);
  480. for (;;) {
  481. // When we bottom out, we're removing the initial tree path and doing so
  482. // relative to `this` directory.
  483. DirRef current = dir_stack.empty() ? *this : dir_stack.back().dir;
  484. ssize_t dir_entry_start =
  485. dir_stack.empty() ? 0 : dir_stack.back().dir_entry_start;
  486. // If we've finished all the child directories of the current entry in the
  487. // stack, pop it off and continue.
  488. if (dir_entry_start == static_cast<ssize_t>(dir_entries.size())) {
  489. dir_stack.pop_back();
  490. continue;
  491. }
  492. CARBON_CHECK(dir_entry_start < static_cast<ssize_t>(dir_entries.size()));
  493. // Take the last entry under the current directory and try removing it.
  494. const std::filesystem::path& entry_path = dir_entries.back();
  495. auto rmdir_result = current.Rmdir(entry_path);
  496. if (rmdir_result.ok() || rmdir_result.error().no_entity()) {
  497. // Removed here or elsewhere already, so pop the entry.
  498. dir_entries.pop_back();
  499. if (dir_entries.empty()) {
  500. // The last entry is the input path with an empty stack, so we've
  501. // finished at this point.
  502. CARBON_CHECK(dir_stack.empty());
  503. return Success();
  504. }
  505. continue;
  506. }
  507. // If we get any error other than not-empty, just return that.
  508. if (!rmdir_result.error().not_empty()) {
  509. return std::move(rmdir_result).error();
  510. }
  511. // Recurse into the subdirectory since it isn't empty, opening it, getting a
  512. // reader, and pushing it onto our stack.
  513. CARBON_ASSIGN_OR_RETURN(Dir subdir, current.OpenDir(entry_path));
  514. auto read_result = std::move(subdir).TakeAndRead();
  515. if (!read_result.ok()) {
  516. return PathError(
  517. read_result.error().unix_errnum(),
  518. "Dir::Read on '{0}' relative to '{1}' during RmdirRecursively",
  519. entry_path, current.dfd_);
  520. }
  521. dir_stack.push_back(
  522. {*std::move(read_result), static_cast<ssize_t>(dir_entries.size())});
  523. // Now read the directory entries. It would be nice to be able to directly
  524. // remove the files and empty directories as we find them when reading, and
  525. // the POSIX spec appears to require that to work, but testing shows at
  526. // least some Linux environments don't work reliably in this case and will
  527. // fail to visit some entries entirely. As a consequence, we walk the entire
  528. // directory and collect the entries into data structures before beginning
  529. // to remove them.
  530. DirRef::Reader& subdir_reader = dir_stack.back().dir;
  531. for (const auto& entry : subdir_reader) {
  532. llvm::StringRef name = entry.name();
  533. if (name == "." || name == "..") {
  534. continue;
  535. }
  536. if (entry.is_known_dir()) {
  537. dir_entries.push_back(name.str());
  538. } else {
  539. // We end up here for entries known to be regular files, other kinds of
  540. // non-directory entries, or when the entry kind isn't known.
  541. //
  542. // Unless we *know* the entry is a directory, we put it into the unknown
  543. // entries. For these, we unlink them first in case they are
  544. // non-directory entries and use the failure of that to move any
  545. // directories that end up here to the directory entries list.
  546. unknown_entries.push_back(name.str());
  547. }
  548. }
  549. // We can immediately try to unlink all the unknown entries, which will
  550. // include any regular files, and use an error on directories that were
  551. // unknown above to switch them to the `dir_entries` list.
  552. while (!unknown_entries.empty()) {
  553. std::filesystem::path name = unknown_entries.pop_back_val();
  554. auto unlink_result = subdir_reader.Unlink(name);
  555. if (unlink_result.ok() || unlink_result.error().no_entity()) {
  556. continue;
  557. } else if (!unlink_result.error().is_dir()) {
  558. return std::move(unlink_result).error();
  559. }
  560. dir_entries.push_back(std::move(name));
  561. }
  562. // We'll handle the directory entries we've queued here in the next
  563. // iteration, removing them or recursing as needed.
  564. }
  565. }
  566. auto DirRef::ReadlinkSlow(const std::filesystem::path& path)
  567. -> ErrorOr<std::string, PathError> {
  568. constexpr ssize_t MinBufferSize =
  569. #ifdef PATH_MAX
  570. PATH_MAX
  571. #else
  572. 1024
  573. #endif
  574. ;
  575. // Read directly into a string to avoid allocating two large buffers.
  576. std::string large_buffer;
  577. // Stat the symlink to get an initial guess at the size.
  578. CARBON_ASSIGN_OR_RETURN(FileStatus status, Lstat(path));
  579. // We try to use the size from the `lstat` unless it is empty, in which case
  580. // we try to use our minimum buffer size which is `PATH_MAX` or a constant
  581. // value. We have a fallback to dynamically discover an adequate buffer size
  582. // below that will handle any inaccuracy.
  583. ssize_t buffer_size = status.size();
  584. if (buffer_size == 0) {
  585. buffer_size = MinBufferSize;
  586. }
  587. large_buffer.resize(status.size());
  588. ssize_t result =
  589. readlinkat(dfd_, path.c_str(), large_buffer.data(), large_buffer.size());
  590. if (result == -1) {
  591. return PathError(errno, "Readlink on '{0}' relative to '{1}'", path, dfd_);
  592. }
  593. // Now the really bad fallback case: if there are racing writes to the
  594. // symlink, the guessed size may not have been large enough. As a last-ditch
  595. // effort, begin doubling (from the next power of two >= our min buffer size)
  596. // the length until it fits. We cap this at 10 MiB to prevent egregious file
  597. // system contents (or some bug somewhere) from exhausting memory.
  598. constexpr ssize_t MaxBufferSize = 10 << 20;
  599. while (result == static_cast<ssize_t>(large_buffer.size())) {
  600. int64_t next_buffer_size = std::max<ssize_t>(
  601. MinBufferSize, llvm::NextPowerOf2(large_buffer.size()));
  602. if (next_buffer_size > MaxBufferSize) {
  603. return PathError(ENOMEM, "Readlink on '{0}' relative to '{1}'", path,
  604. dfd_);
  605. }
  606. large_buffer.resize(next_buffer_size);
  607. result = readlinkat(dfd_, path.c_str(), large_buffer.data(),
  608. large_buffer.size());
  609. if (result == -1) {
  610. return PathError(errno, "Readlink on '{0}' relative to '{1}'", path,
  611. dfd_);
  612. }
  613. }
  614. // Fix-up the size of the string and return it.
  615. large_buffer.resize(result);
  616. return large_buffer;
  617. }
  618. auto MakeTmpDir() -> ErrorOr<RemovingDir, Error> {
  619. std::filesystem::path tmpdir_path = "/tmp";
  620. // We use both `TEST_TMPDIR` and `TMPDIR`. The `TEST_TMPDIR` is set by Bazel
  621. // and preferred to keep tests using the expected output tree rather than
  622. // the system temporary directory.
  623. for (const char* tmpdir_env_name : {"TEST_TMPDIR", "TMPDIR"}) {
  624. const char* tmpdir_env_cstr = getenv(tmpdir_env_name);
  625. if (tmpdir_env_cstr == nullptr) {
  626. continue;
  627. }
  628. std::filesystem::path tmpdir_env = tmpdir_env_cstr;
  629. tmpdir_path = std::move(tmpdir_env);
  630. break;
  631. }
  632. std::filesystem::path target = BuildData::BuildTarget.str();
  633. tmpdir_path /= target.filename();
  634. return MakeTmpDirWithPrefix(std::move(tmpdir_path));
  635. }
  636. auto MakeTmpDirWithPrefix(std::filesystem::path prefix)
  637. -> ErrorOr<RemovingDir, Error> {
  638. std::filesystem::path tmpdir_path = std::move(prefix);
  639. tmpdir_path += ".XXXXXX";
  640. std::string tmpdir_path_buffer = tmpdir_path.native();
  641. char* result = mkdtemp(tmpdir_path_buffer.data());
  642. if (result == nullptr) {
  643. RawStringOstream os;
  644. os << llvm::formatv("Calling mkdtemp on '{0}' failed: ",
  645. tmpdir_path.native());
  646. PrintErrorNumber(os, errno);
  647. return Error(os.TakeStr());
  648. }
  649. CARBON_CHECK(result == tmpdir_path_buffer.data(),
  650. "`mkdtemp` used a modified path");
  651. tmpdir_path = std::move(tmpdir_path_buffer);
  652. // Because `mkdtemp` doesn't return an open directory atomically, open the
  653. // created directory and perform safety checks similar to `OpenDir` when
  654. // creating a new directory.
  655. CARBON_ASSIGN_OR_RETURN(
  656. Dir tmp, Cwd().OpenDir(tmpdir_path, OpenExisting, /*creation_mode=*/0,
  657. OpenFlags::NoFollow));
  658. // Make sure we try to remove the directory from here on out.
  659. RemovingDir result_dir(std::move(tmp), tmpdir_path);
  660. // It's a bit awkward to report `fstat` errors as `Error`s, but we
  661. // don't have much choice. The stat failing here would be very weird.
  662. CARBON_ASSIGN_OR_RETURN(FileStatus stat, result_dir.Stat());
  663. // The permissions must be exactly 0700 for a temporary directory, and the UID
  664. // should be ours.
  665. if (stat.permissions() != 0700 && stat.unix_uid() != geteuid()) {
  666. return Error(
  667. llvm::formatv("Found incorrect permissions or UID on tmpdir '{0}'",
  668. tmpdir_path.native())
  669. .str());
  670. }
  671. return result_dir;
  672. }
  673. } // namespace Carbon::Filesystem