// Part of the Carbon Language project, under the Apache License v2.0 with LLVM // Exceptions. See /LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception #include "common/filesystem.h" #include #include #include #include #include "common/build_data.h" #include "llvm/Support/MathExtras.h" namespace Carbon::Filesystem { // Render an error number from `errno` to the provided stream using the richest // rendering available on the platform. static auto PrintErrorNumber(llvm::raw_ostream& out, int errnum) -> void { #if defined(_GNU_SOURCE) && \ (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 32)) // For sufficiently recent glibc versions, use GNU-specific routines to // compute the error name and description. llvm::StringRef name = strerrordesc_np(errnum); llvm::StringRef desc = strerrorname_np(errnum); out << llvm::formatv("{0}: {1}", name, desc); #elif defined(__APPLE__) || defined(_GNU_SOURCE) || defined(_POSIX_SOURCE) // Broadly portable fallback for Unix-like systems. char buffer[4096]; #ifdef _GNU_SOURCE const char* str = strerror_r(errnum, buffer, sizeof(buffer)); // The GNU version doesn't report a meta-error. int meta_error = 0; #else int meta_error = strerror_r(errnum, buffer, sizeof(buffer)); const char* str = buffer; #endif if (meta_error == 0) { out << llvm::formatv("errno {0}: {1}", errnum, llvm::StringRef(str)); } else { out << llvm::formatv( "error number {0}; encountered meta-error number {1} while rendering " "an error message", errnum, meta_error); } #else #error TODO: Implement this for other platforms. #endif } auto FdError::Print(llvm::raw_ostream& out) const -> void { // The `format_` member is a `StringLiteral` that is null terminated, so // `.data()` is safe here. // NOLINTNEXTLINE(bugprone-suspicious-stringview-data-usage) out << llvm::formatv(format_.data(), fd_) << " failed: "; PrintErrorNumber(out, unix_errnum()); } auto PathError::Print(llvm::raw_ostream& out) const -> void { // The `format_` member is a `StringLiteral` that is null terminated, so // `.data()` is safe here. // NOLINTNEXTLINE(bugprone-suspicious-stringview-data-usage) out << llvm::formatv(format_.data(), path_, dir_fd_ == AT_FDCWD ? std::string("AT_FDCWD") : std::to_string(dir_fd_)) << " failed: "; PrintErrorNumber(out, unix_errnum()); } auto Internal::FileRefBase::ReadFileToString() -> ErrorOr { std::string result; // Read a buffer at a time until we reach the end. We use the pipe buffer // length as our max buffer size as it is likely to be small but reasonable // for the OS, and in the case of pipes the same chunking in which the data // will arrive. // // TODO: Replace this with a smaller buffer and using `resize_and_overwrite` // to read into the string in-place for larger strings. Unclear if that will // be any faster, but it will be much more friendly to callers with // constrained stack sizes and use less memory overall. std::byte buffer[PIPE_BUF]; CARBON_RETURN_IF_ERROR(SeekFromBeginning(0)); for (;;) { auto read_result = ReadToBuffer(buffer); if (!read_result.ok()) { return std::move(read_result).error(); } if (read_result->empty()) { // EOF break; } result.append(reinterpret_cast(read_result->data()), read_result->size()); } return result; } auto Internal::FileRefBase::WriteFileFromString(llvm::StringRef str) -> ErrorOr { CARBON_RETURN_IF_ERROR(SeekFromBeginning(0)); auto bytes = llvm::ArrayRef( reinterpret_cast(str.data()), str.size()); while (!bytes.empty()) { auto write_result = WriteFromBuffer(bytes); if (!write_result.ok()) { return std::move(write_result).error(); } bytes = *write_result; } CARBON_RETURN_IF_ERROR(Truncate(str.size())); return Success(); } // A macOS specific sleep routine that builds on more standard utilities. This // is technically a portable implementation so we always compile it but only use // it on macOS where the more efficient direct use of `clock_nanosleep` isn't // available. [[maybe_unused]] static auto SleepMacos(Duration sleep) -> void { TimePoint stop = Clock::now() + sleep; timespec sleep_ts = Internal::DurationToTimespec(sleep); for (;;) { timespec rem_sleep_ts = {}; int result = nanosleep(&sleep_ts, &rem_sleep_ts); if (result == 0) { return; } // Continue sleeping if we get interrupted by a resumable signal. For // everything else report it. if (errno != EINTR) { int errnum = errno; RawStringOstream error_os; PrintErrorNumber(error_os, errnum); CARBON_FATAL("Unexpected error while sleeping: {0}", error_os.TakeStr()); } // Update to the remaining sleep time for the next attempt at sleeping. sleep_ts = rem_sleep_ts; // Also check if the clock has passed our stop time as a fallback to avoid // too much clock skew. if (Clock::now() > stop) { return; } } } static auto Sleep(Duration sleep) -> void { // For every platform but macOS we can sleep directly on an absolute time. #if __APPLE__ // On Apple platforms, dispatch to a specialized routine. SleepMacos(sleep); #else // We use `clock_gettime` instead of the filesystem `Clock` or some other // `std::chrono` clock because we want to use the exact same clock that we'll // use for sleeping below, and we'll need the time in a `timespec` for that // call anyways. We do use a monotonic clock to try and avoid sleeps being // interrupted by clock changes. timespec ts = {}; int result = clock_gettime(CLOCK_MONOTONIC, &ts); CARBON_CHECK(result == 0, "Error getting the time: {0}", strerror(errno)); // Now convert the timespec to a duration that we can safely do arithmetic on. // Since the sleep interval is in nanoseconds it is tempting to directly do // arithmetic here, but this has a subtle pitfall near the boundary between // the nanosecond component and the second component. // // Note that our `Duration` uses `__int128` to avoid worrying about running // out of precision to represent the final deadline. Duration stop_time = std::chrono::seconds(ts.tv_sec); stop_time += std::chrono::nanoseconds(ts.tv_nsec); stop_time += sleep; // Now convert back to timespec. ts = Internal::DurationToTimespec(stop_time); do { result = clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &ts, nullptr); // Continue sleeping if we get interrupted by a resumable signal. Because // we're using a monotonic clock and an absolute deadline time we will // eventually progress past that deadline. } while (result != 0 && (errno == EINTR)); if (result != 0) { int errnum = errno; RawStringOstream error_os; PrintErrorNumber(error_os, errnum); CARBON_FATAL("Unexpected error while sleeping: {0}", error_os.TakeStr()); } #endif } auto Internal::FileRefBase::TryLock(FileLock::Kind kind, Duration deadline, Duration poll_interval) -> ErrorOr { CARBON_CHECK(poll_interval <= deadline); if (deadline != Duration(0) && poll_interval == Duration(0)) { // If the caller didn't provide a poll interval but did provide a deadline, // pick a poll interval to roughly be 1/1000th of the deadline but at least // 1 microsecond. We don't support polling faster than 1 microsecond given // how expensive file locking is. poll_interval = std::max(Duration(std::chrono::microseconds(1)), deadline / 1000); } if (deadline != Duration(0)) { CARBON_CHECK( deadline >= std::chrono::microseconds(10), "A deadline for a file lock shorter than 10 microseconds is not " "supported, callers can implement their own polling logic."); CARBON_CHECK(poll_interval >= std::chrono::microseconds(1), "Polling for a file lock faster than every microsecond is not " "supported, callers can implement their own polling logic."); } auto stop = Clock::now() + deadline; for (;;) { int result = flock(fd_, static_cast(kind) | LOCK_NB); if (result == 0) { return FileLock(fd_); } // Return an error if this is something other than blocking for the lock to // be available, or we didn't get a deadline for continuing to try and // acquire the lock, or we've reached our deadline. if (errno != EWOULDBLOCK || deadline == Duration(0) || Clock::now() >= stop) { return FdError(errno, "File::TryLock on '{0}'", fd_); } // The caller requested attempting to wait up to a deadline to acquire the // lock with a specific poll interval. Try to sleep for that poll interval // before trying the lock again. Sleep(poll_interval); } } auto DirRef::AppendEntriesIf( llvm::SmallVectorImpl& entries, llvm::function_refbool> predicate) -> ErrorOr { CARBON_ASSIGN_OR_RETURN(Reader reader, Read()); for (const Entry& entry : reader) { llvm::StringRef name = entry.name(); if (name == "." || name == "..") { continue; } if (predicate && !predicate(name)) { continue; } entries.push_back(name.str()); } return Success(); } auto DirRef::AppendEntriesIf( llvm::SmallVectorImpl& dir_entries, llvm::SmallVectorImpl& non_dir_entries, llvm::function_refbool> predicate) -> ErrorOr { CARBON_ASSIGN_OR_RETURN(Reader reader, Read()); for (const Entry& entry : reader) { llvm::StringRef name = entry.name(); if (name == "." || name == "..") { continue; } if (predicate && !predicate(name)) { continue; } std::filesystem::path name_path = name.str(); if (entry.is_known_dir()) { dir_entries.push_back(std::move(name_path)); continue; } if (!entry.is_unknown_type()) { non_dir_entries.push_back(std::move(name_path)); continue; } auto stat_result = Lstat(name_path); if (!stat_result.ok()) { return FdError(stat_result.error().unix_errnum(), "Dir::AppendEntriesIf on '{0}' failed while stat-ing " "entries to determine which are directories", dfd_); } if (stat_result->is_dir()) { dir_entries.push_back(std::move(name_path)); } else { non_dir_entries.push_back(std::move(name_path)); } } return Success(); } auto DirRef::OpenDir(const std::filesystem::path& path, CreationOptions creation_options, ModeType creation_mode, OpenFlags open_flags) -> ErrorOr { // If we potentially need to create a directory, we have to do that // separately as no systems support `O_CREAT | O_DIRECTORY`, even though // that would be (much) nicer. if (creation_options == CreateNew) { // If we are required to be the one that created the directory, disable // following the last symlink when we open that directory. The last symlink // is the only one that matters for security here because it is only valid // to create the last component. It is that directory component that we want // to ensure has not been replaced with a symlink by an adversarial // concurrent process. open_flags |= OpenFlags::NoFollow; } if (creation_options != OpenExisting) { CARBON_CHECK(creation_options != CreateAlways, "Invalid `creation_options` value of `CreateAlways`: there is " "no support for truncating directories, and so they cannot be " "created in an analogous way to files if they already exist."); if (mkdirat(dfd_, path.c_str(), creation_mode) != 0) { // Unless the error is just that the path already exists, and that is // allowed for the requested creation flags, report any error here as part // of opening just like we would if the error originated from `openat` // with `O_CREAT`. if (creation_options == CreateNew || errno != EEXIST) { return PathError(errno, "Calling `mkdirat` on '{0}' relative to '{1}' during " "DirRef::OpenDir", path, dfd_); } } } // Open this path as a directory. Note that this has to succeed, and when we // created the directory we require the last component to not be a symlink in // case it was _replaced_ with a symlink while running. int result_fd = openat(dfd_, path.c_str(), static_cast(open_flags) | O_DIRECTORY); if (result_fd == -1) { // No need for `EINTR` handling here as if this is a FIFO it would be an // error with `O_DIRECTORY`. return PathError( errno, "Calling `openat` on '{0}' relative to '{1}' during DirRef::OpenDir", path, dfd_); } Dir result(result_fd); // If we were required to create the directory, we also need to verify that // the opened file descriptor continues to have the same permissions and the // correct owner as we couldn't do the creation atomically with the open. This // defends against an adversarial removal of the created directory and // creation of a new directory with the same name but either with wider // permissions such as all-write, or with a different owner. // // We don't defend against replacement with a directory of the same name, same // permissions, same owner, but different group. There is no good way to do // this defense given the complexity of group assignment, and there appears to // be no need. Achieving such a replacement without superuser power would // require a parent directory with `setgid` bit, and a group that gives the // attacker access -- but such a parent directory would make *any* creation // vulnerable without any need for a replacement, so we can't defend against // that here. The caller has ample tools to defend against this including // taking care with the parent directory and restricting the group permission // bits which we *do* verify. if (creation_options == CreateNew) { auto stat_result = result.Stat(); if (!stat_result.ok()) { // Manually propagate this error so we can attach it back to the opened // path and relative directory. return PathError(stat_result.error().unix_errnum(), "DirRef::Stat after opening '{0}' relative to '{1}'", path, dfd_); } // Check that the owning UID is the current effective UID. if (stat_result->unix_uid() != geteuid()) { // Model this as `EPERM`, which is a bit awkward, but should be fine. return PathError(EPERM, "Unexpected UID change after creating '{0}' relative to " "'{1}' during DirRef::OpenDir", path, dfd_); } // Check that the permissions are a subset of the requested ones. They may // have been masked down by `umask`, but if there are *new* permissions, // that would be a security issue. if ((stat_result->permissions() & creation_mode) != stat_result->permissions()) { // Model this with `EPERM` and a custom message. return PathError(EPERM, "Unexpected permissions after creating '{0}' relative " "to '{1}' during DirRef::OpenDir", path, dfd_); } } return result; } auto DirRef::ReadFileToString(const std::filesystem::path& path) -> ErrorOr { CARBON_ASSIGN_OR_RETURN(ReadFile f, OpenReadOnly(path)); auto result = f.ReadFileToString(); if (result.ok()) { return *std::move(result); } return PathError(result.error().unix_errnum(), "Dir::ReadFileToString on '{0}' relative to '{1}'", path, dfd_); } auto DirRef::WriteFileFromString(const std::filesystem::path& path, llvm::StringRef content, CreationOptions creation_options) -> ErrorOr { CARBON_ASSIGN_OR_RETURN(WriteFile f, OpenWriteOnly(path, creation_options)); auto write_result = f.WriteFileFromString(content); // Immediately close the file as even if there was a write error we don't want // to leave the file open. auto close_result = std::move(f).Close(); // Now report the first error encountered or return success. if (!write_result.ok()) { return PathError( write_result.error().unix_errnum(), "Write error in Dir::WriteFileFromString on '{0}' relative to '{1}'", path, dfd_); } if (!close_result.ok()) { return PathError( close_result.error().unix_errnum(), "Close error in Dir::WriteFileFromString on '{0}' relative to '{1}'", path, dfd_); } return Success(); } auto DirRef::CreateDirectories(const std::filesystem::path& path, ModeType creation_mode) -> ErrorOr { // Avoid having to handle an empty path by immediately rejecting it as // invalid. if (path.empty()) { return PathError(EINVAL, "DirRef::CreateDirectories on '{0}' relative to '{1}'", path, dfd_); } // Try directly opening the directory and use that if successful. This is an // important hot path case of users essentially doing an "open-always" form of // creating multiple steps of directories. auto open_result = OpenDir(path, OpenExisting); if (open_result.ok()) { return std::move(*open_result); } else if (!open_result.error().no_entity()) { return std::move(open_result).error(); } // Walk from the full path towards this directory (or the root) to find the // first existing directory. This is faster than walking down as no file // descriptors have to be allocated for any intervening directories, etc. We // keep the path components that are missing as we pop them off for easy // traversal back down. std::optional work_dir; // Paths typically consist of relatively few components // and so we can use a bit of stack and avoid allocating and moving the paths // in common cases. We use `8` as an arbitrary but likely good for all of the // hottest cases. llvm::SmallVector missing_components; missing_components.push_back(path.filename()); for (std::filesystem::path parent_path = path.parent_path(); !parent_path.empty(); parent_path = parent_path.parent_path()) { auto open_result = OpenDir(parent_path, OpenExisting); if (open_result.ok()) { work_dir = std::move(*open_result); break; } missing_components.push_back(parent_path.filename()); } CARBON_CHECK(!missing_components.empty()); // If we haven't yet opened an intermediate directory, start by creating one // relative to this directory. We can't do this as part of the loop below as // `this` and the newly opened directory have different types. if (!work_dir) { std::filesystem::path component = missing_components.pop_back_val(); CARBON_ASSIGN_OR_RETURN( Dir component_dir, OpenDir(component, CreationOptions::OpenAlways, creation_mode)); // Move this component into our temporary directory slot. work_dir = std::move(component_dir); } // Now walk through the remaining components opening and creating each // relative to the previous. while (!missing_components.empty()) { std::filesystem::path component = missing_components.pop_back_val(); CARBON_ASSIGN_OR_RETURN( Dir component_dir, work_dir->OpenDir(component, CreationOptions::OpenAlways, creation_mode)); // Close the current temporary directory and move the new component // directory object into its place. work_dir = std::move(component_dir); } CARBON_CHECK(work_dir, "Should always have created at least one directory for a " "non-empty path!"); return std::move(work_dir).value(); } auto DirRef::Rmtree(const std::filesystem::path& path) -> ErrorOr { struct DirAndIterator { DirRef::Reader dir; ssize_t dir_entry_start; }; llvm::SmallVector dir_stack; llvm::SmallVector dir_entries; llvm::SmallVector unknown_entries; dir_entries.push_back(path); for (;;) { // When we bottom out, we're removing the initial tree path and doing so // relative to `this` directory. DirRef current = dir_stack.empty() ? *this : dir_stack.back().dir; ssize_t dir_entry_start = dir_stack.empty() ? 0 : dir_stack.back().dir_entry_start; // If we've finished all the child directories of the current entry in the // stack, pop it off and continue. if (dir_entry_start == static_cast(dir_entries.size())) { dir_stack.pop_back(); continue; } CARBON_CHECK(dir_entry_start < static_cast(dir_entries.size())); // Take the last entry under the current directory and try removing it. const std::filesystem::path& entry_path = dir_entries.back(); auto rmdir_result = current.Rmdir(entry_path); if (rmdir_result.ok() || rmdir_result.error().no_entity()) { // Removed here or elsewhere already, so pop the entry. dir_entries.pop_back(); if (dir_entries.empty()) { // The last entry is the input path with an empty stack, so we've // finished at this point. CARBON_CHECK(dir_stack.empty()); return Success(); } continue; } // If we get any error other than not-empty, just return that. if (!rmdir_result.error().not_empty()) { return std::move(rmdir_result).error(); } // Recurse into the subdirectory since it isn't empty, opening it, getting a // reader, and pushing it onto our stack. CARBON_ASSIGN_OR_RETURN(Dir subdir, current.OpenDir(entry_path)); auto read_result = std::move(subdir).TakeAndRead(); if (!read_result.ok()) { return PathError( read_result.error().unix_errnum(), "Dir::Read on '{0}' relative to '{1}' during RmdirRecursively", entry_path, current.dfd_); } dir_stack.push_back( {*std::move(read_result), static_cast(dir_entries.size())}); // Now read the directory entries. It would be nice to be able to directly // remove the files and empty directories as we find them when reading, and // the POSIX spec appears to require that to work, but testing shows at // least some Linux environments don't work reliably in this case and will // fail to visit some entries entirely. As a consequence, we walk the entire // directory and collect the entries into data structures before beginning // to remove them. DirRef::Reader& subdir_reader = dir_stack.back().dir; for (const auto& entry : subdir_reader) { llvm::StringRef name = entry.name(); if (name == "." || name == "..") { continue; } if (entry.is_known_dir()) { dir_entries.push_back(name.str()); } else { // We end up here for entries known to be regular files, other kinds of // non-directory entries, or when the entry kind isn't known. // // Unless we *know* the entry is a directory, we put it into the unknown // entries. For these, we unlink them first in case they are // non-directory entries and use the failure of that to move any // directories that end up here to the directory entries list. unknown_entries.push_back(name.str()); } } // We can immediately try to unlink all the unknown entries, which will // include any regular files, and use an error on directories that were // unknown above to switch them to the `dir_entries` list. while (!unknown_entries.empty()) { std::filesystem::path name = unknown_entries.pop_back_val(); auto unlink_result = subdir_reader.Unlink(name); if (unlink_result.ok() || unlink_result.error().no_entity()) { continue; } else if (!unlink_result.error().is_dir()) { return std::move(unlink_result).error(); } dir_entries.push_back(std::move(name)); } // We'll handle the directory entries we've queued here in the next // iteration, removing them or recursing as needed. } } auto DirRef::ReadlinkSlow(const std::filesystem::path& path) -> ErrorOr { constexpr ssize_t MinBufferSize = #ifdef PATH_MAX PATH_MAX #else 1024 #endif ; // Read directly into a string to avoid allocating two large buffers. std::string large_buffer; // Stat the symlink to get an initial guess at the size. CARBON_ASSIGN_OR_RETURN(FileStatus status, Lstat(path)); // We try to use the size from the `lstat` unless it is empty, in which case // we try to use our minimum buffer size which is `PATH_MAX` or a constant // value. We have a fallback to dynamically discover an adequate buffer size // below that will handle any inaccuracy. ssize_t buffer_size = status.size(); if (buffer_size == 0) { buffer_size = MinBufferSize; } large_buffer.resize(buffer_size); ssize_t result = readlinkat(dfd_, path.c_str(), large_buffer.data(), large_buffer.size()); if (result == -1) { return PathError(errno, "Readlink on '{0}' relative to '{1}'", path, dfd_); } // Now the really bad fallback case: if there are racing writes to the // symlink, the guessed size may not have been large enough. As a last-ditch // effort, begin doubling (from the next power of two >= our min buffer size) // the length until it fits. We cap this at 10 MiB to prevent egregious file // system contents (or some bug somewhere) from exhausting memory. constexpr ssize_t MaxBufferSize = 10 << 20; while (result == static_cast(large_buffer.size())) { int64_t next_buffer_size = std::max( MinBufferSize, llvm::NextPowerOf2(large_buffer.size())); if (next_buffer_size > MaxBufferSize) { return PathError(ENOMEM, "Readlink on '{0}' relative to '{1}'", path, dfd_); } large_buffer.resize(next_buffer_size); result = readlinkat(dfd_, path.c_str(), large_buffer.data(), large_buffer.size()); if (result == -1) { return PathError(errno, "Readlink on '{0}' relative to '{1}'", path, dfd_); } } // Fix-up the size of the string and return it. large_buffer.resize(result); return large_buffer; } auto MakeTmpDir() -> ErrorOr { std::filesystem::path tmpdir_path = "/tmp"; // We use both `TEST_TMPDIR` and `TMPDIR`. The `TEST_TMPDIR` is set by Bazel // and preferred to keep tests using the expected output tree rather than // the system temporary directory. for (const char* tmpdir_env_name : {"TEST_TMPDIR", "TMPDIR"}) { const char* tmpdir_env_cstr = getenv(tmpdir_env_name); if (tmpdir_env_cstr == nullptr) { continue; } std::filesystem::path tmpdir_env = tmpdir_env_cstr; tmpdir_path = std::move(tmpdir_env); break; } std::filesystem::path target = BuildData::BuildTarget.str(); tmpdir_path /= target.filename(); return MakeTmpDirWithPrefix(std::move(tmpdir_path)); } auto MakeTmpDirWithPrefix(std::filesystem::path prefix) -> ErrorOr { std::filesystem::path tmpdir_path = std::move(prefix); tmpdir_path += ".XXXXXX"; std::string tmpdir_path_buffer = tmpdir_path.native(); char* result = mkdtemp(tmpdir_path_buffer.data()); if (result == nullptr) { RawStringOstream os; os << llvm::formatv("Calling mkdtemp on '{0}' failed: ", tmpdir_path.native()); PrintErrorNumber(os, errno); return Error(os.TakeStr()); } CARBON_CHECK(result == tmpdir_path_buffer.data(), "`mkdtemp` used a modified path"); tmpdir_path = std::move(tmpdir_path_buffer); // Because `mkdtemp` doesn't return an open directory atomically, open the // created directory and perform safety checks similar to `OpenDir` when // creating a new directory. CARBON_ASSIGN_OR_RETURN( Dir tmp, Cwd().OpenDir(tmpdir_path, OpenExisting, /*creation_mode=*/0, OpenFlags::NoFollow)); // Make sure we try to remove the directory from here on out. RemovingDir result_dir(std::move(tmp), tmpdir_path); // It's a bit awkward to report `fstat` errors as `Error`s, but we // don't have much choice. The stat failing here would be very weird. CARBON_ASSIGN_OR_RETURN(FileStatus stat, result_dir.Stat()); // The permissions must be exactly 0700 for a temporary directory, and the UID // should be ours. if (stat.permissions() != 0700 && stat.unix_uid() != geteuid()) { return Error( llvm::formatv("Found incorrect permissions or UID on tmpdir '{0}'", tmpdir_path.native()) .str()); } return result_dir; } } // namespace Carbon::Filesystem