filesystem.h 59 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #ifndef CARBON_COMMON_FILESYSTEM_H_
  5. #define CARBON_COMMON_FILESYSTEM_H_
  6. #include <dirent.h>
  7. #include <fcntl.h>
  8. #include <sys/stat.h>
  9. #include <sys/types.h>
  10. #include <unistd.h>
  11. #include <concepts>
  12. #include <filesystem>
  13. #include <iterator>
  14. #include <string>
  15. #include "common/check.h"
  16. #include "common/error.h"
  17. #include "common/ostream.h"
  18. #include "common/raw_string_ostream.h"
  19. #include "common/template_string.h"
  20. #include "llvm/ADT/ScopeExit.h"
  21. #include "llvm/ADT/StringRef.h"
  22. #include "llvm/Support/FormatVariadic.h"
  23. // Provides a filesystem library for use in the Carbon project.
  24. //
  25. // This library provides an API designed to support modern Unix / Linux / POSIX
  26. // style filesystem operations, often called "Unix-like"[1] here, efficiently
  27. // and securely, while also carefully staying to a set of abstractions and
  28. // operations that can be reasonably implemented even on Windows platforms.
  29. //
  30. // TODO: Currently, there is not a Windows implementation, but this is actively
  31. // desired when we have testing infrastructure in place for Windows development.
  32. // Lacking that testing infrastructure and a full Windows port, the operations
  33. // here are manually compared with LLVM's filesystem library to ensure a
  34. // reasonable Windows implementation is possible.
  35. //
  36. // The library uses C++'s `std::filesystem::path` as its abstraction for
  37. // paths. This library provides two core APIs: open directories and files.
  38. //
  39. // Open directories provide relative- and absolute-path based operations to open
  40. // other directories or files. This allows secure creation of directories even
  41. // in the face of adversarial operations for example in a shared `/tmp`
  42. // directory. There is a `constexpr` current working directory available as
  43. // `Cwd()` that models normal filesystem operations with paths.
  44. //
  45. // Open files provide read, write, and other operations on the file. There are
  46. // separate types for read-only, write-only, and read-write files to model the
  47. // different APIs available.
  48. //
  49. // The APIs for both directories and files are primarily on `*Ref` types that
  50. // model a non-owning reference to the directory or file. These types are the
  51. // preferred types to use on an API boundary. Owning versions are provided that
  52. // ensure the file or directory is closed on destruction. Files support explicit
  53. // closing in order to observe any close-specific errors.
  54. //
  55. // Where APIs require flag parameters of some form, this library provides
  56. // enumerations that model those flags. The enumeration values are in turn
  57. // chosen to simplify passing these to specific native APIs. This means the
  58. // enumeration *values* should not be expected to be portable across platforms.
  59. // Customizing the values is part of the larger TODO to port the implementation
  60. // to Windows.
  61. //
  62. // [1]: Note that we refer to platforms as "Unix-like" rather than POSIX as we
  63. // want to group together all the OSes where the Unix-derived APIs are the
  64. // primary and expected way to interact with the filesystem, regardless of
  65. // whether a POSIX conforming API happens to exist. For example, both macOS
  66. // and WSL (Windows Subsystem for Linux) _are_ Unix-like as those are the
  67. // primary APIs used to access files in those environments. But Windows
  68. // itself _isn't_ Unix-like, even considering things like the defunct NT
  69. // POSIX subsystem or modern WSL, as those aren't the primary filesystem
  70. // APIs for the (non-WSL) Windows platform. This also matches the rough OS
  71. // classification used in LLVM.
  72. namespace Carbon::Filesystem {
  // How opening a file or directory interacts with whatever already exists at
  // the path.
  //
  // These are by far the most frequently passed parameters and their names are
  // unambiguous, so the enumerators are also made available directly within the
  // namespace.
  enum class CreationOptions {
    // Succeeds only when the file or directory already exists.
    OpenExisting = 0,

    // Opens the file or directory when it exists, and creates it otherwise.
    OpenAlways = O_CREAT,

    // Truncates an existing file on open, or creates a new file. Either way the
    // result is an empty file, independent of the starting state. Directories
    // cannot be truncated on open, so this cannot be used for them. Essentially
    // a short-cut for `OpenAlways` combined with `OpenFlags::Truncate`.
    CreateAlways = O_TRUNC | O_CREAT,

    // Succeeds only when this call creates a new file or directory; finding an
    // existing one is an error.
    CreateNew = O_EXCL | O_CREAT,
  };
  92. using enum CreationOptions;
  // General flags to control the behavior of opening files that aren't covered
  // by other more specific flags.
  //
  // Flags can be combined with the `|` and `|=` operators below wherever their
  // semantics are compatible; not every combination is.
  enum class OpenFlags : int {
    // No additional behavior requested.
    None = 0,

    // Open the file for appending rather than with the position at the start.
    //
    // An error to combine with `Truncate` or to use with `CreateAlways`.
    Append = O_APPEND,

    // Open the file and truncate its contents to be empty.
    Truncate = O_TRUNC,

    // Don't follow a symlink in the final path component being opened.
    NoFollow = O_NOFOLLOW,
  };

  // Returns the union of the two flag sets.
  //
  // This is `constexpr` so that combined flags can be formed in constant
  // expressions (for example `static constexpr` flag sets), not only at
  // runtime.
  constexpr auto operator|(OpenFlags lhs, OpenFlags rhs) -> OpenFlags {
    return static_cast<OpenFlags>(static_cast<int>(lhs) |
                                  static_cast<int>(rhs));
  }

  // Adds the flags in `rhs` to `lhs` in place and returns `lhs`.
  constexpr auto operator|=(OpenFlags& lhs, OpenFlags rhs) -> OpenFlags& {
    lhs = lhs | rhs;
    return lhs;
  }
  // Flags controlling which permissions should be checked in an `Access` call.
  //
  // The enumerator values are the corresponding `access` mode constants. Flags
  // can be combined with the `|` and `|=` operators below, so
  // `AccessCheckFlags::Read | AccessCheckFlags::Write` checks for both read and
  // write access.
  enum class AccessCheckFlags : int {
    Exists = F_OK,
    Read = R_OK,
    Write = W_OK,
    Execute = X_OK,
  };

  // Returns the union of the two sets of access checks.
  //
  // This is `constexpr` so that combined checks can be formed in constant
  // expressions, not only at runtime.
  constexpr auto operator|(AccessCheckFlags lhs, AccessCheckFlags rhs)
      -> AccessCheckFlags {
    return static_cast<AccessCheckFlags>(static_cast<int>(lhs) |
                                         static_cast<int>(rhs));
  }

  // Adds the checks in `rhs` to `lhs` in place and returns `lhs`.
  constexpr auto operator|=(AccessCheckFlags& lhs, AccessCheckFlags rhs)
      -> AccessCheckFlags& {
    lhs = lhs | rhs;
    return lhs;
  }
  // The underlying integer type that should be used to model the mode of a
  // file.
  //
  // The mode is used in this API to represent both the permission bit mask and
  // special properties of a file. For example, on Unix-like systems, it
  // combines permissions with set-user-ID, set-group-ID, and sticky bits.
  //
  // The permission bits in the mode are represented using the Unix-style bit
  // pattern that facilitates octal modeling:
  // - Owner bit mask: 0700
  // - Group bit mask: 0070
  // - Others bit mask: 0007
  //
  // Within each octal digit, read has the value `4`, write `2`, and execute
  // `1`. For example, `0640` grants the owner read and write access, the group
  // read access, and everyone else no access.
  //
  // Windows gracefully degrades to the effective permissions modeled using
  // these values.
  using ModeType = mode_t;
  154. // Enumeration of the different file types recognized.
  155. //
  156. // In addition to the specific type values being arranged for ease of use with
  157. // the POSIX APIs, the underlying type of the enum is arranged to use the common
  158. // mode type.
  159. enum class FileType : ModeType {
  160. // Portable file types that need to be supported across platform
  161. // implementations.
  162. Directory = S_IFDIR,
  163. RegularFile = S_IFREG,
  164. SymbolicLink = S_IFLNK,
  165. // Non-portable Unix-like platform specific types.
  166. UnixFifo = S_IFIFO,
  167. UnixCharDevice = S_IFCHR,
  168. UnixBlockDevice = S_IFBLK,
  169. UnixSocket = S_IFSOCK,
  170. // Mask for the Unix-like types to allow easy extraction.
  171. UnixMask = S_IFMT,
  172. };
  // The access modes a file can be opened with.
  //
  // Code rarely needs these directly; their main role is as a template
  // parameter that constrains which subset of the file API a type exposes.
  enum class OpenAccess {
    ReadOnly = O_RDONLY,
    WriteOnly = O_WRONLY,
    ReadWrite = O_RDWR,
  };
  182. // Forward declarations of various types that appear in APIs.
  183. class DirRef;
  184. class Dir;
  185. class RemovingDir;
  186. template <OpenAccess A>
  187. class FileRef;
  188. template <OpenAccess A>
  189. class File;
  190. class FdError;
  191. class PathError;
  192. namespace Internal {
  193. class FileRefBase;
  194. } // namespace Internal
  195. // Returns a constant `Dir` object that models the open current working
  196. // directory.
  197. //
  198. // Whatever the working directory of the process is will be used as the base for
  199. // any relative path operations on this object. For example, on Unix-like
  200. // systems, `Cwd().Stat("some/path")` is equivalent to `stat("some/path")`.
  201. consteval auto Cwd() -> Dir;
  202. // Creates a temporary directory and returns a removing directory handle to it.
  203. //
  204. // Each directory created will be unique and newly created by the call. It is
  205. // the caller's responsibility to clean up this directory.
  206. auto MakeTmpDir() -> ErrorOr<RemovingDir, Error>;
  207. // Class modeling a file (or directory) status information structure.
  208. //
  209. // This provides a largely-portable model that callers can use, as well as a few
  210. // APIs to access non-portable implementation details when necessary.
  211. class FileStatus {
  212. public:
  213. // The size of the file in bytes.
  214. auto size() const -> int64_t { return stat_buf_.st_size; }
  215. auto type() const -> FileType {
  216. return static_cast<FileType>(stat_buf_.st_mode &
  217. static_cast<ModeType>(FileType::UnixMask));
  218. }
  219. // Convenience predicates to test for specific values of `type()`.
  220. auto is_dir() const -> bool { return type() == FileType::Directory; }
  221. auto is_file() const -> bool { return type() == FileType::RegularFile; }
  222. auto is_symlink() const -> bool { return type() == FileType::SymbolicLink; }
  223. // The read, write, and execute permissions for user, group, and others. See
  224. // the `ModeType` documentation for how to interpret the result.
  225. auto permissions() const -> ModeType { return stat_buf_.st_mode & 0777; }
  226. // Non-portable APIs only available on Unix-like systems. See the
  227. // documentation of the Unix `stat` structure fields they expose for their
  228. // meaning.
  229. auto unix_inode() const -> uint64_t { return stat_buf_.st_ino; }
  230. auto unix_uid() const -> uid_t { return stat_buf_.st_uid; }
  231. private:
  232. friend DirRef;
  233. friend Internal::FileRefBase;
  234. FileStatus() = default;
  235. struct stat stat_buf_ = {};
  236. };
  237. // The base class defining the core `File` API.
  238. //
  239. // While not used directly, this is the base class used to implement all of the
  240. // main `File` types: `ReadFileRef`, `WriteFileRef`, and `ReadWriteFileRef`.
  241. //
  242. // Objects using this type have access to an open file handle to a specific file
  243. // and expose operations on that open file. These operations may fail directly
  244. // with their `ErrorOr` return, but some errors may be deferred until the
  245. // underlying owning file is closed.
  246. //
  247. // The type provides reference semantics to the underlying file, but is
  248. // rebindable, movable, and copyable unlike a C++ language reference.
  249. class Internal::FileRefBase {
  250. public:
  251. // This object can be default constructed, but will hold an invalid file
  252. // handle in that case. This is to support rebinding operations.
  253. FileRefBase() = default;
  254. // Reads the file status.
  255. //
  256. // Analogous to the Unix-like `fstat` call.
  257. auto Stat() -> ErrorOr<FileStatus, FdError>;
  258. // Methods to seek the current file position, with various semantics for the
  259. // offset.
  260. auto Seek(int64_t delta) -> ErrorOr<int64_t, FdError>;
  261. auto SeekFromBeginning(int64_t delta_from_beginning)
  262. -> ErrorOr<int64_t, FdError>;
  263. auto SeekFromEnd(int64_t delta_from_end) -> ErrorOr<int64_t, FdError>;
  264. // Reads as much data as is available and fits into the provided buffer.
  265. //
  266. // On success, this returns a new slice from the start to the end of the
  267. // successfully read bytes. These will always be located in the passed-in
  268. // buffer, but not all of the buffer may be filled. A partial read does not
  269. // mean that the end of the file has been reached.
  270. //
  271. // When a successful read with an *empty* slice is returned, that represents
  272. // reaching EOF on the underlying file successfully and there is no more data
  273. // to read.
  274. //
  275. // This method retries `EINTR` on Unix-like systems and returns
  276. // other errors to the caller.
  277. auto ReadToBuffer(llvm::MutableArrayRef<std::byte> buffer)
  278. -> ErrorOr<llvm::MutableArrayRef<std::byte>, FdError>;
  279. // Writes as much data as possible from the provided buffer.
  280. //
  281. // On success, this returns a new slice of the *unwritten* bytes still present
  282. // in the buffer. An empty return represents a successful write of all bytes
  283. // in the buffer. A non-empty return does not represent an error or the
  284. // inability to finish writing.
  285. //
  286. // This method retries `EINTR` on Unix-like systems and returns
  287. // other errors to the caller.
  288. auto WriteFromBuffer(llvm::ArrayRef<std::byte> buffer)
  289. -> ErrorOr<llvm::ArrayRef<std::byte>, FdError>;
  290. // Returns an LLVM `raw_fd_ostream` that writes to this file.
  291. //
  292. // Note that this doesn't expose any write errors here, those will surface
  293. // through the `raw_fd_ostream` API. The stream will also not close the file
  294. // which remains owned by the owning `File` object.
  295. auto WriteStream() -> llvm::raw_fd_ostream;
  296. // Reads the file until EOF into the returned string.
  297. //
  298. // This method will retry any recoverable errors and work to completely read
  299. // the file contents up to first encountering EOF.
  300. //
  301. // Any non-recoverable errors are returned to the caller.
  302. auto ReadToString() -> ErrorOr<std::string, FdError>;
  303. // Writes a string into the file starting from the current position.
  304. //
  305. // This method will retry any recoverable errors and work to completely write
  306. // the provided content into the file.
  307. //
  308. // Any non-recoverable errors are returned to the caller.
  309. auto WriteFromString(llvm::StringRef str) -> ErrorOr<Success, FdError>;
  310. protected:
  311. explicit FileRefBase(int fd) : fd_(fd) {}
  312. // Note: this should only be used or made part of the public API by subclasses
  313. // that provide *ownership* of the open file. It is implemented here to
  314. // provide a single, non-templated implementation.
  315. auto Close() && -> ErrorOr<Success, FdError>;
  316. // Factored out code to destroy an open read-only file. This calls `Close`
  317. // above but ignores any errors as there is no risk of data loss for a
  318. // read-only file.
  319. //
  320. // Note: this is a private API that should not be made public, and should only
  321. // be used by the implementation of subclass destructors. It should also only
  322. // be called for subclasses with *ownership* of the file reference, and is
  323. // provided here as a single non-template implementation.
  324. auto ReadOnlyDestroy() -> void;
  325. // Factored out code to destroy an open writable file. This _requires_ the
  326. // file to have already been closed with an explicit `Close` call, where it
  327. // can report any errors. Without that, destroying a writable file can easily
  328. // result in unnoticed data loss.
  329. //
  330. // Note: this is a private API that should not be made public, and should only
  331. // be used by the implementation of subclass destructors. It should also only
  332. // be called for subclasses with *ownership* of the file reference, and is
  333. // provided here as a single non-template implementation.
  334. auto WriteableDestroy() -> void;
  335. // State representing a potentially open file.
  336. //
  337. // On POSIX systems, this will be a file descriptor. For moved-from and
  338. // default-constructed file objects this may be an invalid negative value to
  339. // signal that state.
  340. //
  341. // TODO: This should be customized on non-POSIX systems.
  342. //
  343. // This member is made protected rather than private as the derived classes
  344. // need direct access to it in several contexts.
  345. // NOLINTNEXTLINE(misc-non-private-member-variables-in-classes)
  346. int fd_ = -1;
  347. };
  348. // A non-owning reference to an open file.
  349. //
  350. // Instances model a reference to an open file. Generally, rather than using a
  351. // `WriteFile&`, code should use a `WriteFileRef`.
  352. //
  353. // A specific instance provides the subset of the file API suitable for its
  354. // access based on its template parameter: read, write, or both.
  355. //
  356. // The API for file references is factored into a base class
  357. // `Internal::FileRefBase` to avoid duplication for each access instantiation.
  358. // Only the methods that are constrained by access are defined here, and they
  359. // are defined as wrappers around methods in the base where the documentation
  360. // and implementation live.
  361. template <OpenAccess A>
  362. class FileRef : public Internal::FileRefBase {
  363. public:
  364. static constexpr bool Readable =
  365. A == OpenAccess::ReadOnly || A == OpenAccess::ReadWrite;
  366. static constexpr bool Writeable =
  367. A == OpenAccess::WriteOnly || A == OpenAccess::ReadWrite;
  368. // This object can be default constructed, but will hold an invalid file
  369. // handle in that case. This is to support rebinding operations.
  370. FileRef() = default;
  371. // Read and Write methods that delegate to the `FileRefBase` implementations,
  372. // but require the relevant access. See the methods on `FileRefBase` for full
  373. // documentation.
  374. auto ReadToBuffer(llvm::MutableArrayRef<std::byte> buffer)
  375. -> ErrorOr<llvm::MutableArrayRef<std::byte>, FdError>
  376. requires Readable;
  377. auto WriteFromBuffer(llvm::ArrayRef<std::byte> buffer)
  378. -> ErrorOr<llvm::ArrayRef<std::byte>, FdError>
  379. requires Writeable;
  380. auto WriteStream() -> llvm::raw_fd_ostream
  381. requires Writeable;
  382. auto ReadToString() -> ErrorOr<std::string, FdError>
  383. requires Readable;
  384. auto WriteFromString(llvm::StringRef str) -> ErrorOr<Success, FdError>
  385. requires Writeable;
  386. protected:
  387. friend File<A>;
  388. friend DirRef;
  389. // Other constructors from the base are also available, but remain protected.
  390. using FileRefBase::FileRefBase;
  391. };
  392. // Convenience type defs for the three access combinations.
  393. using ReadFileRef = FileRef<OpenAccess::ReadOnly>;
  394. using WriteFileRef = FileRef<OpenAccess::WriteOnly>;
  395. using ReadWriteFileRef = FileRef<OpenAccess::ReadWrite>;
  396. // An owning handle to an open file.
  397. //
  398. // This extends the `FileRef` API to provide ownership of the file handle. Most
  399. // of the API is defined by `FileRef`.
  400. //
  401. // The file will be closed when the object is destroyed, and must close without
  402. // errors. If there is a chance of errors on close, and that is often where
  403. // errors are reported, code must use the `Close` API to directly handle them or
  404. // it must be correct to check-fail on them.
  405. //
  406. // This type allows intentional "slicing" to the `FileRef` base class as that is
  407. // a correct and safe conversion to pass a non-owning reference to a file to
  408. // another function, much like binding a reference to an owning type is
  409. // implicit.
  410. template <OpenAccess A>
  411. class File : public FileRef<A> {
  412. public:
  413. static constexpr bool Readable =
  414. A == OpenAccess::ReadOnly || A == OpenAccess::ReadWrite;
  415. static constexpr bool Writeable =
  416. A == OpenAccess::WriteOnly || A == OpenAccess::ReadWrite;
  417. // Default constructs an invalid file.
  418. //
  419. // This can be destroyed or assigned safely, but no other operations are
  420. // correct.
  421. File() = default;
  422. // File objects are move-only as they model ownership.
  423. File(File&& arg) noexcept : FileRef<A>(std::exchange(arg.fd_, -1)) {}
  424. auto operator=(File&& arg) noexcept -> File& {
  425. Destroy();
  426. this->fd_ = std::exchange(arg.fd_, -1);
  427. return *this;
  428. }
  429. File(const File&) = delete;
  430. auto operator=(const File&) -> File& = delete;
  431. ~File() { Destroy(); }
  432. // Closes the open file and leaves the file in a moved-from state.
  433. //
  434. // The signature is `auto Close() && -> ErrorOr<Success, FdError>`.
  435. //
  436. // This type provides ownership of the file, so expose the `Close` method to
  437. // allow checked destruction and release of the file resources.
  438. //
  439. // If any errors are encountered during closing, returns them. Note that the
  440. // file should still be considered closed, and the object is moved-from even
  441. // if errors occur.
  442. using Internal::FileRefBase::Close;
  443. private:
  444. friend DirRef;
  445. // Destroy the file.
  446. //
  447. // This dispatches to non-template code in `FileRefBase` based on whether the
  448. // file is writable or readonly. The core logic is in the non-template
  449. // methods.
  450. auto Destroy() -> void;
  451. explicit File(int fd) : FileRef<A>(fd) {}
  452. };
  453. // Convenience type defs for the three access combinations.
  454. using ReadFile = File<OpenAccess::ReadOnly>;
  455. using WriteFile = File<OpenAccess::WriteOnly>;
  456. using ReadWriteFile = File<OpenAccess::ReadWrite>;
  457. // A non-owning reference to an open directory.
  458. //
  459. // This is the main API for accessing and opening files and other directories.
  460. // Conceptually, every open file or directory is relative to some other
  461. // directory. The symbolic current working directory object is available via the
  462. // `Cwd()` function. When on a Unix-like platform, this is intended to provide
  463. // the semantics of `openat` and related functions, including the ability to
  464. // write secure filesystem operations in the face of adversarial parallel
  465. // filesystem operations.
  466. //
  467. // Relative path parameters are always relative to this directory. Absolute path
  468. // parameters are also allowed and are treated as absolute paths. This parallels
  469. // the behavior of `/` for path concatenation where an absolute path ignores all
  470. // preceding components.
  471. //
  472. // Errors for directory operations retain the path parameter used in order to
  473. // print helpful detail when unhandled, but otherwise work to be lazy and
  474. // lightweight to support low-overhead expected error patterns.
  475. //
  476. // The names are designed to mirror the underlying Unix-like APIs that implement
  477. // them, with extensions to add clarity. However, the set of operations is
  478. // expected to be reasonable to implement on Windows with reasonable fidelity.
  479. class DirRef {
  480. public:
  481. class Entry;
  482. class Iterator;
  483. class Reader;
  484. // Begin reading the entries in a directory.
  485. //
  486. // This returns a `Reader` object that can be iterated to walk over all the
  487. // entries in this directory. Note that the returned `Reader` owns a newly
  488. // allocated handle to this directory, and provides the full `DirRef` API. If
  489. // it isn't necessary to keep both open, the `Dir` class offers a
  490. // move-qualified overload that optimizes this case.
  491. //
  492. // Note that it is unspecified whether added and removed files during the
  493. // lifetime of the reader will be included when iterating, but otherwise
  494. // concurrent mutations are well defined.
  495. auto Read() & -> ErrorOr<Reader, FdError>;
  496. // Checks that the provided path can be accessed.
  497. auto Access(const std::filesystem::path& path,
  498. AccessCheckFlags check = AccessCheckFlags::Exists)
  499. -> ErrorOr<bool, PathError>;
  500. // Reads the `FileStatus` for the open directory.
  501. auto Stat() -> ErrorOr<FileStatus, FdError>;
  502. // Reads the `FileStatus` for the provided path (without opening it).
  503. //
  504. // Like the `stat` system call on Unix-like platforms, this will follow any
  505. // symlinks and provide the status of the underlying file or directory.
  506. auto Stat(const std::filesystem::path& path)
  507. -> ErrorOr<FileStatus, PathError>;
  508. // Reads the `FileStatus` for the provided path (without opening it).
  509. //
  510. // Like the `lstat` system call on Unix-like platforms, this will *not* follow
  511. // symlinks, and instead will return the status of the symlink itself.
  512. auto Lstat(const std::filesystem::path& path)
  513. -> ErrorOr<FileStatus, PathError>;
  514. // Reads the target string of the symlink at the provided path.
  515. //
  516. // This does not follow the symlink, and does not require the symlink target
  517. // to be valid or exist. It merely reads the textual string.
  518. //
  519. // Returns an error if called with a path that is not a symlink.
  520. auto Readlink(const std::filesystem::path& path)
  521. -> ErrorOr<std::string, PathError>;
  522. // Opens the provided path as a read-only file.
  523. //
  524. // The interaction with an existing file is governed by `creation_options` and
  525. // defaults to error unless opening an existing file. When creating a file,
  526. // only the leaf component in the provided path can be created with this call.
  527. //
  528. // If creating a file, the file is created with `creation_mode` which defaults
  529. // to a restrictive `0600`. The creation permission bits are also completely
  530. // independent of the access provided via the opened file. For example,
  531. // creating with write permissions doesn't impact whether write access is
  532. // available via the returned file. And creating _without_ write permission
  533. // bits is compatible with opening the file for writing.
  534. //
  535. // Additional flags can be provided to `flags` to control other aspects of
  536. // behavior on open.
  537. //
  538. // This is an error if the path exists and is a directory. If the path is a
  539. // symlink, it will follow the symlink.
  540. auto OpenReadOnly(const std::filesystem::path& path,
  541. CreationOptions creation_options = OpenExisting,
  542. ModeType creation_mode = 0600,
  543. OpenFlags flags = OpenFlags::None)
  544. -> ErrorOr<ReadFile, PathError>;
  545. // Opens the provided path as a write-only file. Otherwise, behaves as
  546. // `OpenReadOnly`.
  547. auto OpenWriteOnly(const std::filesystem::path& path,
  548. CreationOptions creation_options = OpenExisting,
  549. ModeType creation_mode = 0600,
  550. OpenFlags flags = OpenFlags::None)
  551. -> ErrorOr<WriteFile, PathError>;
  552. // Opens the provided path as a read-and-write file. Otherwise, behaves as
  553. // `OpenReadOnly`.
  554. auto OpenReadWrite(const std::filesystem::path& path,
  555. CreationOptions creation_options = OpenExisting,
  556. ModeType creation_mode = 0600,
  557. OpenFlags flags = OpenFlags::None)
  558. -> ErrorOr<ReadWriteFile, PathError>;
  559. // Opens the provided path as a directory.
  560. //
  561. // Similar to `OpenReadOnly` and other file opening APIs, accepts
  562. // `creation_options` to control the interaction with any existing directory.
  // However, `CreateAlways` is not implementable for directories and is an
  // error if passed. The default permissions in the `creation_mode` are `0700`
  // which is more suitable for directories. There are no extra flags that can
  // be passed.
  567. //
  568. // As with other open routines, when creating a directory, only the leaf
  569. // component can be created by the call to this routine.
  570. //
  571. // When creating a directory with `CreateNew`, this routine works to be safe
  572. // even in the presence of adversarial, concurrent operations that attempt to
  573. // replace the created directory with one that is controlled by the adversary.
  574. //
  575. // Specifically, for `CreateNew` we ensure that the last component is a
  576. // created directory in its parent, and cannot be replaced by a symlink into
  577. // an attacker-controlled directory. We further ensure it cannot have been
  578. // replaced by a directory with a different owner or with wider permissions
  579. // than the created directory.
  580. //
  581. // However, no validation is done on any prefix path components leading to the
  582. // leaf component created. When securely creating directories, the initial
  583. // creation should have a single component from an opened existing parent
  584. // directory. Also, no validation of the owning _group_ is performed. When
  585. // securely creating a directory, the caller should either ensure the parent
  586. // directory does not have a malicious setgid bit set, or restrict the
  587. // created mode to not give group access, or both. In general, the lack of
  588. // control over the owning group motivates our choice to make the default mode
  589. // permissions restrictive and not include any group access.
  590. //
  591. // To securely achieve a result similar to `OpenAlways` instead of
  592. // `CreateNew`, callers can directly `CreateNew` and handle failures with an
  593. // explicit `OpenExisting` that also blocks following symlinks with
  594. // `OpenFlags::NoFollow` and performs any needed validation.
  595. auto OpenDir(const std::filesystem::path& path,
  596. CreationOptions creation_options = OpenExisting,
  597. ModeType creation_mode = 0700, OpenFlags flags = OpenFlags::None)
  598. -> ErrorOr<Dir, PathError>;
  599. // Reads the file at the provided path to a string.
  600. //
  601. // This is a convenience wrapper for opening the path, reading the returned
  602. // file to a string, and closing it. Errors from any step are returned.
  603. auto ReadFileToString(const std::filesystem::path& path)
  604. -> ErrorOr<std::string, PathError>;
  605. // Writes the provided `content` to the provided path.
  606. //
  607. // This is a convenience wrapper for opening the path, creating it according
  608. // to `creation_options` as necessary, writing `content` to it, and closing
  609. // it. Errors from any step are returned.
  610. auto WriteFileFromString(const std::filesystem::path& path,
  611. llvm::StringRef content,
  612. CreationOptions creation_options = CreateAlways)
  613. -> ErrorOr<Success, PathError>;
  614. // Moves a file from one directory to another directory.
  615. auto Rename(const std::filesystem::path& path, DirRef target_dir,
  616. const std::filesystem::path& target_path)
  617. -> ErrorOr<Success, PathError>;
  618. // Changes the current working directory to this directory.
  619. auto Chdir() -> ErrorOr<Success, FdError>;
  620. // Changes the current working directory to the provided path.
  621. //
  622. // An error if the provided path is not a directory. Does not open the
  623. // provided path as a directory, but it will be available as the current
  624. // working directory via `Cwd()`.
  625. auto Chdir(const std::filesystem::path& path) -> ErrorOr<Success, PathError>;
  626. // Creates a symlink at the provided path with the contents of `target`.
  627. //
  // Note that the target of a symlink is an arbitrary string and there is no
  // error checking on whether it exists or is sensible. This will not
  // overwrite an existing symlink at the provided path.
  //
  // Also note that the written symlink will be the null-terminated string
  // `target.c_str()`: only the bytes up to the first null byte in `target` are
  // used, regardless of its `size()`.
  635. auto Symlink(const std::filesystem::path& path, const std::string& target)
  636. -> ErrorOr<Success, PathError>;
  637. // Creates the directories in the provided path, using the permissions in
  638. // `creation_mode`.
  639. //
  640. // This will create any missing directory components in `path`. Relative paths
  641. // will be created relative to this directory, and without re-resolving its
  642. // path. The leaf created directory is opened and returned.
  643. //
  644. // The implementation allows for concurrent creation of the same directory (or
  645. // a prefix) without error or corruption and optimizes for performance of
  646. // creating the requested path. As a consequence, this creation is _unsafe_ in
  647. // the face of adversarial concurrent manipulation of components of the path.
  648. // If you need to create directories securely, first create an initial
  649. // directory securely using `OpenDir` and `CreateNew` with restricted
  650. // permissions that preclude any adversarial behavior, then use this API to
  651. // create tree components within that root.
  652. auto CreateDirectories(const std::filesystem::path& path,
  653. ModeType creation_mode = 0700)
  654. -> ErrorOr<Dir, PathError>;
  655. // Unlink the last component of the path, removing that name from its parent
  656. // directory.
  657. //
  658. // If this was the last link to the underlying file its contents will be
  659. // removed when the last open file handle to it is closed.
  660. //
  661. // The path must not be a directory. If the path is a symbolic link, the link
  662. // will be removed, not the target. Models the behavior of `unlinkat(2)` on
  663. // Unix-like platforms.
  664. auto Unlink(const std::filesystem::path& path) -> ErrorOr<Success, PathError>;
  665. // Remove the directory entry of the last component of the path.
  666. //
  // The path must be a directory, and that directory must be empty. Models
  // `unlinkat(2)` with the `AT_REMOVEDIR` flag on Unix-like platforms.
  669. auto Rmdir(const std::filesystem::path& path) -> ErrorOr<Success, PathError>;
  670. // Remove the directory tree identified by the last component of the path.
  671. //
  672. // The provided path must name a directory. This removes all files and
  673. // subdirectories contained within that named directory and then removes the
  674. // directory itself once empty.
  675. auto Rmtree(const std::filesystem::path& path) -> ErrorOr<Success, PathError>;
  676. protected:
  677. constexpr DirRef() = default;
  678. constexpr explicit DirRef(int dfd) : dfd_(dfd) {}
  679. // Slow-path fallback when unable to read the symlink target into a small
  680. // stack buffer.
  681. auto ReadlinkSlow(const std::filesystem::path& path)
  682. -> ErrorOr<std::string, PathError>;
  683. // Generic implementation of the various `Open*` variants using the
  684. // `OpenAccess` enumerator.
  685. template <OpenAccess A>
  686. auto OpenImpl(const std::filesystem::path& path,
  687. CreationOptions creation_options, ModeType creation_mode,
  688. OpenFlags flags) -> ErrorOr<File<A>, PathError>;
  689. // State representing an open directory.
  690. //
  // On POSIX systems, this will be a file descriptor. For moved-from and
  // default-constructed directory objects this may be an invalid negative
  // value to signal that state.
  694. //
  695. // TODO: This should be customized on non-POSIX systems.
  696. //
  697. // The directory's file descriptor is part of the protected API.
  698. // NOLINTNEXTLINE(misc-non-private-member-variables-in-classes):
  699. int dfd_ = -1;
  700. };
  701. // An owning handle to an open directory.
  702. //
  703. // This extends the `DirRef` API to provide ownership of the directory. Most of
  704. // the API is defined by `DirRef`. It additionally provides optimized move-based
  705. // variations on those APIs where relevant.
  706. //
  707. // The directory will be closed when the object is destroyed. Closing an open
  708. // directory isn't an interesting error reporting path and so no direct close
  709. // API is provided.
  710. //
  711. // This type allows intentional "slicing" to the `DirRef` base class as that is
  712. // a correct and safe conversion to pass a non-owning reference to a directory
  713. // to another function, much like binding a reference to an owning type is
  714. // implicit.
  715. class Dir : public DirRef {
  716. public:
  717. Dir() = default;
  718. // Dir objects are move-only as they model ownership.
  719. Dir(Dir&& arg) noexcept : DirRef(std::exchange(arg.dfd_, -1)) {}
  720. auto operator=(Dir&& arg) noexcept -> Dir& {
  721. Destroy();
  722. dfd_ = std::exchange(arg.dfd_, -1);
  723. return *this;
  724. }
  725. Dir(const Dir&) = delete;
  726. auto operator=(const Dir&) -> Dir& = delete;
  727. constexpr ~Dir();
  728. // An optimized way to read the entries in a directory when moving from an
  729. // owning `Dir` object.
  730. //
  731. // This avoids creating a duplicate file handle for the returned `Reader`.
  732. // That `Reader` also supports the full `DirRef` API and so can often be used
  733. // without retaining the original `Dir`.
  734. //
  735. // For more details about reading, see the documentation on `DirRef::Read`.
  736. auto TakeAndRead() && -> ErrorOr<Reader, FdError>;
  737. // Also include `DirRef`'s read API.
  738. using DirRef::Read;
  739. private:
  740. friend consteval auto Cwd() -> Dir;
  741. friend DirRef;
  742. friend RemovingDir;
  743. explicit constexpr Dir(int dfd) : DirRef(dfd) {}
  744. // Prevent implicit creation of a `Dir` object from a `RemovingDir` which will
  745. // end up as a subclass below and represent harmful implicit slicing. Instead,
  746. // require friendship and an explicit construction on an _intended_ release of
  747. // the removing semantics.
  748. explicit Dir(RemovingDir&& arg) noexcept;
  749. constexpr auto Destroy() -> void;
  750. };
  751. // An owning handle to an open directory and its absolute path that will be
  752. // removed recursively when destroyed.
  753. //
  754. // This can be used to ensure removal of a directory, and also exposes the
  755. // absolute path of the directory.
  756. //
  757. // As removal may encounter errors, unless the desired behavior is a
  758. // check-failure, users should explicitly move and call `Remove` at the end of
  759. // lifetime and handle any resultant errors.
  760. class RemovingDir : public Dir {
  761. public:
  762. // Takes ownership of the open directory `d` and wraps it in a `RemovingDir`
  763. // that will remove it on destruction using `abs_path`. Requires `abs_path` to
  764. // be an absolute path and the desired path to remove on destruction.
  765. //
  766. // Note that there is no way for the implementation to validate what directory
  767. // `abs_path` refers to, that is the responsibility of the caller.
  768. explicit RemovingDir(Dir d, std::filesystem::path abs_path)
  769. : Dir(std::move(d)), abs_path_(std::move(abs_path)) {
  770. CARBON_CHECK(abs_path_.is_absolute(), "Relative path used for removal: {0}",
  771. abs_path_);
  772. }
  773. RemovingDir() = default;
  774. RemovingDir(RemovingDir&& arg) = default;
  775. auto operator=(RemovingDir&& rhs) -> RemovingDir& = default;
  776. ~RemovingDir();
  777. auto abs_path() const [[clang::lifetimebound]]
  778. -> const std::filesystem::path& {
  779. return abs_path_;
  780. }
  781. // Releases the directory from being removed and returns just the underlying
  782. // owning handle.
  783. auto Release() && -> Dir { return std::move(*this); }
  784. // Removes the directory immediately and surfaces any errors encountered.
  785. auto Remove() && -> ErrorOr<Success, PathError>;
  786. private:
  787. friend Dir;
  788. std::filesystem::path abs_path_;
  789. };
  790. // A named entry in a directory.
  791. //
  792. // This provides access to the scanned data when reading the entries of the
  793. // directory. It can only be produced by iterating over a `DirRef::Reader`.
  794. class DirRef::Entry {
  795. public:
  796. // The name of the entry.
  797. //
  798. // This is exposed as a null-terminated C-string as that is the most common
  799. // representation.
  800. auto name() const -> const char* { return dent_->d_name; }
  801. // Test if the entry has an unknown type. In this case, all other type
  802. // predicates will return false and the caller will have to directly `Lstat()`
  803. // the entry to determine its type.
  804. auto is_unknown_type() const -> bool { return dent_->d_type == DT_UNKNOWN; }
  805. // Predicates to test for known entry types.
  806. //
  807. // Note that we don't provide an enumerator here as we don't have any reliable
  808. // way to predict the set of possible values or narrow to that set. Different
  809. // platforms and even different versions of the same header may change the set
  810. // of types surfaced here.
  811. auto is_known_dir() const -> bool { return dent_->d_type == DT_DIR; }
  812. auto is_known_regular_file() const -> bool { return dent_->d_type == DT_REG; }
  813. auto is_known_symlink() const -> bool { return dent_->d_type == DT_LNK; }
  814. private:
  815. friend Dir::Reader;
  816. friend Dir::Iterator;
  817. Entry() = default;
  818. explicit Entry(dirent* dent) : dent_(dent) {}
  819. dirent* dent_ = nullptr;
  820. };
  821. // An iterator into a `DirRef::Reader`, used for walking the entries in a
  822. // directory.
  823. //
  824. // Most of the work of iterating a directory is done when constructing the
  825. // `Reader`, when constructing the beginning iterator, or when incrementing the
  826. // iterator.
  827. class DirRef::Iterator
  828. : public llvm::iterator_facade_base<Iterator, std::input_iterator_tag,
  829. const Entry> {
  830. public:
  831. // Default construct a general end iterator.
  832. Iterator() = default;
  833. auto operator==(const Iterator& rhs) const -> bool {
  834. CARBON_DCHECK(dirp_ == nullptr || rhs.dirp_ == nullptr ||
  835. dirp_ == rhs.dirp_);
  836. return entry_.dent_ == rhs.entry_.dent_;
  837. }
  838. auto operator*() const [[clang::lifetimebound]] -> const Entry& {
  839. return entry_;
  840. }
  841. auto operator++() -> Iterator&;
  842. private:
  843. friend Dir::Reader;
  844. // Construct a begin iterator for a specific directory stream.
  845. explicit Iterator(DIR* dirp) : dirp_(dirp) {
  846. // Increment immediately to populate the initial entry.
  847. ++*this;
  848. }
  849. DIR* dirp_ = nullptr;
  850. Entry entry_;
  851. };
  852. // A reader for a directory.
  853. //
  854. // This class owns a handle to a directory that is set up for reading the
  855. // entries within the directory. Because it owns a handle to the directory, it
  856. // also implements the full `DirRef` API for convenience.
  857. //
  858. // Beyond the `DirRef` API, this object can be iterated as a range to visit all
  859. // the entries in the directory.
  860. //
// Note that it is unspecified whether entries added or removed prior to being
// visited will be observed while iterating. Iterating also cannot be
// re-started once begun -- this models an input iterable range, not even a
// forward iterable range.
  864. //
  865. // This type allows intentional "slicing" to the `DirRef` base class as that is
  866. // a correct and safe conversion to pass a non-owning reference to a directory
  867. // to another function, much like binding a reference to an owning type is
  868. // implicit.
  869. class DirRef::Reader : public DirRef {
  870. public:
  871. Reader() = default;
  872. Reader(Reader&& arg) noexcept
  873. // The directory file descriptor isn't owning, but clear it for clarity.
  874. : DirRef(std::exchange(arg.dfd_, -1)),
  875. dirp_(std::exchange(arg.dirp_, nullptr)) {}
  876. Reader(const Reader&) = delete;
  877. auto operator=(Reader&& arg) noexcept -> Reader& {
  878. Destroy();
  879. // The directory file descriptor isn't owning, but clear it for clarity.
  880. dfd_ = std::exchange(arg.dfd_, -1);
  881. dirp_ = std::exchange(arg.dirp_, nullptr);
  882. return *this;
  883. }
  884. ~Reader() { Destroy(); }
  885. // Compute the begin and end iterators for reading the entries of the
  886. // directory.
  887. auto begin() -> Iterator;
  888. auto end() -> Iterator;
  889. private:
  890. friend DirRef;
  891. friend Dir;
  892. explicit Reader(DIR* dirp) : DirRef(dirfd(dirp)), dirp_(dirp) {}
  893. auto Destroy() -> void;
  894. DIR* dirp_ = nullptr;
  895. };
  896. namespace Internal {
  897. // Base class for `errno` errors.
  898. //
  899. // This is where we extract common APIs and logic for querying the specific
  900. // `errno`-based error.
  901. template <typename ErrorT>
  902. class ErrnoErrorBase : public ErrorBase<ErrorT> {
  903. public:
  904. // Accessors to test for specific kinds of errors that are portably available.
  905. auto already_exists() const -> bool { return errnum_ == EEXIST; }
  906. auto is_dir() const -> bool { return errnum_ == EISDIR; }
  907. auto no_entity() const -> bool { return errnum_ == ENOENT; }
  908. auto not_dir() const -> bool { return errnum_ == ENOTDIR; }
  909. auto access_denied() const -> bool { return errnum_ == EACCES; }
  910. // Specific to `Rmdir` operations, two different error values can be used.
  911. auto not_empty() const -> bool {
  912. return errnum_ == ENOTEMPTY || errnum_ == EEXIST;
  913. }
  914. // Accessor for the `errno` based error number. This is not a portable API,
  915. // code using it will need to be ported to use a different API on Windows.
  916. // TODO: Add a Windows-specific API for its low-level error information.
  917. auto unix_errnum() const -> int { return errnum_; }
  918. protected:
  919. // NOLINTNEXTLINE(bugprone-crtp-constructor-accessibility):
  920. explicit ErrnoErrorBase(int errnum) : errnum_(errnum) {}
  921. private:
  922. int errnum_;
  923. };
  924. } // namespace Internal
  925. // Error from a file-descriptor operation.
  926. //
  927. // This is the implementation of the file-descriptor-based error type. When
  928. // operations on a file descriptor fail, they use this object to convey the
  929. // error plus the descriptor in question.
  930. //
  931. // Specific context on the exact point or nature of the operation that failed
  932. // can be included in the custom format string. The format string should include
  933. // a placeholder for the file descriptor to be substituted into. The format
  934. // string should describe the _operation_ that failed, once rendered it will
  935. // have `failed: ` and a description of the `errno`-indicated failure appended.
  936. //
  937. // For example:
  938. //
  939. // `FdError(EPERM, "Read of file '{0}'", 42)`
  940. //
  941. // Will be rendered similarly to:
  942. //
  943. // "Read of file '42' failed: EPERM: ..."
  944. class FdError : public Internal::ErrnoErrorBase<FdError> {
  945. public:
  946. FdError(FdError&&) noexcept = default;
  947. auto operator=(FdError&&) noexcept -> FdError& = default;
  948. // Prints this error to the provided string.
  949. //
  950. // Works to render the `errno` in a friendly way and includes the file
  951. // descriptor for context.
  952. auto Print(llvm::raw_ostream& out) const -> void;
  953. private:
  954. friend Internal::FileRefBase;
  955. friend ReadFile;
  956. friend WriteFile;
  957. friend ReadWriteFile;
  958. friend DirRef;
  959. friend Dir;
  960. explicit FdError(int errnum, llvm::StringLiteral format, int fd)
  961. : ErrnoErrorBase(errnum), fd_(fd), format_(format) {}
  962. int fd_;
  963. llvm::StringLiteral format_;
  964. };
  965. // Error from a path-based operation.
  966. //
  967. // This is the implementation of the path-based error type. When operations on a
  968. // path fail, they use this object to convey the error plus both the path and
  969. // relevant directory FD leading to the failure.
  970. //
  971. // Specific context on the exact point or nature of the operation that failed
  972. // can be included in the custom format string. The format string should include
  973. // placeholders for the path and the directory file descriptor to be substituted
  974. // into. The format string should describe the _operation_ that failed, once
  975. // rendered it will have `failed: ` and a description of the `errno`-indicated
  976. // failure appended.
  977. //
  978. // For example:
  979. //
  980. // `PathError(EPERM, "Open of '{0}' relative to '{1}'", "filename", 42)`
  981. //
  982. // Will be rendered similarly to:
  983. //
  984. // "Open of 'filename' relative to '42' failed: EPERM: ..."
  985. class PathError : public Internal::ErrnoErrorBase<PathError> {
  986. public:
  987. PathError(PathError&&) noexcept = default;
  988. auto operator=(PathError&&) noexcept -> PathError& = default;
  989. // Prints this error to the provided string.
  990. //
  991. // Works to render the `errno` in a friendly way and includes the path and
  992. // directory file descriptor for context.
  993. auto Print(llvm::raw_ostream& out) const -> void;
  994. private:
  995. friend DirRef;
  996. friend Dir;
  997. explicit PathError(int errnum, llvm::StringLiteral format,
  998. std::filesystem::path path, int dir_fd)
  999. : ErrnoErrorBase(errnum),
  1000. dir_fd_(dir_fd),
  1001. path_(std::move(path)),
  1002. format_(format) {}
  1003. int dir_fd_;
  1004. std::filesystem::path path_;
  1005. llvm::StringLiteral format_;
  1006. };
  1007. // Implementation details only below.
  1008. consteval auto Cwd() -> Dir { return Dir(AT_FDCWD); }
  1009. inline auto Internal::FileRefBase::Stat() -> ErrorOr<FileStatus, FdError> {
  1010. FileStatus status;
  1011. if (fstat(fd_, &status.stat_buf_) == 0) {
  1012. return status;
  1013. }
  1014. return FdError(errno, "File::Stat on '{0}'", fd_);
  1015. }
  1016. inline auto Internal::FileRefBase::Seek(int64_t delta)
  1017. -> ErrorOr<int64_t, FdError> {
  1018. int64_t byte_offset = lseek(fd_, delta, SEEK_CUR);
  1019. if (byte_offset == -1) {
  1020. return FdError(errno, "File::Seek on '{0}'", fd_);
  1021. }
  1022. return byte_offset;
  1023. }
  1024. inline auto Internal::FileRefBase::SeekFromBeginning(
  1025. int64_t delta_from_beginning) -> ErrorOr<int64_t, FdError> {
  1026. int64_t byte_offset = lseek(fd_, delta_from_beginning, SEEK_SET);
  1027. if (byte_offset == -1) {
  1028. return FdError(errno, "File::SeekTo on '{0}'", fd_);
  1029. }
  1030. return byte_offset;
  1031. }
  1032. inline auto Internal::FileRefBase::SeekFromEnd(int64_t delta_from_end)
  1033. -> ErrorOr<int64_t, FdError> {
  1034. int64_t byte_offset = lseek(fd_, delta_from_end, SEEK_END);
  1035. if (byte_offset == -1) {
  1036. return FdError(errno, "File::SeekFromEnd on '{0}'", fd_);
  1037. }
  1038. return byte_offset;
  1039. }
  1040. inline auto Internal::FileRefBase::ReadToBuffer(
  1041. llvm::MutableArrayRef<std::byte> buffer)
  1042. -> ErrorOr<llvm::MutableArrayRef<std::byte>, FdError> {
  1043. for (;;) {
  1044. ssize_t read_bytes = read(fd_, buffer.data(), buffer.size());
  1045. if (read_bytes == -1) {
  1046. if (errno == EINTR) {
  1047. continue;
  1048. }
  1049. return FdError(errno, "File::Read on '{0}'", fd_);
  1050. }
  1051. return buffer.slice(0, read_bytes);
  1052. }
  1053. }
  1054. inline auto Internal::FileRefBase::WriteFromBuffer(
  1055. llvm::ArrayRef<std::byte> buffer)
  1056. -> ErrorOr<llvm::ArrayRef<std::byte>, FdError> {
  1057. for (;;) {
  1058. ssize_t written_bytes = write(fd_, buffer.data(), buffer.size());
  1059. if (written_bytes == -1) {
  1060. if (errno == EINTR) {
  1061. continue;
  1062. }
  1063. return FdError(errno, "File::Write on '{0}'", fd_);
  1064. }
  1065. return buffer.drop_front(written_bytes);
  1066. }
  1067. }
  1068. inline auto Internal::FileRefBase::WriteStream() -> llvm::raw_fd_ostream {
  1069. return llvm::raw_fd_ostream(fd_, /*shouldClose=*/false);
  1070. }
  1071. inline auto Internal::FileRefBase::Close() && -> ErrorOr<Success, FdError> {
  1072. // Put the file in a moved-from state immediately as it is invalid to
  1073. // retry closing or use the file in any way even if the close fails.
  1074. int fd = std::exchange(fd_, -1);
  1075. int result = close(fd);
  1076. if (result == 0) {
  1077. return Success();
  1078. }
  1079. return FdError(errno, "File::Close on '{0}'", fd);
  1080. }
  1081. inline auto Internal::FileRefBase::ReadOnlyDestroy() -> void {
  1082. if (fd_ >= 0) {
  1083. auto result = std::move(*this).Close();
  1084. // Intentionally drop errors, as there is no interesting error here. There
  1085. // is no risk of data loss, and the least bad thing we can do is to just
  1086. // leak the file descriptor.
  1087. static_cast<void>(result);
  1088. }
  1089. }
  1090. inline auto Internal::FileRefBase::WriteableDestroy() -> void {
  1091. CARBON_CHECK(
  1092. fd_ == -1,
  1093. "Cannot destroy an open writable file, they _must_ be destroyed by "
  1094. "calling `Close` and handling any errors to avoid data loss.");
  1095. }
  1096. template <OpenAccess A>
  1097. auto FileRef<A>::ReadToBuffer(llvm::MutableArrayRef<std::byte> buffer)
  1098. -> ErrorOr<llvm::MutableArrayRef<std::byte>, FdError>
  1099. requires Readable
  1100. {
  1101. return FileRefBase::ReadToBuffer(buffer);
  1102. }
  1103. template <OpenAccess A>
  1104. auto FileRef<A>::ReadToString() -> ErrorOr<std::string, FdError>
  1105. requires Readable
  1106. {
  1107. return FileRefBase::ReadToString();
  1108. }
  1109. template <OpenAccess A>
  1110. auto FileRef<A>::WriteFromBuffer(llvm::ArrayRef<std::byte> buffer)
  1111. -> ErrorOr<llvm::ArrayRef<std::byte>, FdError>
  1112. requires Writeable
  1113. {
  1114. return FileRefBase::WriteFromBuffer(buffer);
  1115. }
  1116. template <OpenAccess A>
  1117. auto FileRef<A>::WriteStream() -> llvm::raw_fd_ostream
  1118. requires Writeable
  1119. {
  1120. return FileRefBase::WriteStream();
  1121. }
  1122. template <OpenAccess A>
  1123. auto FileRef<A>::WriteFromString(llvm::StringRef str)
  1124. -> ErrorOr<Success, FdError>
  1125. requires Writeable
  1126. {
  1127. return FileRefBase::WriteFromString(str);
  1128. }
  1129. template <OpenAccess A>
  1130. auto File<A>::Destroy() -> void {
  1131. if constexpr (Writeable) {
  1132. this->WriteableDestroy();
  1133. } else {
  1134. this->ReadOnlyDestroy();
  1135. }
  1136. }
  1137. inline auto DirRef::Read() & -> ErrorOr<Reader, FdError> {
  1138. int dup_dfd = dup(dfd_);
  1139. if (dup_dfd == -1) {
  1140. // There are very few plausible errors here, but we can return one so it
  1141. // doesn't hurt to do so. While `EINTR` and `EBUSY` are mentioned in some
  1142. // documentation, there is no indication that for just `dup` it is useful to
  1143. // loop and retry.
  1144. return FdError(errno, "Dir::Read on '{0}'", dfd_);
  1145. }
  1146. return Dir(dup_dfd).TakeAndRead();
  1147. }
  1148. inline auto DirRef::Access(const std::filesystem::path& path,
  1149. AccessCheckFlags check) -> ErrorOr<bool, PathError> {
  1150. if (faccessat(dfd_, path.c_str(), static_cast<int>(check), /*flags=*/0) ==
  1151. 0) {
  1152. return true;
  1153. }
  1154. return PathError(errno, "Dir::Access on '{0}' relative to '{1}'", path, dfd_);
  1155. }
  1156. inline auto DirRef::Stat() -> ErrorOr<FileStatus, FdError> {
  1157. FileStatus status;
  1158. if (fstat(dfd_, &status.stat_buf_) == 0) {
  1159. return status;
  1160. }
  1161. return FdError(errno, "Dir::Stat on '{0}': ", dfd_);
  1162. }
  1163. inline auto DirRef::Stat(const std::filesystem::path& path)
  1164. -> ErrorOr<FileStatus, PathError> {
  1165. FileStatus status;
  1166. if (fstatat(dfd_, path.c_str(), &status.stat_buf_, /*flags=*/0) == 0) {
  1167. return status;
  1168. }
  1169. return PathError(errno, "Dir::Stat on '{0}' relative to '{1}'", path, dfd_);
  1170. }
  1171. inline auto DirRef::Lstat(const std::filesystem::path& path)
  1172. -> ErrorOr<FileStatus, PathError> {
  1173. FileStatus status;
  1174. if (fstatat(dfd_, path.c_str(), &status.stat_buf_,
  1175. /*flags=*/AT_SYMLINK_NOFOLLOW) == 0) {
  1176. return status;
  1177. }
  1178. return PathError(errno, "Dir::Lstat on '{0}' relative to '{1}'", path, dfd_);
  1179. }
  1180. inline auto DirRef::Readlink(const std::filesystem::path& path)
  1181. -> ErrorOr<std::string, PathError> {
  1182. // On the fast path, we read into a small stack buffer and get the whole
  1183. // contents.
  1184. constexpr ssize_t BufferSize = 256;
  1185. char buffer[BufferSize];
  1186. ssize_t read_bytes = readlinkat(dfd_, path.c_str(), buffer, BufferSize);
  1187. if (read_bytes == -1) {
  1188. return PathError(errno, "Dir::Readlink on '{0}' relative to '{1}'", path,
  1189. dfd_);
  1190. }
  1191. if (read_bytes < BufferSize) {
  1192. // We got the whole contents in one shot, return it.
  1193. return std::string(buffer, read_bytes);
  1194. }
  1195. // Otherwise, fallback to an out-of-line function to handle the slow path.
  1196. return ReadlinkSlow(path);
  1197. }
  1198. inline auto DirRef::OpenReadOnly(const std::filesystem::path& path,
  1199. CreationOptions creation_options,
  1200. ModeType creation_mode, OpenFlags flags)
  1201. -> ErrorOr<ReadFile, PathError> {
  1202. return OpenImpl<OpenAccess::ReadOnly>(path, creation_options, creation_mode,
  1203. flags);
  1204. }
  1205. inline auto DirRef::OpenWriteOnly(const std::filesystem::path& path,
  1206. CreationOptions creation_options,
  1207. ModeType creation_mode, OpenFlags flags)
  1208. -> ErrorOr<WriteFile, PathError> {
  1209. return OpenImpl<OpenAccess::WriteOnly>(path, creation_options, creation_mode,
  1210. flags);
  1211. }
  1212. inline auto DirRef::OpenReadWrite(const std::filesystem::path& path,
  1213. CreationOptions creation_options,
  1214. ModeType creation_mode, OpenFlags flags)
  1215. -> ErrorOr<ReadWriteFile, PathError> {
  1216. return OpenImpl<OpenAccess::ReadWrite>(path, creation_options, creation_mode,
  1217. flags);
  1218. }
  1219. inline auto DirRef::Rename(const std::filesystem::path& path, DirRef target_dir,
  1220. const std::filesystem::path& target_path)
  1221. -> ErrorOr<Success, PathError> {
  1222. if (renameat(dfd_, path.c_str(), target_dir.dfd_, target_path.c_str()) ==
  1223. -1) {
  1224. return PathError(errno, "Dir::Rename on '{0}' relative to '{1}'", path,
  1225. dfd_);
  1226. }
  1227. return Success();
  1228. }
  1229. inline auto DirRef::Chdir() -> ErrorOr<Success, FdError> {
  1230. if (fchdir(dfd_) == -1) {
  1231. return FdError(errno, "Dir::Chdir on '{0}'", dfd_);
  1232. }
  1233. return Success();
  1234. }
  1235. inline auto DirRef::Chdir(const std::filesystem::path& path)
  1236. -> ErrorOr<Success, PathError> {
  1237. if (path.is_absolute()) {
  1238. if (chdir(path.c_str()) == -1) {
  1239. return PathError(errno, "Dir::Chdir on '{0}' relative to '{1}'", path,
  1240. dfd_);
  1241. }
  1242. return Success();
  1243. }
  1244. CARBON_ASSIGN_OR_RETURN(Dir d, OpenDir(path));
  1245. auto result = d.Chdir();
  1246. if (result.ok()) {
  1247. return Success();
  1248. }
  1249. return PathError(result.error().unix_errnum(),
  1250. "Dir::Chdir on '{0}' relative to '{1}'", path, dfd_);
  1251. }
  1252. inline auto DirRef::Symlink(const std::filesystem::path& path,
  1253. const std::string& target)
  1254. -> ErrorOr<Success, PathError> {
  1255. if (symlinkat(target.c_str(), dfd_, path.c_str()) == -1) {
  1256. return PathError(errno, "Dir::Symlink on '{0}' relative to '{1}'", path,
  1257. dfd_);
  1258. }
  1259. return Success();
  1260. }
  1261. inline auto DirRef::Unlink(const std::filesystem::path& path)
  1262. -> ErrorOr<Success, PathError> {
  1263. if (unlinkat(dfd_, path.c_str(), /*flags=*/0) == -1) {
  1264. return PathError(errno, "Dir::Unlink on '{0}' relative to '{1}'", path,
  1265. dfd_);
  1266. }
  1267. return Success();
  1268. }
  1269. inline auto DirRef::Rmdir(const std::filesystem::path& path)
  1270. -> ErrorOr<Success, PathError> {
  1271. if (unlinkat(dfd_, path.c_str(), AT_REMOVEDIR) == -1) {
  1272. return PathError(errno, "Dir::Rmdir on '{0}' relative to '{1}'", path,
  1273. dfd_);
  1274. }
  1275. return Success();
  1276. }
  1277. template <OpenAccess A>
  1278. inline auto DirRef::OpenImpl(const std::filesystem::path& path,
  1279. CreationOptions creation_options,
  1280. ModeType creation_mode, OpenFlags flags)
  1281. -> ErrorOr<File<A>, PathError> {
  1282. for (;;) {
  1283. int fd = openat(dfd_, path.c_str(),
  1284. static_cast<int>(A) | static_cast<int>(creation_options) |
  1285. static_cast<int>(flags),
  1286. creation_mode);
  1287. if (fd == -1) {
  1288. // May need to retry on `EINTR` when opening FIFOs on Linux.
  1289. if (errno == EINTR) {
  1290. continue;
  1291. }
  1292. return PathError(errno, "Dir::Open on '{0}' relative to '{1}'", path,
  1293. dfd_);
  1294. }
  1295. return File<A>(fd);
  1296. }
  1297. }
  1298. constexpr Dir::~Dir() { Destroy(); }
  1299. inline auto Dir::TakeAndRead() && -> ErrorOr<Reader, FdError> {
  1300. // Transition our file descriptor into a directory stream, clearing it in the
  1301. // process.
  1302. int dfd = std::exchange(dfd_, -1);
  1303. DIR* dirp = fdopendir(dfd);
  1304. if (dirp == nullptr) {
  1305. return FdError(errno, "Dir::Read on '{0}'", dfd);
  1306. }
  1307. return Dir::Reader(dirp);
  1308. }
  1309. inline Dir::Dir(RemovingDir&& arg) noexcept : Dir(static_cast<Dir&&>(arg)) {
  1310. arg.abs_path_.clear();
  1311. }
  1312. constexpr auto Dir::Destroy() -> void {
  1313. if (dfd_ != -1 && dfd_ != AT_FDCWD) {
  1314. auto result = close(dfd_);
  1315. // Closing a directory shouldn't produce errors, directly check fail on any.
  1316. //
  1317. // This is a very different case from `close` on a file producing an error.
  1318. // We don't actually write through the directory file descriptor, and for
  1319. // most platforms `closedir` (the closest thing in documentation and
  1320. // exclusively about directories), only provides a very few possible errors
  1321. // here:
  1322. //
  1323. // EBADF: This should be precluded by the types here, and so we consider
  1324. // it a programming error.
  1325. //
  1326. // EINTR: Technically, a system could fail here. We have good evidence
  1327. // that systems we practically support don't as there also is nothing
  1328. // useful to *do* in the face of this: retrying on almost all systems
  1329. // is not allowed as the file descriptor is immediately released. And
  1330. // here, there is no potentially dropped data to report.
  1331. //
  1332. // If we ever discover a platform that fails here, we should adjust this
  1333. // code to not fail in the face of that, likely by dropping the error. If we
  1334. // end up supporting a platform that actually requires well-specified
  1335. // retries, this code should handle that. Until then, we require these to
  1336. // succeed so we will learn about any issues during porting to new
  1337. // platforms.
  1338. CARBON_CHECK(result == 0, "{0}",
  1339. FdError(errno, "Dir::Destroy on '{0}'", dfd_));
  1340. }
  1341. dfd_ = -1;
  1342. }
  1343. inline RemovingDir::~RemovingDir() {
  1344. if (dfd_ != -1) {
  1345. auto result = std::move(*this).Remove();
  1346. CARBON_CHECK(result.ok(), "{0}", result.error());
  1347. }
  1348. }
  1349. inline auto RemovingDir::Remove() && -> ErrorOr<Success, PathError> {
  1350. CARBON_CHECK(dfd_ != -1,
  1351. "Unexpected explicit remove on a `RemovingDir` with no owned "
  1352. "directory!");
  1353. // Close the directory base object prior to removing it.
  1354. static_cast<Dir&>(*this) = Dir();
  1355. return Cwd().Rmtree(abs_path_);
  1356. }
  1357. inline auto Dir::Iterator::operator++() -> Iterator& {
  1358. CARBON_CHECK(dirp_, "Cannot increment an end-iterator");
  1359. errno = 0;
  1360. entry_.dent_ = readdir(dirp_);
  1361. // There are no documented errors beyond an erroneous `dirp_` which would be
  1362. // a programming error and not due to any recoverable failure of the
  1363. // filesystem.
  1364. CARBON_CHECK(entry_.dent_ != nullptr || errno == 0,
  1365. "Using a directory iterator with a non-directory, errno '{0}'",
  1366. errno);
  1367. if (entry_.dent_ == nullptr) {
  1368. // Clear the directory pointer to ease debugging increments past the end.
  1369. dirp_ = nullptr;
  1370. }
  1371. return *this;
  1372. }
  1373. inline auto Dir::Reader::begin() -> Iterator { return Iterator(dirp_); }
  1374. inline auto Dir::Reader::end() -> Iterator { return Iterator(); }
  1375. inline auto Dir::Reader::Destroy() -> void {
  1376. if (dirp_) {
  1377. int result = closedir(dirp_);
  1378. // Closing a directory shouldn't produce interesting errors, so check fail
  1379. // on them directly.
  1380. //
  1381. // See the detailed comment on `Dir::Destroy` for more context on closing of
  1382. // directories, why we check-fail, and what we should do if we discover
  1383. // platforms where an error needs to be handled here.
  1384. CARBON_CHECK(result == 0, "{0}",
  1385. FdError(errno, "Dir::Reader::Destroy on '{0}'", dfd_));
  1386. dirp_ = nullptr;
  1387. dfd_ = -1;
  1388. }
  1389. }
  1390. } // namespace Carbon::Filesystem
  1391. #endif // CARBON_COMMON_FILESYSTEM_H_