Просмотр исходного кода

Hand-port of re2.h into Carbon (#1161)

Richard Smith 4 лет назад
Родитель
Сommit
9d0f55a4b5
3 измененных файлов с 1036 добавлено и 1 удалено
  1. 1 1
      .pre-commit-config.yaml
  2. 27 0
      third_party/examples/re2/LICENSE
  3. 1008 0
      third_party/examples/re2/re2.carbon

+ 1 - 1
.pre-commit-config.yaml

@@ -110,7 +110,7 @@ repos:
           (?x)^(
               .bazelversion|
               compile_flags.txt|
-              third_party/examples/.*/compile_flags.carbon.txt|
+              third_party/.*|
               .*\.def|
               .*\.svg|
               .*/testdata/.*\.golden

+ 27 - 0
third_party/examples/re2/LICENSE

@@ -0,0 +1,27 @@
+// Copyright (c) 2009 The RE2 Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//    * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//    * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+ 1008 - 0
third_party/examples/re2/re2.carbon

@@ -0,0 +1,1008 @@
+// Copyright 2003-2009 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// TODO: Package name conflicts with member class RE2!
+package RE2 api;
+
+// C++ interface to the re2 regular-expression library.
+// RE2 supports Perl-style regular expressions (with extensions like
+// \d, \w, \s, ...).
+//
+// -----------------------------------------------------------------------
+// REGEXP SYNTAX:
+//
+// This module uses the re2 library and hence supports
+// its syntax for regular expressions, which is similar to Perl's with
+// some of the more complicated things thrown away.  In particular,
+// backreferences and generalized assertions are not available, nor is \Z.
+//
+// See https://github.com/google/re2/wiki/Syntax for the syntax
+// supported by RE2, and a comparison with PCRE and PERL regexps.
+//
+// For those not familiar with Perl's regular expressions,
+// here are some examples of the most commonly used extensions:
+//
+//   "hello (\\w+) world"  -- \w matches a "word" character
+//   "version (\\d+)"      -- \d matches a digit
+//   "hello\\s+world"      -- \s matches any whitespace character
+//   "\\b(\\w+)\\b"        -- \b matches non-empty string at word boundary
+//   "(?i)hello"           -- (?i) turns on case-insensitive matching
+//   "/\\*(.*?)\\*/"       -- .*? matches . minimum no. of times possible
+//
+// The double backslashes are needed when writing C++ string literals.
+// However, they should NOT be used when writing C++11 raw string literals:
+//
+//   R"(hello (\w+) world)"  -- \w matches a "word" character
+//   R"(version (\d+))"      -- \d matches a digit
+//   R"(hello\s+world)"      -- \s matches any whitespace character
+//   R"(\b(\w+)\b)"          -- \b matches non-empty string at word boundary
+//   R"((?i)hello)"          -- (?i) turns on case-insensitive matching
+//   R"(/\*(.*?)\*/)"        -- .*? matches . minimum no. of times possible
+//
+// When using UTF-8 encoding, case-insensitive matching will perform
+// simple case folding, not full case folding.
+//
+// -----------------------------------------------------------------------
+// MATCHING INTERFACE:
+//
+// The "FullMatch" operation checks that supplied text matches a
+// supplied pattern exactly.
+//
+// Example: successful match
+//    CHECK(RE2::FullMatch("hello", "h.*o"));
+//
+// Example: unsuccessful match (requires full match):
+//    CHECK(!RE2::FullMatch("hello", "e"));
+//
+// -----------------------------------------------------------------------
+// UTF-8 AND THE MATCHING INTERFACE:
+//
+// By default, the pattern and input text are interpreted as UTF-8.
+// The RE2::Latin1 option causes them to be interpreted as Latin-1.
+//
+// Example:
+//    CHECK(RE2::FullMatch(utf8_string, RE2(utf8_pattern)));
+//    CHECK(RE2::FullMatch(latin1_string, RE2(latin1_pattern, RE2::Latin1)));
+//
+// -----------------------------------------------------------------------
+// MATCHING WITH SUBSTRING EXTRACTION:
+//
+// You can supply extra pointer arguments to extract matched substrings.
+// On match failure, none of the pointees will have been modified.
+// On match success, the substrings will be converted (as necessary) and
+// their values will be assigned to their pointees until all conversions
+// have succeeded or one conversion has failed.
+// On conversion failure, the pointees will be in an indeterminate state
+// because the caller has no way of knowing which conversion failed.
+// However, conversion cannot fail for types like string and StringPiece
+// that do not inspect the substring contents. Hence, in the common case
+// where all of the pointees are of such types, failure is always due to
+// match failure and thus none of the pointees will have been modified.
+//
+// Example: extracts "ruby" into "s" and 1234 into "i"
+//    int i;
+//    std::string s;
+//    CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i));
+//
+// Example: fails because string cannot be stored in integer
+//    CHECK(!RE2::FullMatch("ruby", "(.*)", &i));
+//
+// Example: fails because there aren't enough sub-patterns
+//    CHECK(!RE2::FullMatch("ruby:1234", "\\w+:\\d+", &s));
+//
+// Example: does not try to extract any extra sub-patterns
+//    CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s));
+//
+// Example: does not try to extract into NULL
+//    CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", NULL, &i));
+//
+// Example: integer overflow causes failure
+//    CHECK(!RE2::FullMatch("ruby:1234567891234", "\\w+:(\\d+)", &i));
+//
+// NOTE(rsc): Asking for substrings slows successful matches quite a bit.
+// This may get a little faster in the future, but right now is slower
+// than PCRE.  On the other hand, failed matches run *very* fast (faster
+// than PCRE), as do matches without substring extraction.
+//
+// -----------------------------------------------------------------------
+// PARTIAL MATCHES
+//
+// You can use the "PartialMatch" operation when you want the pattern
+// to match any substring of the text.
+//
+// Example: simple search for a string:
+//      CHECK(RE2::PartialMatch("hello", "ell"));
+//
+// Example: find first number in a string
+//      int number;
+//      CHECK(RE2::PartialMatch("x*100 + 20", "(\\d+)", &number));
+//      CHECK_EQ(number, 100);
+//
+// -----------------------------------------------------------------------
+// PRE-COMPILED REGULAR EXPRESSIONS
+//
+// RE2 makes it easy to use any string as a regular expression, without
+// requiring a separate compilation step.
+//
+// If speed is of the essence, you can create a pre-compiled "RE2"
+// object from the pattern and use it multiple times.  If you do so,
+// you can typically parse text faster than with sscanf.
+//
+// Example: precompile pattern for faster matching:
+//    RE2 pattern("h.*o");
+//    while (ReadLine(&str)) {
+//      if (RE2::FullMatch(str, pattern)) ...;
+//    }
+//
+// -----------------------------------------------------------------------
+// SCANNING TEXT INCREMENTALLY
+//
+// The "Consume" operation may be useful if you want to repeatedly
+// match regular expressions at the front of a string and skip over
+// them as they match.  This requires use of the "StringPiece" type,
+// which represents a sub-range of a real string.
+//
+// Example: read lines of the form "var = value" from a string.
+//      std::string contents = ...;     // Fill string somehow
+//      StringPiece input(contents);    // Wrap a StringPiece around it
+//
+//      std::string var;
+//      int value;
+//      while (RE2::Consume(&input, "(\\w+) = (\\d+)\n", &var, &value)) {
+//        ...;
+//      }
+//
+// Each successful call to "Consume" will set "var/value", and also
+// advance "input" so it points past the matched text.  Note that if the
+// regular expression matches an empty string, input will advance
+// by 0 bytes.  If the regular expression being used might match
+// an empty string, the loop body must check for this case and either
+// advance the string or break out of the loop.
+//
+// The "FindAndConsume" operation is similar to "Consume" but does not
+// anchor your match at the beginning of the string.  For example, you
+// could extract all words from a string by repeatedly calling
+//     RE2::FindAndConsume(&input, "(\\w+)", &word)
+//
+// -----------------------------------------------------------------------
+// USING VARIABLE NUMBER OF ARGUMENTS
+//
+// The above operations require you to know the number of arguments
+// when you write the code.  This is not always possible or easy (for
+// example, the regular expression may be calculated at run time).
+// You can use the "N" version of the operations when the number of
+// match arguments are determined at run time.
+//
+// Example:
+//   const RE2::Arg* args[10];
+//   int n;
+//   // ... populate args with pointers to RE2::Arg values ...
+//   // ... set n to the number of RE2::Arg objects ...
+//   bool match = RE2::FullMatchN(input, pattern, args, n);
+//
+// The last statement is equivalent to
+//
+//   bool match = RE2::FullMatch(input, pattern,
+//                               *args[0], *args[1], ..., *args[n - 1]);
+//
+// -----------------------------------------------------------------------
+// PARSING HEX/OCTAL/C-RADIX NUMBERS
+//
+// By default, if you pass a pointer to a numeric value, the
+// corresponding text is interpreted as a base-10 number.  You can
+// instead wrap the pointer with a call to one of the operators Hex(),
+// Octal(), or CRadix() to interpret the text in another base.  The
+// CRadix operator interprets C-style "0" (base-8) and "0x" (base-16)
+// prefixes, but defaults to base-10.
+//
+// Example:
+//   int a, b, c, d;
+//   CHECK(RE2::FullMatch("100 40 0100 0x40", "(.*) (.*) (.*) (.*)",
+//         RE2::Octal(&a), RE2::Hex(&b), RE2::CRadix(&c), RE2::CRadix(&d));
+// will leave 64 in a, b, c, and d.
+
+import Cpp library "<algorithm>";
+import Cpp library "<map>";
+import Cpp library "<mutex>";
+import Cpp library "<vector>";
+
+// TODO: How to express target-specific conditional compilation?
+// TODO: #if defined(__APPLE__)
+// TODO: #include <TargetConditionals.h>
+// TODO: #endif
+
+// TODO: How to forward declare classes from another library?
+// Is a physical dependency on the library required?
+// TODO: namespace re2 {
+// TODO: class Prog;
+// TODO: class Regexp;
+// TODO: }  // namespace re2
+
+private interface Parse4ary;
+
+// Interface for regular expression matching.  Also corresponds to a
+// pre-compiled regular expression.  An "RE2" object is safe for
+// concurrent use by multiple threads.
+class RE2 {
+  // We convert user-passed pointers into special Arg objects
+  class Arg;
+  class Options;
+
+  // Defined in set.h.
+  class Set;
+
+  // TODO: Assuming a C++-like enum syntax for now.
+  enum ErrorCode {
+    NoError = 0,
+
+    // Unexpected error
+    ErrorInternal,
+
+    // Parse errors
+    // bad escape sequence
+    ErrorBadEscape,
+    // bad character class
+    ErrorBadCharClass,
+    // bad character class range
+    ErrorBadCharRange,
+    // missing closing ]
+    ErrorMissingBracket,
+    // missing closing )
+    ErrorMissingParen,
+    // unexpected closing )
+    ErrorUnexpectedParen,
+    // trailing \ at end of regexp
+    ErrorTrailingBackslash,
+    // repeat argument missing, e.g. "*"
+    ErrorRepeatArgument,
+    // bad repetition argument
+    ErrorRepeatSize,
+    // bad repetition operator
+    ErrorRepeatOp,
+    // bad perl operator
+    ErrorBadPerlOp,
+    // invalid UTF-8 in regexp
+    ErrorBadUTF8,
+    // bad named capture group
+    ErrorBadNamedCapture,
+    // pattern too large (compile failed)
+    ErrorPatternTooLarge
+  }
+
+  // Predefined common options.
+  // If you need more complicated things, instantiate
+  // an Option class, possibly passing one of these to
+  // the Option constructor, change the settings, and pass that
+  // Option class to the RE2 constructor.
+  enum CannedOptions {
+    DefaultOptions = 0,
+    // treat input as Latin-1 (default UTF-8)
+    Latin1,
+    // POSIX syntax, leftmost-longest match
+    POSIX,
+    // do not log about regexp parse errors
+    Quiet
+  }
+
+  fn Make(pattern: StringPiece) -> RE2;
+  fn Make(pattern: StringPiece, options: Options) -> RE2;
+
+  // TODO: Should a Carbonic RE2 support these?
+  impl StringView as ImplicitAs(RE2) {
+    fn Convert[me: Self]() -> RE2 { return Make(me); }
+  }
+  impl String as ImplicitAs(RE2) {
+    fn Convert[me: Self]() -> RE2 { return Make(me); }
+  }
+  impl StringPiece as ImplicitAs(RE2) {
+    fn Convert[me: Self]() -> RE2 { return Make(me); }
+  }
+
+  impl as Destroyable;
+
+  // Returns whether RE2 was created properly.
+  fn ok[me: Self]() -> Bool { return me.error_code() == ErrorCode.NoError; }
+
+  // The string specification for this RE2.  E.g.
+  //   RE2 re("ab*c?d+");
+  //   re.pattern();    // "ab*c?d+"
+  fn pattern[me: Self]() -> String { return me.pattern_; }
+
+  // If RE2 could not be created properly, returns an error string.
+  // Else returns the empty string.
+  fn error[me: Self]() -> String { return *me.error_; }
+
+  // If RE2 could not be created properly, returns an error code.
+  // Else returns RE2::NoError (== 0).
+  fn error_code[me: Self]() -> ErrorCode { return me.error_code_; }
+
+  // If RE2 could not be created properly, returns the offending
+  // portion of the regexp.
+  fn error_arg[me: Self]() -> String { return me.error_arg_; }
+
+  // Returns the program size, a very approximate measure of a regexp's "cost".
+  // Larger numbers are more expensive than smaller numbers.
+  fn ProgramSize[me: Self]() -> i32;
+  fn ReverseProgramSize[me: Self]() -> i32;
+
+  // If histogram is not null, outputs the program fanout
+  // as a histogram bucketed by powers of 2.
+  // Returns the number of the largest non-empty bucket.
+  fn ProgramFanout[me: Self](histogram: Cpp.std.vector(i32)*) -> i32;
+  fn ReverseProgramFanout[me: Self](histogram: Cpp.std.vector(i32)*) -> i32;
+
+  // Returns the underlying Regexp; not for general use.
+  // Returns entire_regexp_ so that callers don't need
+  // to know about prefix_ and prefix_foldcase_.
+  fn Regexp[me: Self]() -> package.Regexp* { return me.entire_regexp_; }
+
+  /***** The array-based matching interface ******/
+
+  // The functions here have names ending in 'N' and are used to implement
+  // the functions whose names are the prefix before the 'N'. It is sometimes
+  // useful to invoke them directly, but the syntax is awkward, so the 'N'-less
+  // versions should be preferred.
+  // TODO: pointer with const pointee
+  fn FullMatchN(text: StringPiece, re: Self,
+                args: Array(const Arg*), n: i32) -> bool;
+  fn PartialMatchN(text: StringPiece, re: Self,
+                   args: Array(const Arg*), n: i32) -> bool;
+  fn ConsumeN(input: StringPiece*, re: Self,
+              args: Array(const Arg*), n: i32) -> bool;
+  fn FindAndConsumeN(input: StringPiece*, re: RE2,
+                     args: Array(const Arg*), n: i32) -> bool;
+
+  private fn Apply[template F:! Type, SP:! Type](f: F, sp: SP, re: Self) {
+    return f(sp, re, nullptr, 0);
+  }
+
+  // TODO (variadics)
+  // TODO: template <typename F, typename SP, typename... A>
+  // TODO: static inline bool Apply(F f, SP sp, const RE2& re, const A&... a) {
+  // TODO:   const Arg* const args[] = {&a...};
+  // TODO:   const int n = sizeof...(a);
+  // TODO:   return f(sp, re, args, n);
+  // TODO: }
+
+  // In order to allow FullMatch() et al. to be called with a varying number
+  // of arguments of varying types, we use two layers of variadic templates.
+  // The first layer constructs the temporary Arg objects. The second layer
+  // (above) constructs the array of pointers to the temporary Arg objects.
+
+  /***** The useful part: the matching interface *****/
+
+  // Matches "text" against "re".  If pointer arguments are
+  // supplied, copies matched sub-patterns into them.
+  //
+  // You can pass in a "const char*" or a "std::string" for "text".
+  // You can pass in a "const char*" or a "std::string" or a "RE2" for "re".
+  //
+  // The provided pointer arguments can be pointers to any scalar numeric
+  // type, or one of:
+  //    std::string     (matched piece is copied to string)
+  //    StringPiece     (StringPiece is mutated to point to matched piece)
+  //    T               (where "bool T::ParseFrom(const char*, size_t)" exists)
+  //    (void*)NULL     (the corresponding matched sub-pattern is not copied)
+  //
+  // Returns true iff all of the following conditions are satisfied:
+  //   a. "text" matches "re" fully - from the beginning to the end of "text".
+  //   b. The number of matched sub-patterns is >= number of supplied pointers.
+  //   c. The "i"th argument has a suitable type for holding the
+  //      string captured as the "i"th sub-pattern.  If you pass in
+  //      NULL for the "i"th argument, or pass fewer arguments than
+  //      number of sub-patterns, the "i"th captured sub-pattern is
+  //      ignored.
+  //
+  // CAVEAT: An optional sub-pattern that does not exist in the
+  // matched string is assigned the empty string.  Therefore, the
+  // following will return false (because the empty string is not a
+  // valid number):
+  //    int number;
+  //    RE2::FullMatch("abc", "[a-z]+(\\d+)?", &number);
+  fn FullMatch(text: StringPiece, re: Self) -> bool {
+    return Apply(FullMatchN, text, re);
+  }
+  // TODO: template <typename... A>
+  // TODO: static bool FullMatch(const StringPiece& text, const RE2& re, A&&... a) {
+  // TODO:   return Apply(FullMatchN, text, re, Arg(std::forward<A>(a))...);
+  // TODO: }
+
+  // Like FullMatch(), except that "re" is allowed to match a substring
+  // of "text".
+  //
+  // Returns true iff all of the following conditions are satisfied:
+  //   a. "text" matches "re" partially - for some substring of "text".
+  //   b. The number of matched sub-patterns is >= number of supplied pointers.
+  //   c. The "i"th argument has a suitable type for holding the
+  //      string captured as the "i"th sub-pattern.  If you pass in
+  //      NULL for the "i"th argument, or pass fewer arguments than
+  //      number of sub-patterns, the "i"th captured sub-pattern is
+  //      ignored.
+  fn PartialMatch(text: StringPiece, re: Self) -> bool {
+    return Apply(PartialMatchN, text, re);
+  }
+  // TODO: template <typename... A>
+  // TODO: static bool PartialMatch(const StringPiece& text, const RE2& re, A&&... a) {
+  // TODO:   return Apply(PartialMatchN, text, re, Arg(std::forward<A>(a))...);
+  // TODO: }
+
+  // Like FullMatch() and PartialMatch(), except that "re" has to match
+  // a prefix of the text, and "input" is advanced past the matched
+  // text.  Note: "input" is modified iff this routine returns true
+  // and "re" matched a non-empty substring of "input".
+  //
+  // Returns true iff all of the following conditions are satisfied:
+  //   a. "input" matches "re" partially - for some prefix of "input".
+  //   b. The number of matched sub-patterns is >= number of supplied pointers.
+  //   c. The "i"th argument has a suitable type for holding the
+  //      string captured as the "i"th sub-pattern.  If you pass in
+  //      NULL for the "i"th argument, or pass fewer arguments than
+  //      number of sub-patterns, the "i"th captured sub-pattern is
+  //      ignored.
+  fn Consume(input: StringPiece*, re: Self) {
+    return Apply(ConsumeN, input, re);
+  }
+  // TODO: template <typename... A>
+  // TODO: static bool Consume(StringPiece* input, const RE2& re, A&&... a) {
+  // TODO:   return Apply(ConsumeN, input, re, Arg(std::forward<A>(a))...);
+  // TODO: }
+
+  // Like Consume(), but does not anchor the match at the beginning of
+  // the text.  That is, "re" need not start its match at the beginning
+  // of "input".  For example, "FindAndConsume(s, "(\\w+)", &word)" finds
+  // the next word in "s" and stores it in "word".
+  //
+  // Returns true iff all of the following conditions are satisfied:
+  //   a. "input" matches "re" partially - for some substring of "input".
+  //   b. The number of matched sub-patterns is >= number of supplied pointers.
+  //   c. The "i"th argument has a suitable type for holding the
+  //      string captured as the "i"th sub-pattern.  If you pass in
+  //      NULL for the "i"th argument, or pass fewer arguments than
+  //      number of sub-patterns, the "i"th captured sub-pattern is
+  //      ignored.
+  fn FindAndConsume(input: StringPiece*, re: Self) {
+    return Apply(FindAndConsumeN, input, re);
+  }
+  // TODO: template <typename... A>
+  // TODO: static bool FindAndConsume(StringPiece* input, const RE2& re, A&&... a) {
+  // TODO:   return Apply(FindAndConsumeN, input, re, Arg(std::forward<A>(a))...);
+  // TODO: }
+
+  // Replace the first match of "re" in "str" with "rewrite".
+  // Within "rewrite", backslash-escaped digits (\1 to \9) can be
+  // used to insert text matching corresponding parenthesized group
+  // from the pattern.  \0 in "rewrite" refers to the entire matching
+  // text.  E.g.,
+  //
+  //   std::string s = "yabba dabba doo";
+  //   CHECK(RE2::Replace(&s, "b+", "d"));
+  //
+  // will leave "s" containing "yada dabba doo"
+  //
+  // Returns true if the pattern matches and a replacement occurs,
+  // false otherwise.
+  fn Replace(str: String*, re: Self, rewrite: StringPiece) -> bool;
+
+  // Like Replace(), except replaces successive non-overlapping occurrences
+  // of the pattern in the string with the rewrite. E.g.
+  //
+  //   std::string s = "yabba dabba doo";
+  //   CHECK(RE2::GlobalReplace(&s, "b+", "d"));
+  //
+  // will leave "s" containing "yada dada doo"
+  // Replacements are not subject to re-matching.
+  //
+  // Because GlobalReplace only replaces non-overlapping matches,
+  // replacing "ana" within "banana" makes only one replacement, not two.
+  //
+  // Returns the number of replacements made.
+  fn GlobalReplace(str: String*, re: Self, rewrite: StringPiece) -> i32;
+
+  // Like Replace, except that if the pattern matches, "rewrite"
+  // is copied into "out" with substitutions.  The non-matching
+  // portions of "text" are ignored.
+  //
+  // Returns true iff a match occurred and the extraction happened
+  // successfully;  if no match occurs, the string is left unaffected.
+  //
+  // REQUIRES: "text" must not alias any part of "*out".
+  fn Extract(text: StringPiece,
+             re: Self,
+             rewrite: StringPiece,
+             out: String*)
+    -> bool;
+
+  // Escapes all potentially meaningful regexp characters in
+  // 'unquoted'.  The returned string, used as a regular expression,
+  // will match exactly the original string.  For example,
+  //           1.5-2.0?
+  // may become:
+  //           1\.5\-2\.0\?
+  fn QuoteMeta(unquoted: StringPiece) -> String;
+
+  // Computes range for any strings matching regexp. The min and max can in
+  // some cases be arbitrarily precise, so the caller gets to specify the
+  // maximum desired length of string returned.
+  //
+  // Assuming PossibleMatchRange(&min, &max, N) returns successfully, any
+  // string s that is an anchored match for this regexp satisfies
+  //   min <= s && s <= max.
+  //
+  // Note that PossibleMatchRange() will only consider the first copy of an
+  // infinitely repeated element (i.e., any regexp element followed by a '*' or
+  // '+' operator). Regexps with "{N}" constructions are not affected, as those
+  // do not compile down to infinite repetitions.
+  //
+  // Returns true on success, false on error.
+  fn PossibleMatchRange[me: Self](min: String*, max: String*, maxlen: i32);
+
+  // Generic matching interface
+
+  // Type of match.
+  enum Anchor {
+    // No anchoring
+    UNANCHORED,
+    // Anchor at start only
+    ANCHOR_START,
+    // Anchor at start and end
+    ANCHOR_BOTH
+  }
+
+  // Return the number of capturing subpatterns, or -1 if the
+  // regexp wasn't valid on construction.  The overall match ($0)
+  // does not count: if the regexp is "(a)(b)", returns 2.
+  fn NumberOfCapturingGroups[me: Self]() -> i32 { return me.num_captures_; }
+
+  // Return a map from names to capturing indices.
+  // The map records the index of the leftmost group
+  // with the given name.
+  // NOTE: Originally returned by reference with comment "valid until re is deleted".
+  fn NamedCapturingGroups[me: Self]() -> Map(String, i32);
+
+  // Return a map from capturing indices to names.
+  // The map has no entries for unnamed groups.
+  // NOTE: Originally returned by reference with comment "valid until re is deleted".
+  fn CapturingGroupNames[me: Self]() -> Map(i32, String);
+
+  // General matching routine.
+  // Match against text starting at offset startpos
+  // and stopping the search at offset endpos.
+  // Returns true if match found, false if not.
+  // On a successful match, fills in submatch[] (up to nsubmatch entries)
+  // with information about submatches.
+  // I.e. matching RE2("(foo)|(bar)baz") on "barbazbla" will return true, with
+  // submatch[0] = "barbaz", submatch[1].data() = NULL, submatch[2] = "bar",
+  // submatch[3].data() = NULL, ..., up to submatch[nsubmatch-1].data() = NULL.
+  // Caveat: submatch[] may be clobbered even on match failure.
+  //
+  // Don't ask for more match information than you will use:
+  // runs much faster with nsubmatch == 1 than nsubmatch > 1, and
+  // runs even faster if nsubmatch == 0.
+  // Doesn't make sense to use nsubmatch > 1 + NumberOfCapturingGroups(),
+  // but will be handled correctly.
+  //
+  // Passing text == StringPiece(NULL, 0) will be handled like any other
+  // empty string, but note that on return, it will not be possible to tell
+  // whether submatch i matched the empty string or did not match:
+  // either way, submatch[i].data() == NULL.
+  fn Match[me: Self](text: StringPiece,
+                     startpos: i64,
+                     endpos: i64,
+                     re_anchor: Anchor,
+                     submatch: ArrayIterator(StringPiece),
+                     nsubmatch: i32)
+    -> bool;
+
+  // Check that the given rewrite string is suitable for use with this
+  // regular expression.  It checks that:
+  //   * The regular expression has enough parenthesized subexpressions
+  //     to satisfy all of the \N tokens in rewrite
+  //   * The rewrite string doesn't have any syntax errors.  E.g.,
+  //     '\' followed by anything other than a digit or '\'.
+  // A true return value guarantees that Replace() and Extract() won't
+  // fail because of a bad rewrite string.
+  fn CheckRewriteString[me: Self](rewrite: StringPiece, error: String*) -> bool;
+
+  // Returns the maximum submatch needed for the rewrite to be done by
+  // Replace(). E.g. if rewrite == "foo \\2,\\1", returns 2.
+  fn MaxSubmatch(rewrite: StringPiece) -> i32;
+
+  // Append the "rewrite" string, with backslash subsitutions from "vec",
+  // to string "out".
+  // Returns true on success.  This method can fail because of a malformed
+  // rewrite string.  CheckRewriteString guarantees that the rewrite will
+  // be successful.
+  fn Rewrite[me: Self](out: String*, rewrite: StringPiece,
+                       vec: ArrayIterator(StringPiece), veclen: i32)
+    -> bool;
+
+  // Constructor options
+  class Options {
+    // The options are (defaults in parentheses):
+    //
+    //   utf8             (true)  text and pattern are UTF-8; otherwise Latin-1
+    //   posix_syntax     (false) restrict regexps to POSIX egrep syntax
+    //   longest_match    (false) search for longest match, not first match
+    //   log_errors       (true)  log syntax and execution errors to ERROR
+    //   max_mem          (see below)  approx. max memory footprint of RE2
+    //   literal          (false) interpret string as literal, not regexp
+    //   never_nl         (false) never match \n, even if it is in regexp
+    //   dot_nl           (false) dot matches everything including new line
+    //   never_capture    (false) parse all parens as non-capturing
+    //   case_sensitive   (true)  match is case-sensitive (regexp can override
+    //                              with (?i) unless in posix_syntax mode)
+    //
+    // The following options are only consulted when posix_syntax == true.
+    // When posix_syntax == false, these features are always enabled and
+    // cannot be turned off; to perform multi-line matching in that case,
+    // begin the regexp with (?m).
+    //   perl_classes     (false) allow Perl's \d \s \w \D \S \W
+    //   word_boundary    (false) allow Perl's \b \B (word boundary and not)
+    //   one_line         (false) ^ and $ only match beginning and end of text
+    //
+    // The max_mem option controls how much memory can be used
+    // to hold the compiled form of the regexp (the Prog) and
+    // its cached DFA graphs.  Code Search placed limits on the number
+    // of Prog instructions and DFA states: 10,000 for both.
+    // In RE2, those limits would translate to about 240 KB per Prog
+    // and perhaps 2.5 MB per DFA (DFA state sizes vary by regexp; RE2 does a
+    // better job of keeping them small than Code Search did).
+    // Each RE2 has two Progs (one forward, one reverse), and each Prog
+    // can have two DFAs (one first match, one longest match).
+    // That makes 4 DFAs:
+    //
+    //   forward, first-match    - used for UNANCHORED or ANCHOR_START searches
+    //                               if opt.longest_match() == false
+    //   forward, longest-match  - used for all ANCHOR_BOTH searches,
+    //                               and the other two kinds if
+    //                               opt.longest_match() == true
+    //   reverse, first-match    - never used
+    //   reverse, longest-match  - used as second phase for unanchored searches
+    //
+    // The RE2 memory budget is statically divided between the two
+    // Progs and then the DFAs: two thirds to the forward Prog
+    // and one third to the reverse Prog.  The forward Prog gives half
+    // of what it has left over to each of its DFAs.  The reverse Prog
+    // gives it all to its longest-match DFA.
+    //
+    // Once a DFA fills its budget, it flushes its cache and starts over.
+    // If this happens too often, RE2 falls back on the NFA implementation.
+
+    // For now, make the default budget something close to Code Search.
+    // TODO: How to define a class-scope constant?
+    let kDefaultMaxMem:! i32 = 8 << 20;
+
+    enum Encoding {
+      EncodingUTF8 = 1,
+      EncodingLatin1
+    }
+
+    // TODO: A `;` after this would be nicer than a `{}`.
+    impl as DefaultValue where .Value = {
+      .encoding_ = EncodingUTF8,
+      .posix_syntax_ = false,
+      .longest_match_ = false,
+      .log_errors_ = true,
+      .max_mem_ = kDefaultMaxMem,
+      .literal_ = false,
+      .never_nl_ = false,
+      .dot_nl_ = false,
+      .never_capture_ = false,
+      .case_sensitive_ = true,
+      .perl_classes_ = false,
+      .word_boundary_ = false,
+      .one_line_ = false} {}
+
+    impl CannedOptions as ImplicitAs(Self);
+
+    fn encoding[me: Self]() -> Encoding { return me.encoding_; }
+    fn set_encoding[addr me: Self*](encoding: Encoding) { me->encoding_ = encoding; }
+
+    fn posix_syntax[me: Self]() -> bool { return me.posix_syntax_; }
+    fn set_posix_syntax[addr me: Self*](b: bool) { me->posix_syntax_ = b; }
+
+    fn longest_match[me: Self]() -> bool { return me.longest_match_; }
+    fn set_longest_match[addr me: Self*](b: bool) { me->longest_match_ = b; }
+
+    fn log_errors[me: Self]() -> bool { return me.log_errors_; }
+    fn set_log_errors[addr me: Self*](b: bool) { me->log_errors_ = b; }
+
+    fn max_mem[me: Self]() -> i64 { return me.max_mem_; }
+    fn set_max_mem[addr me: Self*](m: i64) { me->max_mem_ = m; }
+
+    fn literal[me: Self]() -> bool { return me.literal_; }
+    fn set_literal[addr me: Self*](b: bool) { me->literal_ = b; }
+
+    fn never_nl[me: Self]() -> bool { return me.never_nl_; }
+    fn set_never_nl[addr me: Self*](b: bool) { me->never_nl_ = b; }
+
+    fn dot_nl[me: Self]() -> bool { return me.dot_nl_; }
+    fn set_dot_nl[addr me: Self*](b: bool) { me->dot_nl_ = b; }
+
+    fn never_capture[me: Self]() -> bool { return me.never_capture_; }
+    fn set_never_capture[addr me: Self*](b: bool) { me->never_capture_ = b; }
+
+    fn case_sensitive[me: Self]() -> bool { return me.case_sensitive_; }
+    fn set_case_sensitive[addr me: Self*](b: bool) { me->case_sensitive_ = b; }
+
+    fn perl_classes[me: Self]() -> bool { return me.perl_classes_; }
+    fn set_perl_classes[addr me: Self*](b: bool) { me->perl_classes_ = b; }
+
+    fn word_boundary[me: Self]() -> bool { return me.word_boundary_; }
+    fn set_word_boundary[addr me: Self*](b: bool) { me->word_boundary_ = b; }
+
+    fn one_line[me: Self]() -> bool { return me.one_line_; }
+    fn set_one_line[addr me: Self*](b: bool) { me->one_line_ = b; }
+
+    fn Copy[addr me: Self*](src: Options) {
+      *me = src;
+    }
+
+    fn ParseFlags[me: Self]() -> i32;
+
+    private var encoding_: Encoding;
+    private var posix_syntax_: bool;
+    private var longest_match_: bool;
+    private var log_errors_: bool;
+    private var max_mem_: i64;
+    private var literal_: bool;
+    private var never_nl_: bool;
+    private var dot_nl_: bool;
+    private var never_capture_: bool;
+    private var case_sensitive_: bool;
+    private var perl_classes_: bool;
+    private var word_boundary_: bool;
+    private var one_line_: bool;
+  };
+
+  // Returns the options set in the constructor.
+  fn options[me: Self]() -> Options { return me.options_; }
+
+  // Argument converters; see below.
+  // TODO: Should these be package members not class members in Carbon
+  // so you use `RE2.Hex` not `RE2.RE2.Hex`?
+  fn CRadix[T:! Parse4ary](ptr: T*) -> Self.Arg;
+  fn Hex[T:! Parse4ary](ptr: T*) -> Self.Arg;
+  fn Octal[T:! Parse4ary](ptr: T*) -> Self.Arg;
+
+  private fn Init[addr me: Self](pattern: StringPiece, options: Options);
+
+  private fn DoMatch[me: Self](text: StringPiece,
+                               re_anchor: Anchor,
+                               consumed: i64*,
+                               // TODO: Pointer to `const Arg`.
+                               args: Array(Arg*),
+                               n: i32)
+    -> bool;
+
+  fn ReverseProg[me: Self]() -> package.Prog*;
+
+  // string regular expression
+  private var pattern_: String;
+  // option flags
+  private var options_: Options;
+  // parsed regular expression
+  private var entire_regexp_: package.Regexp*;
+  // error indicator (or points to empty string)
+  // TODO: pointer to `const String`
+  private var error_: String*;
+  // error code
+  private var error_code_: ErrorCode;
+  // fragment of regexp showing error
+  private var error_arg_: String;
+  // required prefix (before suffix_regexp_)
+  private var prefix_: String;
+  // prefix_ is ASCII case-insensitive
+  private var prefix_foldcase_: bool;
+  // parsed regular expression, prefix_ removed
+  private var suffix_regexp_: package.Regexp*;
+  // compiled program for regexp
+  private var prog_: package.Prog*;
+  // number of capturing groups
+  private var num_captures_: i32;
+  // can use prog_->SearchOnePass?
+  private var is_one_pass_: bool;
+
+  // TODO: Rest of the member variables are mutable.
+
+  // Reverse Prog for DFA execution only
+  private var rprog_: package.Prog*;
+  // Map from capture names to indices
+  // TODO: pointer to const map
+  private var named_groups_: Map(String, i32)*;
+  // Map from capture indices to names
+  // TODO: pointer to const map
+  private var group_names_: Map(i32, String)*;
+
+  private var rprog_once_: Cpp.std.once_flag;
+  private var named_groups_once_: Cpp.std.once_flag;
+  private var group_names_once_: Cpp.std.once_flag;
+};
+
+/***** Implementation details *****/
+
+private interface Parse3ary {
+  fn Parse(str: StringView, n: i64, dest: Self*) -> bool;
+}
+impl void as Parse3ary;
+impl String as Parse3ary;
+impl StringPiece as Parse3ary;
+impl Char as Parse3ary;
+impl f32 as Parse3ary;
+impl f64 as Parse3ary;
+
+private interface Parse4ary {
+  fn Parse(str: StringView, n: i64, dest: Self*, radix: i32) -> bool;
+}
+impl i16 as Parse4ary;
+impl u16 as Parse4ary;
+impl i32 as Parse4ary;
+impl u32 as Parse4ary;
+impl i64 as Parse4ary;
+impl u64 as Parse4ary;
+
+interface ParseFrom {
+  fn Parse(str: StringView, n: i64) -> bool;
+}
+
+class RE2.Arg {
+  fn Make() -> Self { return Make(nullptr); }
+  // TODO: Can we put an irrefutable pattern here?
+  // TODO: Is 'nullptr' an irrefutable pattern of type nullptr_t (whatever we call that)?
+  fn Make(nullptr) -> Self { return Make(nullptr as NullArg*); }
+
+  interface Parseable {
+    fn Parse[addr me: Self*](str: StringView, n: i64) -> bool;
+  }
+  match_first {
+    impl [T:! Parse3ary] T as Parseable {
+      fn Parse[addr me: Self*](str: StringView, n: i64) -> bool {
+        return T.Parse(str, n, me);
+      }
+    }
+    impl [T:! Parse4ary] T as Parseable {
+      fn Parse[addr me: Self*](str: StringView, n: i64) -> bool {
+        return T.Parse(str, n, me, 10);
+      }
+    }
+    impl [T:! ParseFrom] T as Parseable {
+      fn Parse[addr me: Self*](str: StringView, n: i64) -> bool {
+        if (me == nullptr) { return true; }
+        return T.Parse(str, n, me);
+      }
+    }
+  }
+
+  private class NullArg {}
+  impl NullArg as Parseable {
+    fn Parse[addr me: Self*](str: StringView, n: i64) -> bool {
+      return true;
+    }
+  }
+
+  fn Make[T:! Parseable](ptr: T*) {
+    return {.type_ = T, .arg_ = ptr};
+  }
+
+  fn Parse[me: Self](str: StringView, n: i64) -> bool {
+    return me.arg_->Parse(str, n);
+  }
+
+  // TODO: Existential types or `DynPtr(Parseable)`.
+  private let type_: Parseable;
+  private var arg_: Nullable(type_*);
+}
+
+private adapter ParseAsBase(T:! Parse4ary, base: i32) for T {
+  impl as Self.Arg.Parseable {
+    fn Parse[addr me: Self*](str: StringView, n: i64) -> bool {
+      return T.Parse(str, n, me, base);
+    }
+  }
+}
+
+fn RE2.CRadix[T:! Parse4ary](ptr: T*) -> Self.Arg {
+  return Self.Arg.Make(ptr as ParseAsBase(T, 0)*);
+}
+
+fn RE2.Hex[T:! Parse4ary](ptr: T*) -> Self.Arg {
+  return Self.Arg.Make(ptr as ParseAsBase(T, 16)*);
+}
+
+fn RE2.Octal[T:! Parse4ary](ptr: T*) -> Self.Arg {
+  return Self.Arg.Make(ptr as ParseAsBase(T, 8)*);
+}
+
+// Helper for writing global or static RE2s safely.
+// Write
+//     static LazyRE2 re = {".*"};
+// and then use *re instead of writing
+//     static RE2 re(".*");
+// The former is more careful about multithreaded
+// situations than the latter.
+//
+// N.B. This class never deletes the RE2 object that
+// it constructs: that's a feature, so that it can be used
+// for global and function static variables.
+class LazyRE2 {
+  class NoArg {}
+
+  alias element_type = RE2;  // support std::pointer_traits
+
+  // Permit implicit conversion from a struct.
+  // TODO: Think about how this interacts with the access check for the `As`
+  // and `ImplicitAs` conversions from structs to classes.
+  impl {.pattern_: StringPiece} as ImplicitAs(Self) {}
+  impl {.pattern_: StringPiece, .options_: RE2.CannedOptions} as ImplicitAs(Self) {}
+
+  // Pretend to be a pointer to Type (never NULL due to on-demand creation):
+  impl as Pointer where .Pointee = RE2 {
+    fn Resolve[me: Self]() -> Pointee* { return me.get(); }
+  }
+
+  // Named accessor/initializer:
+  fn get[addr me: Self*]() -> RE* {
+    Cpp.std.call_once(once_, Self.Init, me);
+    return ptr_;
+  }
+
+  var pattern_: StringPiece;
+  var options_: RE2.CannedOptions;
+
+  // TODO: mutable?
+  private var ptr_: RE2*;
+  private var once_: Cpp.std.once_flag;
+
+  private fn Init(lazy_re2: LazyRE2*) {
+    lazy_re2->ptr_ = heap.New!(RE2.Make(lazy_re2->pattern_, lazy_re2->options_));
+  }
+}
+
+// TODO: namespace hooks {
+// TODO:
+// TODO: // Most platforms support thread_local. Older versions of iOS don't support
+// TODO: // thread_local, but for the sake of brevity, we lump together all versions
+// TODO: // of Apple platforms that aren't macOS. If an iOS application really needs
+// TODO: // the context pointee someday, we can get more specific then...
+// TODO: //
+// TODO: // As per https://github.com/google/re2/issues/325, thread_local support in
+// TODO: // MinGW seems to be buggy. (FWIW, Abseil folks also avoid it.)
+// TODO: #define RE2_HAVE_THREAD_LOCAL
+// TODO: #if (defined(__APPLE__) && !(defined(TARGET_OS_OSX) && TARGET_OS_OSX)) || defined(__MINGW32__)
+// TODO: #undef RE2_HAVE_THREAD_LOCAL
+// TODO: #endif
+// TODO:
+// TODO: // A hook must not make any assumptions regarding the lifetime of the context
+// TODO: // pointee beyond the current invocation of the hook. Pointers and references
+// TODO: // obtained via the context pointee should be considered invalidated when the
+// TODO: // hook returns. Hence, any data about the context pointee (e.g. its pattern)
+// TODO: // would have to be copied in order for it to be kept for an indefinite time.
+// TODO: //
+// TODO: // A hook must not use RE2 for matching. Control flow reentering RE2::Match()
+// TODO: // could result in infinite mutual recursion. To discourage that possibility,
+// TODO: // RE2 will not maintain the context pointer correctly when used in that way.
+// TODO: #ifdef RE2_HAVE_THREAD_LOCAL
+// TODO: extern thread_local const RE2* context;
+// TODO: #endif
+// TODO:
+// TODO: struct DFAStateCacheReset {
+// TODO:   int64_t state_budget;
+// TODO:   size_t state_cache_size;
+// TODO: };
+// TODO:
+// TODO: struct DFASearchFailure {
+// TODO:   // Nothing yet...
+// TODO: };
+// TODO:
+// TODO: #define DECLARE_HOOK(type)                  \
+// TODO:   using type##Callback = void(const type&); \
+// TODO:   void Set##type##Hook(type##Callback* cb); \
+// TODO:   type##Callback* Get##type##Hook();
+// TODO:
+// TODO: DECLARE_HOOK(DFAStateCacheReset)
+// TODO: DECLARE_HOOK(DFASearchFailure)
+// TODO:
+// TODO: #undef DECLARE_HOOK
+// TODO:
+// TODO: }  // namespace hooks