Просмотр исходного кода

Run clang-format on parser.ypp and lexer.lpp (#738)

Co-authored-by: Geoff Romer <gromer@google.com>
Jon Meow 4 года назад
Родитель
Commit
9f67c4b9a8

+ 6 - 0
.pre-commit-config.yaml

@@ -80,6 +80,12 @@ repos:
         args: ['-I', '.codespell_ignore', '--uri-ignore-words-list', '*']
   - repo: local
     hooks:
+      - id: executable-semantics-format-grammar
+        name: Format the executable_semantics grammar file
+        entry: executable_semantics/syntax/format_grammar.py
+        language: python
+        # format_grammar.py rewrites both lexer.lpp and parser.ypp, so run the
+        # hook when either file changes. Dots are escaped so that they only
+        # match a literal `.`.
+        files: ^executable_semantics/syntax/(lexer\.lpp|parser\.ypp)$
+        # The script has its own fixed list of files and takes no path
+        # arguments.
+        pass_filenames: false
       - id: executable-semantics-tests
         name: Update list of executable_semantics tests
         description: Updates executable_semantics/test_list.bzl

+ 18 - 0
executable_semantics/syntax/BUILD

@@ -4,6 +4,8 @@
 
 package(default_visibility = ["//executable_semantics:__pkg__"])
 
+load("@mypy_integration//:mypy.bzl", "mypy_test")
+
 cc_library(
     name = "bison_wrap",
     hdrs = ["bison_wrap.h"],
@@ -83,3 +85,19 @@ genrule(
         "@rules_m4//m4:current_m4_toolchain",
     ],
 )
+
+# The grammar formatter script, exposed as a library so that it can be
+# unit-tested and type-checked.
+py_library(
+    name = "format_grammar_lib",
+    srcs = ["format_grammar.py"],
+)
+
+# Unit tests for the formatter's parsing and table-formatting helpers.
+py_test(
+    name = "format_grammar_test",
+    srcs = ["format_grammar_test.py"],
+    deps = [":format_grammar_lib"],
+)
+
+# Static type check of the formatter.
+mypy_test(
+    name = "format_grammar_mypy_test",
+    deps = [":format_grammar_lib"],
+)

+ 427 - 0
executable_semantics/syntax/format_grammar.py

@@ -0,0 +1,427 @@
+#!/usr/bin/env python3
+
+"""Formats parser.ypp and lexer.lpp with clang-format."""
+
+__copyright__ = """
+Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+Exceptions. See /LICENSE for license information.
+SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+"""
+
+import argparse
+import os
+import re
+import subprocess
+import sys
+import textwrap
+from dataclasses import dataclass
+from typing import cast
+from typing import Dict
+from typing import List
+from typing import Optional
+from typing import Tuple
+
+# Files to format. Paths are relative to the repository root; main() changes
+# the working directory there before reading them.
+_FILES = (
+    "executable_semantics/syntax/parser.ypp",
+    "executable_semantics/syntax/lexer.lpp",
+)
+
+# Columns to format to. The indent of each code segment is subtracted from
+# this before it is passed to clang-format as the column limit.
+_COLS = 80
+
+# An arbitrary separator to use when formatting multiple code segments.
+# Segments are joined with it, formatted in a single clang-format call, and
+# the output is split on it again.
+_FORMAT_SEPARATOR = "\n// CLANG FORMAT CODE SEGMENT SEPARATOR\n"
+
+# The table begin and end comments, including table-bounding newlines.
+# The end comment is recognized both at the start of a line and indented by
+# one space.
+_TABLE_BEGIN = "/* Table begin. */\n"
+_TABLE_END = "\n/* Table end. */"
+_TABLE_END_WITH_SPACE = "\n /* Table end. */"
+
+
+@dataclass
+class _CppCode:
+    """Information about a code segment for formatting.
+
+    One instance is recorded for each braced (`{ }` or `%{ %}`) section that
+    is treated as C++ code; the formatted result is later written into the
+    segment list at `segment_index`.
+    """
+
+    # The index of the code segment in the list of all segments.
+    segment_index: int
+    # The code content with braces stripped.
+    content: str
+    # The column of the open brace in the original line.
+    open_brace_column: int
+    # The generated indent of the close brace when the formatted output is
+    # multi-line. This is 0 unless the open brace is preceded only by
+    # whitespace on its line.
+    close_brace_indent: int
+    # Whether to write `%}` or `}`.
+    has_percent: bool
+
+
+@dataclass
+class _Table:
+    """Information about a table segment for formatting.
+
+    A table is the text between a `/* Table begin. */` comment and the
+    matching `/* Table end. */` comment.
+    """
+
+    # The index of the table segment in the list of all segments.
+    segment_index: int
+    # The table content, with wrapping comments stripped.
+    content: str
+
+
+def _parse_args() -> argparse.Namespace:
+    """Parses command-line arguments and flags."""
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        "--debug",
+        action="store_true",
+        help="Whether to print debug details.",
+    )
+    return parser.parse_args()
+
+
+def _clang_format(code: str, base_style: str, cols: int) -> str:
+    """Calls clang-format to format the given code."""
+    style = "--style={%s, ColumnLimit: %d}" % (base_style, cols)
+    output = subprocess.check_output(
+        args=["clang-format", style],
+        input=code.encode("utf-8"),
+    )
+    return output.decode("utf-8")
+
+
+def _find_string_end(content: str, start: int) -> int:
+    """Returns the end of a string, skipping escapes."""
+    i = start
+    while i < len(content):
+        c = content[i]
+        if c == "\\":
+            i += 1
+        elif c == '"':
+            return i
+        i += 1
+    exit("failed to find end of string: %s" % content[start : start + 20])
+
+
+def _find_brace_end(content: str, has_percent: bool, start: int) -> int:
+    """Returns the end of a braced section, skipping escapes.
+
+    If has_percent, expect `%}` instead of `}`.
+    """
+    i = start
+    while i < len(content):
+        c = content[i]
+        if c == '"':
+            # Skip over strings.
+            i = _find_string_end(content, i + 1)
+        elif c == "/" and content[i + 1 : i + 2] == "/":
+            # Skip over line comments.
+            i = content.find("\n", i + 2)
+            if i == -1:
+                i = len(content)
+        elif c == "{":
+            i = _find_brace_end(content, False, i + 1)
+        elif c == "}" and (not has_percent or content[i - 1] == "%"):
+            return i
+        i += 1
+    exit("failed to find end of brace: %s" % content[start : start + 20])
+
+
+def _add_text_segment(
+    text_segments: List[Optional[str]],
+    segment: str,
+    debug: bool,
+) -> None:
+    """Adds a text segment to the list."""
+    text_segments.append(segment)
+    if debug:
+        print("=== Text segment ===")
+        print(segment)
+        print("====================")
+
+
+def _maybe_add_cpp_segment(
+    content: str,
+    text_segments: List[Optional[str]],
+    cpp_segments: Dict[int, List[_CppCode]],
+    text_segment_start: int,
+    cpp_segment_start: int,
+    debug: bool,
+) -> Tuple[int, bool]:
+    """Checks if cpp_segment_start is really at a C++ segment, and adds if so.
+
+    Returns a tuple of (end, added) where `end` indicates the new offset into
+    content to parse at, and `added` indicates whether a C++ segment was really
+    added.
+    """
+    # lexer.lpp uses %{ %} for code, so detect it here.
+    has_percent = content[cpp_segment_start - 1] == "%"
+    # Find the end of the braced section.
+    end = _find_brace_end(content, has_percent, cpp_segment_start + 1)
+
+    # Determine the braced content, stripping the % and whitespace.
+    braced_content = content[cpp_segment_start + 1 : end]
+    if has_percent:
+        braced_content = braced_content.rstrip("% \n")
+    braced_content = braced_content.strip()
+
+    if not has_percent and braced_content[-1] not in (";", "}", '"'):
+        # Code would end with one of the indicated characters. This is
+        # likely a non-formattable braced section, such as `{AND}`.
+        # Keep treating it as text.
+        return (end, False)
+    else:
+        # Code has been found. First, record the text segment; then,
+        # indicate the non-text segment.
+        _add_text_segment(
+            text_segments, content[text_segment_start:cpp_segment_start], debug
+        )
+        text_segments.append(None)
+
+        # If the opening brace is the first character on its line, use
+        # its indent when wrapping.
+        close_brace_indent = 0
+        line_offset = content.rfind("\n", 0, cpp_segment_start)
+        if content[line_offset + 1 : cpp_segment_start].isspace():
+            close_brace_indent = cpp_segment_start - line_offset - 1
+
+        # Construct the code segment.
+        cpp_segment = _CppCode(
+            len(text_segments) - 1,
+            braced_content,
+            cpp_segment_start - (line_offset + 1),
+            close_brace_indent,
+            has_percent,
+        )
+        if debug:
+            print("=== C++ segment ===")
+            print(cpp_segment.content)
+            print(
+                "Structure: { at %d; } at %d; %%: %s"
+                % (
+                    cpp_segment.open_brace_column,
+                    cpp_segment.close_brace_indent,
+                    cpp_segment.has_percent,
+                )
+            )
+            print("===================")
+
+        # Record the code segment.
+        if close_brace_indent not in cpp_segments:
+            cpp_segments[close_brace_indent] = []
+        cpp_segments[close_brace_indent].append(cpp_segment)
+
+        # Increment cursors.
+        return (end, True)
+
+
+def _parse_block_comment(
+    content: str,
+    text_segments: List[Optional[str]],
+    table_segments: List[_Table],
+    text_segment_start: int,
+    cursor: int,
+    debug: bool,
+) -> Tuple[int, int]:
+    """Parses a comment, possibly adding a table segment.
+
+    Returns a tuple of (new_segment_start, new_cursor). Note the
+    new_segment_start may or may not change.
+    """
+    # Skip over block comments.
+    comment_end = content.find("*/", cursor + 2)
+    if comment_end == -1:
+        exit(
+            "failed to find end of /* comment: %s"
+            % content[cursor : cursor + 20]
+        )
+    comment_end += 2
+    if content[cursor : comment_end + 1] == _TABLE_BEGIN:
+        for table_end_style in (_TABLE_END, _TABLE_END_WITH_SPACE):
+            table_end = content.find(table_end_style, comment_end)
+            if table_end != -1:
+                break
+        if table_end == -1:
+            exit(
+                "failed to find end of table: %s"
+                % content[comment_end + 1 : comment_end + 20]
+            )
+        _add_text_segment(
+            text_segments, content[text_segment_start : comment_end + 1], debug
+        )
+        text_segments.append(None)
+        table_segments.append(
+            _Table(len(text_segments) - 1, content[comment_end + 1 : table_end])
+        )
+        return table_end, table_end + len(_TABLE_END) - 1
+    else:
+        return text_segment_start, comment_end - 1
+
+
+def _parse_segments(
+    content: str,
+    debug: bool,
+) -> Tuple[List[Optional[str]], Dict[int, List[_CppCode]], List[_Table]]:
+    """Parses out text, code, and table segments.
+
+    Returns a tuple `(text_segments, code_segments, table_segments)`:
+    - text_segments is a list version of the input content, with None where
+      other segments go.
+    - cpp_segments groups _CppCode objects by their close_brace_indent.
+    - table_segments is a list of _Table objects.
+    """
+    i = 0
+    segment_start = 0
+    text_segments: List[Optional[str]] = []
+    cpp_segments: Dict[int, List[_CppCode]] = {}
+    table_segments: List[_Table] = []
+    while i < len(content):
+        c = content[i]
+        if c == '"':
+            # Skip over strings.
+            i = _find_string_end(content, i + 1)
+        elif c == "/" and content[i + 1 : i + 2] == "*":
+            segment_start, i = _parse_block_comment(
+                content, text_segments, table_segments, segment_start, i, debug
+            )
+        elif c == "\\":
+            # Skip over escapes.
+            i += 1
+        elif c == "{":
+            i, added = _maybe_add_cpp_segment(
+                content, text_segments, cpp_segments, segment_start, i, debug
+            )
+            if added:
+                segment_start = i + 1
+        i += 1
+    _add_text_segment(text_segments, content[segment_start:], debug)
+    return text_segments, cpp_segments, table_segments
+
+
+def _format_cpp_segments(
+    base_style: str,
+    text_segments: List[Optional[str]],
+    cpp_segments: Dict[int, List[_CppCode]],
+    debug: bool,
+) -> None:
+    """Does the actual C++ code formatting.
+
+    Formatting is done in groups, divided by indent because that affects code
+    formatting.
+    """
+    # Iterate through code segments, formatting them in groups.
+    for close_brace_indent, code_list in cpp_segments.items():
+        format_input = _FORMAT_SEPARATOR.join(
+            [code.content for code in code_list]
+        )
+        code_indent = close_brace_indent + 2
+        formatted_block = _clang_format(
+            format_input, base_style, _COLS - code_indent
+        )
+        formatted_segments = formatted_block.split(_FORMAT_SEPARATOR)
+
+        # If there's a mismatch in lengths, error with the formatted output to
+        # help determine what was wrong with input.
+        if len(code_list) != len(formatted_segments):
+            if debug:
+                sys.stderr.write(formatted_block)
+            exit(
+                (
+                    "Unexpected formatting error (likely bad input): wanted %d "
+                    "segments, got %d (see above code)"
+                )
+                % (len(code_list), len(formatted_segments))
+            )
+
+        for i in range(len(formatted_segments)):
+            code = code_list[i]
+            formatted = formatted_segments[i]
+            # The '4' here is from the `{  }` wrapper that is otherwise added.
+            if (
+                code.has_percent
+                or code.open_brace_column + len(formatted) + 4 > _COLS
+                or "\n" in formatted
+            ):
+                close_percent = ""
+                if code.has_percent:
+                    close_percent = "%"
+                text_segments[code.segment_index] = "{\n%s\n%s%s}" % (
+                    textwrap.indent(formatted, " " * code_indent),
+                    " " * code.close_brace_indent,
+                    close_percent,
+                )
+            else:
+                text_segments[code.segment_index] = "{ %s }" % formatted
+
+
+def _format_table_segments(
+    text_segments: List[Optional[str]],
+    table_segments: List[_Table],
+    debug: bool,
+) -> None:
+    """Formats table segments."""
+    for table in table_segments:
+        lines = table.content.strip().splitlines()
+        rows: List[List[str]] = []
+        col_widths: List[int] = []
+        for row_index in range(len(lines)):
+            cols = re.findall("[^ ]+", lines[row_index])
+            rows.append(cols)
+            if not col_widths:
+                if len(cols) == 0:
+                    exit("Black line in table")
+                col_widths = [0] * len(cols)
+            elif len(col_widths) != len(cols):
+                exit(
+                    "Wanted %d columns, found %d in `%s`"
+                    % (len(col_widths), len(cols), lines[row_index])
+                )
+            for col_index in range(len(cols)):
+                col_widths[col_index] = max(
+                    col_widths[col_index], len(cols[col_index])
+                )
+        # The last column should not add spaces.
+        row_format = " ".join(
+            ["%%-%ds" % width for width in col_widths[:-1]] + ["%s"]
+        )
+        text_segments[table.segment_index] = "\n".join(
+            [row_format % tuple(cols) for cols in rows]
+        )
+
+
+def _format_file(path: str, base_style: str, debug: bool) -> None:
+    """Formats a file, writing the result."""
+    content = open(path).read()
+    text_segments, cpp_segments, table_segments = _parse_segments(
+        content, debug
+    )
+    _format_cpp_segments(base_style, text_segments, cpp_segments, debug)
+    _format_table_segments(text_segments, table_segments, debug)
+    assert None not in text_segments
+    open(path, "w").write("".join(cast(List[str], text_segments)))
+
+
+def main() -> None:
+    """See the file comment."""
+    parsed_args = _parse_args()
+
+    # Go to the repository root so that paths will match bazel's view.
+    os.chdir(os.path.join(os.path.dirname(__file__), "../.."))
+
+    # TODO: Switch to `BasedOnStyle: InheritParentConfig`
+    # (https://reviews.llvm.org/D93844) once releases support it.
+    format_config = open(".clang-format").readlines()
+    base_style = ", ".join(
+        [
+            x.strip()
+            for x in format_config
+            if x[0].isalpha()
+            # Allow single-line blocks for short rules.
+            and not x.startswith("AllowShortBlocksOnASingleLine:")
+        ]
+    )
+
+    # Format the grammar files.
+    for path in _FILES:
+        _format_file(path, base_style, parsed_args.debug)
+
+
+if __name__ == "__main__":
+    main()

+ 192 - 0
executable_semantics/syntax/format_grammar_test.py

@@ -0,0 +1,192 @@
+"""Tests for format_grammar.py."""
+
+__copyright__ = """
+Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+Exceptions. See /LICENSE for license information.
+SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+"""
+
+import unittest
+
+from executable_semantics.syntax import format_grammar
+
+
+class TestFormatGrammar(unittest.TestCase):
+    """Tests for segment parsing and table formatting in format_grammar."""
+
+    def test_empty(self):
+        """Empty input yields a single empty text segment."""
+        self.assertEqual(
+            format_grammar._parse_segments("", False), ([""], {}, [])
+        )
+
+    def test_text(self):
+        """Plain text is returned as one text segment."""
+        self.assertEqual(
+            format_grammar._parse_segments("text", False),
+            (["text"], {}, []),
+        )
+
+    def test_cpp(self):
+        """A braced statement becomes a code segment between empty texts."""
+        self.assertEqual(
+            format_grammar._parse_segments("{ code; }", False),
+            (
+                ["", None, ""],
+                {0: [format_grammar._CppCode(1, "code;", 0, 0, False)]},
+                [],
+            ),
+        )
+
+    def test_word_in_braces(self):
+        """A braced word such as `{AND}` stays text."""
+        self.assertEqual(
+            format_grammar._parse_segments("{AND}", False),
+            (["{AND}"], {}, []),
+        )
+
+    def test_cpp_str(self):
+        """A brace inside a string within code does not open a nested brace."""
+        self.assertEqual(
+            format_grammar._parse_segments('{ "\\x {"; }', False),
+            (
+                ["", None, ""],
+                {0: [format_grammar._CppCode(1, '"\\x {";', 0, 0, False)]},
+                [],
+            ),
+        )
+
+    def test_brace_in_str(self):
+        """A brace inside a top-level string does not start a code segment."""
+        self.assertEqual(
+            format_grammar._parse_segments('"{" not code }', False),
+            (['"{" not code }'], {}, []),
+        )
+
+    def test_quote_regex(self):
+        """An escaped quote outside of a string does not start a string."""
+        self.assertEqual(
+            format_grammar._parse_segments('\\"', False),
+            (['\\"'], {}, []),
+        )
+
+    def test_block_comment_quote(self):
+        """A quote inside a block comment does not start a string."""
+        self.assertEqual(
+            format_grammar._parse_segments('/* " */', False),
+            (['/* " */'], {}, []),
+        )
+
+    def test_cpp_after_block_comment(self):
+        """Code right after a block comment records the brace's column."""
+        self.assertEqual(
+            format_grammar._parse_segments("/* */{ code; }", False),
+            (
+                ["/* */", None, ""],
+                {0: [format_grammar._CppCode(1, "code;", 5, 0, False)]},
+                [],
+            ),
+        )
+
+    def test_line_comment_quote(self):
+        """A quote inside a line comment within code does not start a string."""
+        self.assertEqual(
+            format_grammar._parse_segments('{\n// "\n}', False),
+            (
+                ["", None, ""],
+                {0: [format_grammar._CppCode(1, '// "', 0, 0, False)]},
+                [],
+            ),
+        )
+
+    def test_table(self):
+        """Text between the table comments becomes a table segment."""
+        self.assertEqual(
+            format_grammar._parse_segments(
+                "content\n"
+                "/* Table begin. */\n"
+                "{VAR} { return SIMPLE_TOKEN(VAR); }\n"
+                "{WHILE} { return SIMPLE_TOKEN(WHILE); }\n"
+                "/* Table end. */\n"
+                "more content\n",
+                False,
+            ),
+            (
+                [
+                    "content\n" "/* Table begin. */\n",
+                    None,
+                    "\n" "/* Table end. */\n" "more content\n",
+                ],
+                {},
+                [
+                    format_grammar._Table(
+                        1,
+                        "{VAR} { return SIMPLE_TOKEN(VAR); }\n"
+                        "{WHILE} { return SIMPLE_TOKEN(WHILE); }",
+                    )
+                ],
+            ),
+        )
+
+    def test_table_with_space(self):
+        """Table comments indented by one space are recognized as well."""
+        self.assertEqual(
+            format_grammar._parse_segments(
+                "content\n"
+                " /* Table begin. */\n"
+                "{VAR} { return SIMPLE_TOKEN(VAR); }\n"
+                "{WHILE} { return SIMPLE_TOKEN(WHILE); }\n"
+                " /* Table end. */\n"
+                "more content\n",
+                False,
+            ),
+            (
+                [
+                    "content\n /* Table begin. */\n",
+                    None,
+                    "\n /* Table end. */\nmore content\n",
+                ],
+                {},
+                [
+                    format_grammar._Table(
+                        1,
+                        "{VAR} { return SIMPLE_TOKEN(VAR); }\n"
+                        "{WHILE} { return SIMPLE_TOKEN(WHILE); }",
+                    )
+                ],
+            ),
+        )
+
+    def test_format_table_defines(self):
+        """Two-column rows are padded so the second column lines up."""
+        text_segments = [None]
+        format_grammar._format_table_segments(
+            text_segments,
+            [
+                format_grammar._Table(
+                    0,
+                    'CONTINUE "continue"\n'
+                    'DEFAULT "default"\n'
+                    'DOUBLE_ARROW "=>"',
+                )
+            ],
+            False,
+        )
+        self.assertEqual(
+            text_segments,
+            [
+                'CONTINUE     "continue"\n'
+                'DEFAULT      "default"\n'
+                'DOUBLE_ARROW "=>"'
+            ],
+        )
+
+    def test_format_table_returns(self):
+        """Rows of lexer rules are aligned column by column."""
+        text_segments = [None]
+        format_grammar._format_table_segments(
+            text_segments,
+            [
+                format_grammar._Table(
+                    0,
+                    "{VAR} { return SIMPLE_TOKEN(VAR); }\n"
+                    "{WHILE} { return SIMPLE_TOKEN(WHILE); }",
+                )
+            ],
+            False,
+        )
+        self.assertEqual(
+            text_segments,
+            [
+                "{VAR}   { return SIMPLE_TOKEN(VAR);   }\n"
+                "{WHILE} { return SIMPLE_TOKEN(WHILE); }"
+            ],
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()

+ 69 - 73
executable_semantics/syntax/lexer.lpp

@@ -5,15 +5,14 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 */
 
 %{
+  #include <cstdlib>
 
-#include <cstdlib>
-
-#include "common/check.h"
-#include "common/string_helpers.h"
-#include "executable_semantics/common/tracing_flag.h"
-#include "executable_semantics/syntax/parse_and_lex_context.h"
-#include "executable_semantics/syntax/parser.h"
-#include "llvm/ADT/StringExtras.h"
+  #include "common/check.h"
+  #include "common/string_helpers.h"
+  #include "executable_semantics/common/tracing_flag.h"
+  #include "executable_semantics/syntax/parse_and_lex_context.h"
+  #include "executable_semantics/syntax/parser.h"
+  #include "llvm/ADT/StringExtras.h"
 %}
 
 /* Turn off legacy bits we don't need. */
@@ -29,6 +28,7 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  */
 %s AFTER_OPERAND
 
+/* Table begin. */
 AND                  "and"
 ARROW                "->"
 AUTO                 "auto"
@@ -75,14 +75,16 @@ TYPE                 "Type"
 UNDERSCORE           "_"
 VAR                  "var"
 WHILE                "while"
+/* Table end. */
 
+/* This should be kept table-like, but isn't automatic due to spaces. */
 identifier            [A-Za-z_][A-Za-z0-9_]*
 sized_type_literal    [iuf][1-9][0-9]*
 integer_literal       [0-9]+
 horizontal_whitespace [ \t\r]
 whitespace            [ \t\r\n]
 one_line_comment      \/\/[^\n]*\n
-operand_start         [(A-Za-z0-9_"]
+operand_start         [(A-Za-z0-9_\"]
 
 /* Single-line string literals should reject vertical whitespace. */
 string_literal        \"([^\\\"\n\v\f\r]|\\.)*\"
@@ -93,18 +95,17 @@ string_literal        \"([^\\\"\n\v\f\r]|\\.)*\"
   // Advances the current token position by yyleng columns without changing
   // the line number, and takes us out of the after-whitespace / after-operand
   // state.
-  # define YY_USER_ACTION \
-      context.current_token_position.columns(yyleng); \
-      if (YY_START == AFTER_WHITESPACE || \
-          YY_START == AFTER_OPERAND) { \
-        BEGIN(INITIAL); \
-      }
+  #define YY_USER_ACTION                                             \
+    context.current_token_position.columns(yyleng);                  \
+    if (YY_START == AFTER_WHITESPACE || YY_START == AFTER_OPERAND) { \
+      BEGIN(INITIAL);                                                \
+    }
 
   #define SIMPLE_TOKEN(name) \
-      Carbon::Parser::make_##name(context.current_token_position);
+    Carbon::Parser::make_##name(context.current_token_position);
 
   #define ARG_TOKEN(name, arg) \
-      Carbon::Parser::make_##name(arg, context.current_token_position);
+    Carbon::Parser::make_##name(arg, context.current_token_position);
 %}
 
 %%
@@ -116,49 +117,51 @@ string_literal        \"([^\\\"\n\v\f\r]|\\.)*\"
   context.current_token_position.step();
 %}
 
-{AND}                 { return SIMPLE_TOKEN(AND); }
-{ARROW}               { return SIMPLE_TOKEN(ARROW); }
-{AUTO}                { return SIMPLE_TOKEN(AUTO); }
-{AWAIT}               { return SIMPLE_TOKEN(AWAIT); }
-{BOOL}                { return SIMPLE_TOKEN(BOOL); }
-{BREAK}               { return SIMPLE_TOKEN(BREAK); }
-{CASE}                { return SIMPLE_TOKEN(CASE); }
-{CHOICE}              { return SIMPLE_TOKEN(CHOICE); }
-{CLASS}               { return SIMPLE_TOKEN(CLASS); }
-{COLON_BANG}          { return SIMPLE_TOKEN(COLON_BANG); }
-{COLON}               { return SIMPLE_TOKEN(COLON); }
-{COMMA}               { return SIMPLE_TOKEN(COMMA); }
-{CONTINUATION_TYPE}   { return SIMPLE_TOKEN(CONTINUATION_TYPE); }
-{CONTINUATION}        { return SIMPLE_TOKEN(CONTINUATION); }
-{CONTINUE}            { return SIMPLE_TOKEN(CONTINUE); }
-{DEFAULT}             { return SIMPLE_TOKEN(DEFAULT); }
-{DOUBLE_ARROW}        { return SIMPLE_TOKEN(DOUBLE_ARROW); }
-{ELSE}                { return SIMPLE_TOKEN(ELSE); }
-{EQUAL_EQUAL}         { return SIMPLE_TOKEN(EQUAL_EQUAL); }
-{EQUAL}               { return SIMPLE_TOKEN(EQUAL); }
-{FALSE}               { return SIMPLE_TOKEN(FALSE); }
-{FNTY}                { return SIMPLE_TOKEN(FNTY); }
-{FN}                  { return SIMPLE_TOKEN(FN); }
-{IF}                  { return SIMPLE_TOKEN(IF); }
-{LEFT_PARENTHESIS}    { return SIMPLE_TOKEN(LEFT_PARENTHESIS); }
-{LEFT_CURLY_BRACE}    { return SIMPLE_TOKEN(LEFT_CURLY_BRACE); }
+ /* Table begin. */
+{AND}                 { return SIMPLE_TOKEN(AND);                 }
+{ARROW}               { return SIMPLE_TOKEN(ARROW);               }
+{AUTO}                { return SIMPLE_TOKEN(AUTO);                }
+{AWAIT}               { return SIMPLE_TOKEN(AWAIT);               }
+{BOOL}                { return SIMPLE_TOKEN(BOOL);                }
+{BREAK}               { return SIMPLE_TOKEN(BREAK);               }
+{CASE}                { return SIMPLE_TOKEN(CASE);                }
+{CHOICE}              { return SIMPLE_TOKEN(CHOICE);              }
+{CLASS}               { return SIMPLE_TOKEN(CLASS);               }
+{COLON_BANG}          { return SIMPLE_TOKEN(COLON_BANG);          }
+{COLON}               { return SIMPLE_TOKEN(COLON);               }
+{COMMA}               { return SIMPLE_TOKEN(COMMA);               }
+{CONTINUATION_TYPE}   { return SIMPLE_TOKEN(CONTINUATION_TYPE);   }
+{CONTINUATION}        { return SIMPLE_TOKEN(CONTINUATION);        }
+{CONTINUE}            { return SIMPLE_TOKEN(CONTINUE);            }
+{DEFAULT}             { return SIMPLE_TOKEN(DEFAULT);             }
+{DOUBLE_ARROW}        { return SIMPLE_TOKEN(DOUBLE_ARROW);        }
+{ELSE}                { return SIMPLE_TOKEN(ELSE);                }
+{EQUAL_EQUAL}         { return SIMPLE_TOKEN(EQUAL_EQUAL);         }
+{EQUAL}               { return SIMPLE_TOKEN(EQUAL);               }
+{FALSE}               { return SIMPLE_TOKEN(FALSE);               }
+{FNTY}                { return SIMPLE_TOKEN(FNTY);                }
+{FN}                  { return SIMPLE_TOKEN(FN);                  }
+{IF}                  { return SIMPLE_TOKEN(IF);                  }
+{LEFT_PARENTHESIS}    { return SIMPLE_TOKEN(LEFT_PARENTHESIS);    }
+{LEFT_CURLY_BRACE}    { return SIMPLE_TOKEN(LEFT_CURLY_BRACE);    }
 {LEFT_SQUARE_BRACKET} { return SIMPLE_TOKEN(LEFT_SQUARE_BRACKET); }
-{MATCH}               { return SIMPLE_TOKEN(MATCH); }
-{MINUS}               { return SIMPLE_TOKEN(MINUS); }
-{NOT}                 { return SIMPLE_TOKEN(NOT); }
-{OR}                  { return SIMPLE_TOKEN(OR); }
-{PERIOD}              { return SIMPLE_TOKEN(PERIOD); }
-{PLUS}                { return SIMPLE_TOKEN(PLUS); }
-{RETURN}              { return SIMPLE_TOKEN(RETURN); }
-{RUN}                 { return SIMPLE_TOKEN(RUN); }
-{SEMICOLON}           { return SIMPLE_TOKEN(SEMICOLON); }
-{SLASH}               { return SIMPLE_TOKEN(SLASH); }
-{STRING}              { return SIMPLE_TOKEN(STRING); }
-{TRUE}                { return SIMPLE_TOKEN(TRUE); }
-{TYPE}                { return SIMPLE_TOKEN(TYPE); }
-{UNDERSCORE}          { return SIMPLE_TOKEN(UNDERSCORE); }
-{VAR}                 { return SIMPLE_TOKEN(VAR); }
-{WHILE}               { return SIMPLE_TOKEN(WHILE); }
+{MATCH}               { return SIMPLE_TOKEN(MATCH);               }
+{MINUS}               { return SIMPLE_TOKEN(MINUS);               }
+{NOT}                 { return SIMPLE_TOKEN(NOT);                 }
+{OR}                  { return SIMPLE_TOKEN(OR);                  }
+{PERIOD}              { return SIMPLE_TOKEN(PERIOD);              }
+{PLUS}                { return SIMPLE_TOKEN(PLUS);                }
+{RETURN}              { return SIMPLE_TOKEN(RETURN);              }
+{RUN}                 { return SIMPLE_TOKEN(RUN);                 }
+{SEMICOLON}           { return SIMPLE_TOKEN(SEMICOLON);           }
+{SLASH}               { return SIMPLE_TOKEN(SLASH);               }
+{STRING}              { return SIMPLE_TOKEN(STRING);              }
+{TRUE}                { return SIMPLE_TOKEN(TRUE);                }
+{TYPE}                { return SIMPLE_TOKEN(TYPE);                }
+{UNDERSCORE}          { return SIMPLE_TOKEN(UNDERSCORE);          }
+{VAR}                 { return SIMPLE_TOKEN(VAR);                 }
+{WHILE}               { return SIMPLE_TOKEN(WHILE);               }
+ /* Table end. */
 
  /* More modern Bisons provide make_EOF. */
 <<EOF>>               { return SIMPLE_TOKEN(END_OF_FILE); }
@@ -203,26 +206,18 @@ string_literal        \"([^\\\"\n\v\f\r]|\\.)*\"
   return SIMPLE_TOKEN(BINARY_STAR);
 }
  /* `*` operator case 2: */
-<AFTER_OPERAND>"*"/{operand_start} {
-  return SIMPLE_TOKEN(BINARY_STAR);
-}
+<AFTER_OPERAND>"*"/{operand_start} { return SIMPLE_TOKEN(BINARY_STAR); }
  /* `*` operator case 3: */
-<AFTER_WHITESPACE>"*" {
-  return SIMPLE_TOKEN(PREFIX_STAR);
-}
+<AFTER_WHITESPACE>"*" { return SIMPLE_TOKEN(PREFIX_STAR); }
  /* `*` operator case 4: */
 <INITIAL,AFTER_OPERAND>"*"{whitespace}+ {
   BEGIN(AFTER_WHITESPACE);
   return SIMPLE_TOKEN(POSTFIX_STAR);
 }
  /* `*` operator case 5: */
-<INITIAL,AFTER_OPERAND>"*" {
-  return SIMPLE_TOKEN(UNARY_STAR);
-}
+<INITIAL,AFTER_OPERAND>"*" { return SIMPLE_TOKEN(UNARY_STAR); }
 
-{sized_type_literal} {
-  return ARG_TOKEN(sized_type_literal, yytext);
-}
+{sized_type_literal} { return ARG_TOKEN(sized_type_literal, yytext); }
 
 {identifier} {
   BEGIN(AFTER_OPERAND);
@@ -279,8 +274,9 @@ string_literal        \"([^\\\"\n\v\f\r]|\\.)*\"
     // "Reading a token: ".
     llvm::errs() << "\n";
   }
-  FATAL_COMPILATION_ERROR(context.SourceLoc()) << "invalid character '\\x"
-            << llvm::toHex(llvm::StringRef(yytext, 1)) << "' in source file.";
+  FATAL_COMPILATION_ERROR(context.SourceLoc())
+      << "invalid character '\\x" << llvm::toHex(llvm::StringRef(yytext, 1))
+      << "' in source file.";
 }
 
 %%

+ 137 - 84
executable_semantics/syntax/parser.ypp

@@ -50,46 +50,43 @@
 // -----------------------------------------------------------------------------
 
 %code top {
-#include <algorithm>
-#include <cstdarg>
-#include <cstdio>
-#include <cstdlib>
-#include <list>
-#include <vector>
+  #include <algorithm>
+  #include <cstdarg>
+  #include <cstdio>
+  #include <cstdlib>
+  #include <list>
+  #include <vector>
 
-#include "common/check.h"
-#include "executable_semantics/syntax/syntax_helpers.h"
-#include "executable_semantics/syntax/parse_and_lex_context.h"
-#include "llvm/ADT/StringExtras.h"
+  #include "common/check.h"
+  #include "executable_semantics/syntax/parse_and_lex_context.h"
+  #include "executable_semantics/syntax/syntax_helpers.h"
+  #include "llvm/ADT/StringExtras.h"
 }  // %code top
 
 %code requires {
-#include <optional>
+  #include <optional>
 
-#include "executable_semantics/ast/abstract_syntax_tree.h"
-#include "executable_semantics/ast/declaration.h"
-#include "executable_semantics/ast/expression.h"
-#include "executable_semantics/ast/function_definition.h"
-#include "executable_semantics/ast/pattern.h"
-#include "executable_semantics/common/arena.h"
-#include "executable_semantics/common/ptr.h"
-#include "executable_semantics/ast/paren_contents.h"
-#include "executable_semantics/syntax/bison_wrap.h"
+  #include "executable_semantics/ast/abstract_syntax_tree.h"
+  #include "executable_semantics/ast/declaration.h"
+  #include "executable_semantics/ast/expression.h"
+  #include "executable_semantics/ast/function_definition.h"
+  #include "executable_semantics/ast/paren_contents.h"
+  #include "executable_semantics/ast/pattern.h"
+  #include "executable_semantics/common/arena.h"
+  #include "executable_semantics/common/ptr.h"
+  #include "executable_semantics/syntax/bison_wrap.h"
 
-namespace Carbon {
-class ParseAndLexContext;
-}  // namespace Carbon
-
-typedef void* yyscan_t;
+  namespace Carbon {
+  class ParseAndLexContext;
+  }  // namespace Carbon
 
+  typedef void* yyscan_t;
 }  // %code requires
 
 %code {
-
-void Carbon::Parser::error(const location_type&, const std::string& message) {
-  context.PrintDiagnostic(message);
-}
-
+  void Carbon::Parser::error(const location_type&, const std::string& message) {
+    context.PrintDiagnostic(message);
+  }
 }  // %code
 
 %token <int> integer_literal
@@ -228,7 +225,10 @@ expression:
   identifier
     { $$ = global_arena->New<IdentifierExpression>(context.SourceLoc(), $1); }
 | expression designator
-    { $$ = global_arena->New<FieldAccessExpression>(context.SourceLoc(), $1, $2); }
+    {
+      $$ =
+          global_arena->New<FieldAccessExpression>(context.SourceLoc(), $1, $2);
+    }
 | expression LEFT_SQUARE_BRACKET expression RIGHT_SQUARE_BRACKET
     { $$ = global_arena->New<IndexExpression>(context.SourceLoc(), $1, $3); }
 | integer_literal
@@ -243,7 +243,8 @@ expression:
     {
       int val;
       CHECK(llvm::to_integer(llvm::StringRef($1).substr(1), val));
-      CHECK($1[0] == 'i' && val == 32)  << "Only i32 is supported for now: " << $1;
+      CHECK($1[0] == 'i' && val == 32)
+          << "Only i32 is supported for now: " << $1;
       $$ = global_arena->New<IntTypeLiteral>(context.SourceLoc());
     }
 | STRING
@@ -256,48 +257,85 @@ expression:
     { $$ = global_arena->New<ContinuationTypeLiteral>(context.SourceLoc()); }
 | paren_expression { $$ = $1; }
 | expression EQUAL_EQUAL expression
-    { $$ = global_arena->New<PrimitiveOperatorExpression>(
-        context.SourceLoc(), Operator::Eq, std::vector<Ptr<const Expression>>({$1, $3})); }
+    {
+      $$ = global_arena->New<PrimitiveOperatorExpression>(
+          context.SourceLoc(), Operator::Eq,
+          std::vector<Ptr<const Expression>>({$1, $3}));
+    }
 | expression PLUS expression
-    { $$ = global_arena->New<PrimitiveOperatorExpression>(
-        context.SourceLoc(), Operator::Add, std::vector<Ptr<const Expression>>({$1, $3})); }
+    {
+      $$ = global_arena->New<PrimitiveOperatorExpression>(
+          context.SourceLoc(), Operator::Add,
+          std::vector<Ptr<const Expression>>({$1, $3}));
+    }
 | expression MINUS expression
-    { $$ = global_arena->New<PrimitiveOperatorExpression>(
-        context.SourceLoc(), Operator::Sub, std::vector<Ptr<const Expression>>({$1, $3})); }
+    {
+      $$ = global_arena->New<PrimitiveOperatorExpression>(
+          context.SourceLoc(), Operator::Sub,
+          std::vector<Ptr<const Expression>>({$1, $3}));
+    }
 | expression BINARY_STAR expression
-    { $$ = global_arena->New<PrimitiveOperatorExpression>(
-        context.SourceLoc(), Operator::Mul, std::vector<Ptr<const Expression>>({$1, $3})); }
+    {
+      $$ = global_arena->New<PrimitiveOperatorExpression>(
+          context.SourceLoc(), Operator::Mul,
+          std::vector<Ptr<const Expression>>({$1, $3}));
+    }
 | expression AND expression
-    { $$ = global_arena->New<PrimitiveOperatorExpression>(
-        context.SourceLoc(), Operator::And, std::vector<Ptr<const Expression>>({$1, $3})); }
+    {
+      $$ = global_arena->New<PrimitiveOperatorExpression>(
+          context.SourceLoc(), Operator::And,
+          std::vector<Ptr<const Expression>>({$1, $3}));
+    }
 | expression OR expression
-    { $$ = global_arena->New<PrimitiveOperatorExpression>(
-        context.SourceLoc(), Operator::Or, std::vector<Ptr<const Expression>>({$1, $3})); }
+    {
+      $$ = global_arena->New<PrimitiveOperatorExpression>(
+          context.SourceLoc(), Operator::Or,
+          std::vector<Ptr<const Expression>>({$1, $3}));
+    }
 | NOT expression
-    { $$ = global_arena->New<PrimitiveOperatorExpression>(
-        context.SourceLoc(), Operator::Not, std::vector<Ptr<const Expression>>({$2})); }
+    {
+      $$ = global_arena->New<PrimitiveOperatorExpression>(
+          context.SourceLoc(), Operator::Not,
+          std::vector<Ptr<const Expression>>({$2}));
+    }
 | MINUS expression %prec UNARY_MINUS
-    { $$ = global_arena->New<PrimitiveOperatorExpression>(
-        context.SourceLoc(), Operator::Neg, std::vector<Ptr<const Expression>>({$2})); }
+    {
+      $$ = global_arena->New<PrimitiveOperatorExpression>(
+          context.SourceLoc(), Operator::Neg,
+          std::vector<Ptr<const Expression>>({$2}));
+    }
 | PREFIX_STAR expression
-    { $$ = global_arena->New<PrimitiveOperatorExpression>(
-        context.SourceLoc(), Operator::Deref, std::vector<Ptr<const Expression>>({$2})); }
+    {
+      $$ = global_arena->New<PrimitiveOperatorExpression>(
+          context.SourceLoc(), Operator::Deref,
+          std::vector<Ptr<const Expression>>({$2}));
+    }
 | UNARY_STAR expression %prec PREFIX_STAR
-    { $$ = global_arena->New<PrimitiveOperatorExpression>(
-        context.SourceLoc(), Operator::Deref, std::vector<Ptr<const Expression>>({$2})); }
+    {
+      $$ = global_arena->New<PrimitiveOperatorExpression>(
+          context.SourceLoc(), Operator::Deref,
+          std::vector<Ptr<const Expression>>({$2}));
+    }
 | expression tuple
     { $$ = global_arena->New<CallExpression>(context.SourceLoc(), $1, $2); }
 | expression POSTFIX_STAR
-    { $$ = global_arena->New<PrimitiveOperatorExpression>(
-        context.SourceLoc(), Operator::Ptr, std::vector<Ptr<const Expression>>({$1})); }
+    {
+      $$ = global_arena->New<PrimitiveOperatorExpression>(
+          context.SourceLoc(), Operator::Ptr,
+          std::vector<Ptr<const Expression>>({$1}));
+    }
 | expression UNARY_STAR
-    { $$ = global_arena->New<PrimitiveOperatorExpression>(
-        context.SourceLoc(), Operator::Ptr, std::vector<Ptr<const Expression>>({$1})); }
+    {
+      $$ = global_arena->New<PrimitiveOperatorExpression>(
+          context.SourceLoc(), Operator::Ptr,
+          std::vector<Ptr<const Expression>>({$1}));
+    }
 | FNTY tuple return_type
     {
       auto [return_exp, is_omitted_exp] = $3.Release();
-      $$ = global_arena->New<FunctionTypeLiteral>(
-        context.SourceLoc(), $2, return_exp, is_omitted_exp); }
+      $$ = global_arena->New<FunctionTypeLiteral>(context.SourceLoc(), $2,
+                                                  return_exp, is_omitted_exp);
+    }
 ;
 designator: PERIOD identifier { $$ = $2; }
 ;
@@ -379,7 +417,7 @@ paren_pattern_base:
 // enforce that requirement.
 paren_pattern_contents:
   paren_pattern_element
-    { $$ = {.elements = {$1}, .has_trailing_comma = false }; }
+    { $$ = {.elements = {$1}, .has_trailing_comma = false}; }
 | paren_expression_contents COMMA paren_pattern_element
     {
       $$ = ParenExpressionToParenPattern($1);
@@ -389,7 +427,9 @@ paren_pattern_contents:
     {
       $$ = $1;
       auto el = $3.Release();
-      $$.elements.push_back({.name = el.name, .term = global_arena->New<ExpressionPattern>(el.term)});
+      $$.elements.push_back(
+          {.name = el.name,
+           .term = global_arena->New<ExpressionPattern>(el.term)});
     }
 | paren_pattern_contents COMMA paren_pattern_element
     {
@@ -411,28 +451,41 @@ tuple_pattern: paren_pattern_base
 // rules out the possibility of an `expression` at this point.
 maybe_empty_tuple_pattern:
   LEFT_PARENTHESIS RIGHT_PARENTHESIS
-    { $$ = global_arena->New<TuplePattern>(context.SourceLoc(), std::vector<TuplePattern::Field>()); }
+    {
+      $$ = global_arena->New<TuplePattern>(context.SourceLoc(),
+                                           std::vector<TuplePattern::Field>());
+    }
 | tuple_pattern
     { $$ = $1; }
 ;
 clause:
   CASE pattern DOUBLE_ARROW statement
-    { $$ = global_arena->RawNew<std::pair<Ptr<const Pattern>, Ptr<const Statement>>>($2, $4); }
+    {
+      $$ = global_arena
+               ->RawNew<std::pair<Ptr<const Pattern>, Ptr<const Statement>>>(
+                   $2, $4);
+    }
 | DEFAULT DOUBLE_ARROW statement
     {
-      auto vp = global_arena->New<BindingPattern>(
-          context.SourceLoc(), std::nullopt, global_arena->New<AutoPattern>(context.SourceLoc()));
-      $$ = global_arena->RawNew<std::pair<Ptr<const Pattern>, Ptr<const Statement>>>(vp, $3);
+      auto vp = global_arena->New<BindingPattern>(
+          context.SourceLoc(), std::nullopt,
+          global_arena->New<AutoPattern>(context.SourceLoc()));
+      $$ = global_arena
+               ->RawNew<std::pair<Ptr<const Pattern>, Ptr<const Statement>>>(
+                   vp, $3);
     }
 ;
 clause_list:
   // Empty
     {
-      $$ = global_arena->RawNew<std::list<
-          std::pair<Ptr<const Pattern>, Ptr<const Statement>>>>();
+      $$ = global_arena->RawNew<
+          std::list<std::pair<Ptr<const Pattern>, Ptr<const Statement>>>>();
     }
 | clause clause_list
-    { $$ = $2; $$->push_front(*$1); }
+    {
+      $$ = $2;
+      $$->push_front(*$1);
+    }
 ;
 statement:
   expression EQUAL expression SEMICOLON
@@ -452,7 +505,8 @@ statement:
 | RETURN return_expression SEMICOLON
     {
       auto [return_exp, is_omitted_exp] = $2.Release();
-      $$ = global_arena->New<Return>(context.SourceLoc(), return_exp, is_omitted_exp);
+      $$ = global_arena->New<Return>(context.SourceLoc(), return_exp,
+                                     is_omitted_exp);
     }
 | block
     { $$ = $1; }
@@ -502,9 +556,7 @@ return_type:
 ;
 generic_binding:
   identifier COLON_BANG expression
-    {
-      $$ = GenericBinding({.name = std::move($1), .type = $3});
-    }
+    { $$ = GenericBinding({.name = std::move($1), .type = $3}); }
 ;
 deduced_param_list:
   // Empty
@@ -532,8 +584,7 @@ function_definition:
       auto [return_exp, is_omitted_exp] = $5.Release();
       $$ = global_arena->New<FunctionDefinition>(
           context.SourceLoc(), $2, $3, $4,
-          global_arena->New<ExpressionPattern>(return_exp),
-          is_omitted_exp, $6);
+          global_arena->New<ExpressionPattern>(return_exp), is_omitted_exp, $6);
     }
 | FN identifier deduced_params maybe_empty_tuple_pattern DOUBLE_ARROW expression
   SEMICOLON
@@ -552,8 +603,8 @@ function_declaration:
       auto [return_exp, is_omitted_exp] = $5.Release();
       $$ = global_arena->New<FunctionDefinition>(
           context.SourceLoc(), $2, $3, $4,
-          global_arena->New<ExpressionPattern>(return_exp),
-          is_omitted_exp, std::nullopt);
+          global_arena->New<ExpressionPattern>(return_exp), is_omitted_exp,
+          std::nullopt);
     }
 ;
 variable_declaration: identifier COLON pattern
@@ -566,7 +617,10 @@ member_list:
   // Empty
     { $$ = std::list<Ptr<Member>>(); }
 | member member_list
-    { $$ = $2; $$.push_front($1); }
+    {
+      $$ = $2;
+      $$.push_front($1);
+    }
 ;
 alternative:
   identifier tuple
@@ -586,7 +640,10 @@ alternative_list:
       $$.push_front($1);
     }
 | alternative COMMA alternative_list
-    { $$ = std::move($3); $$.push_front($1); }
+    {
+      $$ = std::move($3);
+      $$.push_front($1);
+    }
 ;
 declaration:
   function_definition
@@ -594,13 +651,9 @@ declaration:
 | function_declaration
     { $$ = global_arena->New<FunctionDeclaration>($1); }
 | CLASS identifier LEFT_CURLY_BRACE member_list RIGHT_CURLY_BRACE
-    {
-      $$ = global_arena->New<ClassDeclaration>(context.SourceLoc(), $2, $4);
-    }
+    { $$ = global_arena->New<ClassDeclaration>(context.SourceLoc(), $2, $4); }
 | CHOICE identifier LEFT_CURLY_BRACE alternative_list RIGHT_CURLY_BRACE
-    {
-      $$ = global_arena->New<ChoiceDeclaration>(context.SourceLoc(), $2, $4);
-    }
+    { $$ = global_arena->New<ChoiceDeclaration>(context.SourceLoc(), $2, $4); }
 | VAR variable_declaration EQUAL expression SEMICOLON
     {
       $$ = global_arena->New<VariableDeclaration>(context.SourceLoc(), $2, $4);