Ver Fonte

Add table sorting functionality (#796)

Also apply it to the list of tokens. This way there's no need for "sort order" comments.

This was a side effect of me trying to merge in api/impl/library/package and thinking, "why am I doing this manually?"



Co-authored-by: Geoff Romer <gromer@google.com>
Jon Meow há 4 anos atrás
pai
commit
820081b8c1

+ 79 - 33
executable_semantics/syntax/format_grammar.py

@@ -33,11 +33,6 @@ _COLS = 80
 # An arbitrary separator to use when formatting multiple code segments.
 _FORMAT_SEPARATOR = "\n// CLANG FORMAT CODE SEGMENT SEPARATOR\n"
 
-# The table begin and end comments, including table-bounding newlines.
-_TABLE_BEGIN = "/* Table begin. */\n"
-_TABLE_END = "\n/* Table end. */"
-_TABLE_END_WITH_SPACE = "\n /* Table end. */"
-
 
 @dataclass
 class _CppCode:
@@ -66,6 +61,16 @@ class _Table:
     content: str
 
 
+def _print_header(msg: str) -> None:
+    """Prints a header, mostly for debug output."""
+    print(("=== %s " % msg).ljust(79, "="))
+
+
+def _print_footer() -> None:
+    """Prints a footer, mostly for debug output."""
+    print("=" * 79)
+
+
 def _parse_args() -> argparse.Namespace:
     """Parses command-line arguments and flags."""
     parser = argparse.ArgumentParser(description=__doc__)
@@ -132,9 +137,9 @@ def _add_text_segment(
     """Adds a text segment to the list."""
     text_segments.append(segment)
     if debug:
-        print("=== Text segment ===")
+        _print_header("Text segment")
         print(segment)
-        print("====================")
+        _print_footer()
 
 
 def _maybe_add_cpp_segment(
@@ -191,7 +196,7 @@ def _maybe_add_cpp_segment(
             has_percent,
         )
         if debug:
-            print("=== C++ segment ===")
+            _print_header("C++ segment")
             print(cpp_segment.content)
             print(
                 "Structure: { at %d; } at %d; %%: %s"
@@ -201,7 +206,7 @@ def _maybe_add_cpp_segment(
                     cpp_segment.has_percent,
                 )
             )
-            print("===================")
+            _print_footer()
 
         # Record the code segment.
         if close_brace_indent not in cpp_segments:
@@ -212,12 +217,16 @@ def _maybe_add_cpp_segment(
         return (end, True)
 
 
-def _parse_block_comment(
+def _parse_comment(
     content: str,
     text_segments: List[Optional[str]],
     table_segments: List[_Table],
     text_segment_start: int,
     cursor: int,
+    comment_start_str: str,
+    comment_end_str: str,
+    table_start_str: str,
+    table_end_pattern: str,
     debug: bool,
 ) -> Tuple[int, int]:
     """Parses a comment, possibly adding a table segment.
@@ -226,31 +235,35 @@ def _parse_block_comment(
     new_segment_start may or may not change.
     """
     # Skip over block comments.
-    comment_end = content.find("*/", cursor + 2)
+    comment_end = content.find(comment_end_str, cursor + len(comment_start_str))
     if comment_end == -1:
         exit(
-            "failed to find end of /* comment: %s"
-            % content[cursor : cursor + 20]
+            "failed to find end of %s comment: %s"
+            % (comment_start_str, content[cursor : cursor + 20])
         )
-    comment_end += 2
-    if content[cursor : comment_end + 1] == _TABLE_BEGIN:
-        for table_end_style in (_TABLE_END, _TABLE_END_WITH_SPACE):
-            table_end = content.find(table_end_style, comment_end)
-            if table_end != -1:
-                break
-        if table_end == -1:
+    if comment_end_str != "\n":
+        comment_end += len(comment_end_str)
+    if content[cursor : comment_end + 1] == table_start_str:
+        m = re.compile(table_end_pattern).search(content, comment_end)
+        if not m:
             exit(
-                "failed to find end of table: %s"
-                % content[comment_end + 1 : comment_end + 20]
+                "failed to find end of table: `%s`"
+                % content[comment_end : comment_end + 20]
             )
         _add_text_segment(
             text_segments, content[text_segment_start : comment_end + 1], debug
         )
         text_segments.append(None)
-        table_segments.append(
-            _Table(len(text_segments) - 1, content[comment_end + 1 : table_end])
+        table_segment = _Table(
+            len(text_segments) - 1, content[comment_end + 1 : m.start()]
         )
-        return table_end, table_end + len(_TABLE_END) - 1
+        table_segments.append(table_segment)
+        if debug:
+            _print_header("Table segment")
+            print(table_segment.content)
+            _print_footer()
+
+        return m.start(), m.end()
     else:
         return text_segment_start, comment_end - 1
 
@@ -268,7 +281,7 @@ def _parse_segments(
     - table_segments is a list of _Table objects.
     """
     i = 0
-    segment_start = 0
+    text_segment_start = 0
     text_segments: List[Optional[str]] = []
     cpp_segments: Dict[int, List[_CppCode]] = {}
     table_segments: List[_Table] = []
@@ -278,20 +291,47 @@ def _parse_segments(
             # Skip over strings.
             i = _find_string_end(content, i + 1)
         elif c == "/" and content[i + 1 : i + 2] == "*":
-            segment_start, i = _parse_block_comment(
-                content, text_segments, table_segments, segment_start, i, debug
+            text_segment_start, i = _parse_comment(
+                content=content,
+                text_segments=text_segments,
+                table_segments=table_segments,
+                text_segment_start=text_segment_start,
+                cursor=i,
+                comment_start_str="/*",
+                comment_end_str="*/",
+                table_start_str="/* table-begin */\n",
+                table_end_pattern=r"\n\s*/\* table-end \*/\n",
+                debug=debug,
+            )
+        elif c == "/" and content[i + 1 : i + 2] == "/":
+            text_segment_start, i = _parse_comment(
+                content=content,
+                text_segments=text_segments,
+                table_segments=table_segments,
+                text_segment_start=text_segment_start,
+                cursor=i,
+                comment_start_str="//",
+                comment_end_str="\n",
+                table_start_str="// table-begin\n",
+                table_end_pattern=r"\n\s*// table-end\n",
+                debug=debug,
             )
         elif c == "\\":
             # Skip over escapes.
             i += 1
         elif c == "{":
             i, added = _maybe_add_cpp_segment(
-                content, text_segments, cpp_segments, segment_start, i, debug
+                content,
+                text_segments,
+                cpp_segments,
+                text_segment_start,
+                i,
+                debug,
             )
             if added:
-                segment_start = i + 1
+                text_segment_start = i + 1
         i += 1
-    _add_text_segment(text_segments, content[segment_start:], debug)
+    _add_text_segment(text_segments, content[text_segment_start:], debug)
     return text_segments, cpp_segments, table_segments
 
 
@@ -358,7 +398,9 @@ def _format_table_segments(
 ) -> None:
     """Formats table segments."""
     for table in table_segments:
-        lines = table.content.strip().splitlines()
+        # Split, removing empty lines.
+        lines = [line for line in table.content.splitlines() if line]
+        lines.sort()
         rows: List[List[str]] = []
         col_widths: List[int] = []
         for row_index in range(len(lines)):
@@ -377,8 +419,12 @@ def _format_table_segments(
                 col_widths[col_index] = max(
                     col_widths[col_index], len(cols[col_index])
                 )
+        # Count prefix spaces for the indent.
+        m = re.match("^ *", lines[0])
+        assert m
+        indent = m.end()
         # The last column should not add spaces.
-        row_format = " ".join(
+        row_format = (" " * indent) + " ".join(
             ["%%-%ds" % width for width in col_widths[:-1]] + ["%s"]
         )
         text_segments[table.segment_index] = "\n".join(

+ 51 - 9
executable_semantics/syntax/format_grammar_test.py

@@ -91,18 +91,18 @@ class TestFormatGrammar(unittest.TestCase):
         self.assertEqual(
             format_grammar._parse_segments(
                 "content\n"
-                "/* Table begin. */\n"
+                "/* table-begin */\n"
                 "{VAR} { return SIMPLE_TOKEN(VAR); }\n"
                 "{WHILE} { return SIMPLE_TOKEN(WHILE); }\n"
-                "/* Table end. */\n"
+                "/* table-end */\n"
                 "more content\n",
                 False,
             ),
             (
                 [
-                    "content\n" "/* Table begin. */\n",
+                    "content\n" "/* table-begin */\n",
                     None,
-                    "\n" "/* Table end. */\n" "more content\n",
+                    "\n" "/* table-end */\n" "more content\n",
                 ],
                 {},
                 [
@@ -119,18 +119,18 @@ class TestFormatGrammar(unittest.TestCase):
         self.assertEqual(
             format_grammar._parse_segments(
                 "content\n"
-                " /* Table begin. */\n"
+                " /* table-begin */\n"
                 "{VAR} { return SIMPLE_TOKEN(VAR); }\n"
                 "{WHILE} { return SIMPLE_TOKEN(WHILE); }\n"
-                " /* Table end. */\n"
+                " /* table-end */\n"
                 "more content\n",
                 False,
             ),
             (
                 [
-                    "content\n /* Table begin. */\n",
+                    "content\n /* table-begin */\n",
                     None,
-                    "\n /* Table end. */\nmore content\n",
+                    "\n /* table-end */\nmore content\n",
                 ],
                 {},
                 [
@@ -143,6 +143,29 @@ class TestFormatGrammar(unittest.TestCase):
             ),
         )
 
+    def test_table_tokens(self):
+        self.assertEqual(
+            format_grammar._parse_segments(
+                "%tokens\n"
+                "  // Comment\n"
+                "  // table-begin\n"
+                "  VAR\n"
+                "  WHILE\n"
+                "  // table-end\n"
+                "  MORE\n",
+                False,
+            ),
+            (
+                [
+                    "%tokens\n" "  // Comment\n" "  // table-begin\n",
+                    None,
+                    "\n" "  // table-end\n" "  MORE\n",
+                ],
+                {},
+                [format_grammar._Table(1, "  VAR\n" "  WHILE")],
+            ),
+        )
+
     def test_format_table_defines(self):
         text_segments = [None]
         format_grammar._format_table_segments(
@@ -150,8 +173,8 @@ class TestFormatGrammar(unittest.TestCase):
             [
                 format_grammar._Table(
                     0,
-                    'CONTINUE "continue"\n'
                     'DEFAULT "default"\n'
+                    'CONTINUE "continue"\n'
                     'DOUBLE_ARROW "=>"',
                 )
             ],
@@ -187,6 +210,25 @@ class TestFormatGrammar(unittest.TestCase):
             ],
         )
 
+    def test_format_table_tokens(self):
+        text_segments = [None]
+        format_grammar._format_table_segments(
+            text_segments,
+            [
+                format_grammar._Table(
+                    0,
+                    "  AND\n" "  CONTINUE\n" "  BREAK",
+                )
+            ],
+            False,
+        )
+        self.assertEqual(
+            text_segments,
+            [
+                "  AND\n" "  BREAK\n" "  CONTINUE",
+            ],
+        )
+
 
 if __name__ == "__main__":
     unittest.main()

+ 5 - 5
executable_semantics/syntax/lexer.lpp

@@ -28,7 +28,7 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  */
 %s AFTER_OPERAND
 
-/* Table begin. */
+/* table-begin */
 AND                  "and"
 API                  "api"
 ARROW                "->"
@@ -80,7 +80,7 @@ TYPE                 "Type"
 UNDERSCORE           "_"
 VAR                  "var"
 WHILE                "while"
-/* Table end. */
+/* table-end */
 
 /* This should be kept table-like, but isn't automatic due to spaces. */
 identifier            [A-Za-z_][A-Za-z0-9_]*
@@ -122,7 +122,7 @@ string_literal        \"([^\\\"\n\v\f\r]|\\.)*\"
   context.current_token_position.step();
 %}
 
- /* Table begin. */
+ /* table-begin */
 {AND}                 { return SIMPLE_TOKEN(AND);                 }
 {API}                 { return SIMPLE_TOKEN(API);                 }
 {ARROW}               { return SIMPLE_TOKEN(ARROW);               }
@@ -150,8 +150,8 @@ string_literal        \"([^\\\"\n\v\f\r]|\\.)*\"
 {IF}                  { return SIMPLE_TOKEN(IF);                  }
 {IMPL}                { return SIMPLE_TOKEN(IMPL);                }
 {IMPORT}              { return SIMPLE_TOKEN(IMPORT);              }
-{LEFT_PARENTHESIS}    { return SIMPLE_TOKEN(LEFT_PARENTHESIS);    }
 {LEFT_CURLY_BRACE}    { return SIMPLE_TOKEN(LEFT_CURLY_BRACE);    }
+{LEFT_PARENTHESIS}    { return SIMPLE_TOKEN(LEFT_PARENTHESIS);    }
 {LEFT_SQUARE_BRACKET} { return SIMPLE_TOKEN(LEFT_SQUARE_BRACKET); }
 {LIBRARY}             { return SIMPLE_TOKEN(LIBRARY);             }
 {MATCH}               { return SIMPLE_TOKEN(MATCH);               }
@@ -171,7 +171,7 @@ string_literal        \"([^\\\"\n\v\f\r]|\\.)*\"
 {UNDERSCORE}          { return SIMPLE_TOKEN(UNDERSCORE);          }
 {VAR}                 { return SIMPLE_TOKEN(VAR);                 }
 {WHILE}               { return SIMPLE_TOKEN(WHILE);               }
- /* Table end. */
+ /* table-end */
 
  /* More modern Bisons provide make_EOF. */
 <<EOF>>               { return SIMPLE_TOKEN(END_OF_FILE); }

+ 2 - 0
executable_semantics/syntax/parser.ypp

@@ -137,6 +137,7 @@
 
 %token
   // Most tokens have their spelling defined in lexer.lpp.
+  // table-begin
   AND
   API
   ARROW
@@ -188,6 +189,7 @@
   UNDERSCORE
   VAR
   WHILE
+  // table-end
   // Used to track EOF.
   END_OF_FILE 0
   // Only used for precedence.