há 4 anos atrás · 820081b8c1
--- a/executable_semantics/syntax/format_grammar.py
+++ b/executable_semantics/syntax/format_grammar.py
@@ -33,11 +33,6 @@ _COLS = 80
 
				 # An arbitrary separator to use when formatting multiple code segments.
			
 
				 _FORMAT_SEPARATOR = "\n// CLANG FORMAT CODE SEGMENT SEPARATOR\n"
			
 
				 
			
 
				-# The table begin and end comments, including table-bounding newlines.
			
 
				-_TABLE_BEGIN = "/* Table begin. */\n"
			
 
				-_TABLE_END = "\n/* Table end. */"
			
 
				-_TABLE_END_WITH_SPACE = "\n /* Table end. */"
			
 
				-
			
 
				 
			
 
				 @dataclass
			
 
				 class _CppCode:
			
@@ -66,6 +61,16 @@ class _Table:
 
				     content: str
			
 
				 
			
 
				 
			
 
				+def _print_header(msg: str) -> None:
			
 
				+    """Prints a header, mostly for debug output."""
			
 
				+    print(("=== %s " % msg).ljust(79, "="))
			
 
				+
			
 
				+
			
 
				+def _print_footer() -> None:
			
 
				+    """Prints a footer, mostly for debug output."""
			
 
				+    print("=" * 79)
			
 
				+
			
 
				+
			
 
				 def _parse_args() -> argparse.Namespace:
			
 
				     """Parses command-line arguments and flags."""
			
 
				     parser = argparse.ArgumentParser(description=__doc__)
			
@@ -132,9 +137,9 @@ def _add_text_segment(
 
				     """Adds a text segment to the list."""
			
 
				     text_segments.append(segment)
			
 
				     if debug:
			
 
				-        print("=== Text segment ===")
			
 
				+        _print_header("Text segment")
			
 
				         print(segment)
			
 
				-        print("====================")
			
 
				+        _print_footer()
			
 
				 
			
 
				 
			
 
				 def _maybe_add_cpp_segment(
			
@@ -191,7 +196,7 @@ def _maybe_add_cpp_segment(
 
				             has_percent,
			
 
				         )
			
 
				         if debug:
			
 
				-            print("=== C++ segment ===")
			
 
				+            _print_header("C++ segment")
			
 
				             print(cpp_segment.content)
			
 
				             print(
			
 
				                 "Structure: { at %d; } at %d; %%: %s"
			
@@ -201,7 +206,7 @@ def _maybe_add_cpp_segment(
 
				                     cpp_segment.has_percent,
			
 
				                 )
			
 
				             )
			
 
				-            print("===================")
			
 
				+            _print_footer()
			
 
				 
			
 
				         # Record the code segment.
			
 
				         if close_brace_indent not in cpp_segments:
			
@@ -212,12 +217,16 @@ def _maybe_add_cpp_segment(
 
				         return (end, True)
			
 
				 
			
 
				 
			
 
				-def _parse_block_comment(
			
 
				+def _parse_comment(
			
 
				     content: str,
			
 
				     text_segments: List[Optional[str]],
			
 
				     table_segments: List[_Table],
			
 
				     text_segment_start: int,
			
 
				     cursor: int,
			
 
				+    comment_start_str: str,
			
 
				+    comment_end_str: str,
			
 
				+    table_start_str: str,
			
 
				+    table_end_pattern: str,
			
 
				     debug: bool,
			
 
				 ) -> Tuple[int, int]:
			
 
				     """Parses a comment, possibly adding a table segment.
			
@@ -226,31 +235,35 @@ def _parse_block_comment(
 
				     new_segment_start may or may not change.
			
 
				     """
			
 
				     # Skip over block comments.
			
 
				-    comment_end = content.find("*/", cursor + 2)
			
 
				+    comment_end = content.find(comment_end_str, cursor + len(comment_start_str))
			
 
				     if comment_end == -1:
			
 
				         exit(
			
 
				-            "failed to find end of /* comment: %s"
			
 
				-            % content[cursor : cursor + 20]
			
 
				+            "failed to find end of %s comment: %s"
			
 
				+            % (comment_start_str, content[cursor : cursor + 20])
			
 
				         )
			
 
				-    comment_end += 2
			
 
				-    if content[cursor : comment_end + 1] == _TABLE_BEGIN:
			
 
				-        for table_end_style in (_TABLE_END, _TABLE_END_WITH_SPACE):
			
 
				-            table_end = content.find(table_end_style, comment_end)
			
 
				-            if table_end != -1:
			
 
				-                break
			
 
				-        if table_end == -1:
			
 
				+    if comment_end_str != "\n":
			
 
				+        comment_end += len(comment_end_str)
			
 
				+    if content[cursor : comment_end + 1] == table_start_str:
			
 
				+        m = re.compile(table_end_pattern).search(content, comment_end)
			
 
				+        if not m:
			
 
				             exit(
			
 
				-                "failed to find end of table: %s"
			
 
				-                % content[comment_end + 1 : comment_end + 20]
			
 
				+                "failed to find end of table: `%s`"
			
 
				+                % content[comment_end : comment_end + 20]
			
 
				             )
			
 
				         _add_text_segment(
			
 
				             text_segments, content[text_segment_start : comment_end + 1], debug
			
 
				         )
			
 
				         text_segments.append(None)
			
 
				-        table_segments.append(
			
 
				-            _Table(len(text_segments) - 1, content[comment_end + 1 : table_end])
			
 
				+        table_segment = _Table(
			
 
				+            len(text_segments) - 1, content[comment_end + 1 : m.start()]
			
 
				         )
			
 
				-        return table_end, table_end + len(_TABLE_END) - 1
			
 
				+        table_segments.append(table_segment)
			
 
				+        if debug:
			
 
				+            _print_header("Table segment")
			
 
				+            print(table_segment.content)
			
 
				+            _print_footer()
			
 
				+
			
 
				+        return m.start(), m.end()
			
 
				     else:
			
 
				         return text_segment_start, comment_end - 1
			
 
				 
			
@@ -268,7 +281,7 @@ def _parse_segments(
 
				     - table_segments is a list of _Table objects.
			
 
				     """
			
 
				     i = 0
			
 
				-    segment_start = 0
			
 
				+    text_segment_start = 0
			
 
				     text_segments: List[Optional[str]] = []
			
 
				     cpp_segments: Dict[int, List[_CppCode]] = {}
			
 
				     table_segments: List[_Table] = []
			
@@ -278,20 +291,47 @@ def _parse_segments(
 
				             # Skip over strings.
			
 
				             i = _find_string_end(content, i + 1)
			
 
				         elif c == "/" and content[i + 1 : i + 2] == "*":
			
 
				-            segment_start, i = _parse_block_comment(
			
 
				-                content, text_segments, table_segments, segment_start, i, debug
			
 
				+            text_segment_start, i = _parse_comment(
			
 
				+                content=content,
			
 
				+                text_segments=text_segments,
			
 
				+                table_segments=table_segments,
			
 
				+                text_segment_start=text_segment_start,
			
 
				+                cursor=i,
			
 
				+                comment_start_str="/*",
			
 
				+                comment_end_str="*/",
			
 
				+                table_start_str="/* table-begin */\n",
			
 
				+                table_end_pattern=r"\n\s*/\* table-end \*/\n",
			
 
				+                debug=debug,
			
 
				+            )
			
 
				+        elif c == "/" and content[i + 1 : i + 2] == "/":
			
 
				+            text_segment_start, i = _parse_comment(
			
 
				+                content=content,
			
 
				+                text_segments=text_segments,
			
 
				+                table_segments=table_segments,
			
 
				+                text_segment_start=text_segment_start,
			
 
				+                cursor=i,
			
 
				+                comment_start_str="//",
			
 
				+                comment_end_str="\n",
			
 
				+                table_start_str="// table-begin\n",
			
 
				+                table_end_pattern=r"\n\s*// table-end\n",
			
 
				+                debug=debug,
			
 
				             )
			
 
				         elif c == "\\":
			
 
				             # Skip over escapes.
			
 
				             i += 1
			
 
				         elif c == "{":
			
 
				             i, added = _maybe_add_cpp_segment(
			
 
				-                content, text_segments, cpp_segments, segment_start, i, debug
			
 
				+                content,
			
 
				+                text_segments,
			
 
				+                cpp_segments,
			
 
				+                text_segment_start,
			
 
				+                i,
			
 
				+                debug,
			
 
				             )
			
 
				             if added:
			
 
				-                segment_start = i + 1
			
 
				+                text_segment_start = i + 1
			
 
				         i += 1
			
 
				-    _add_text_segment(text_segments, content[segment_start:], debug)
			
 
				+    _add_text_segment(text_segments, content[text_segment_start:], debug)
			
 
				     return text_segments, cpp_segments, table_segments
			
 
				 
			
 
				 
			
@@ -358,7 +398,9 @@ def _format_table_segments(
 
				 ) -> None:
			
 
				     """Formats table segments."""
			
 
				     for table in table_segments:
			
 
				-        lines = table.content.strip().splitlines()
			
 
				+        # Split, removing empty lines.
			
 
				+        lines = [line for line in table.content.splitlines() if line]
			
 
				+        lines.sort()
			
 
				         rows: List[List[str]] = []
			
 
				         col_widths: List[int] = []
			
 
				         for row_index in range(len(lines)):
			
@@ -377,8 +419,12 @@ def _format_table_segments(
 
				                 col_widths[col_index] = max(
			
 
				                     col_widths[col_index], len(cols[col_index])
			
 
				                 )
			
 
				+        # Count prefix spaces for the indent.
			
 
				+        m = re.match("^ *", lines[0])
			
 
				+        assert m
			
 
				+        indent = m.end()
			
 
				         # The last column should not add spaces.
			
 
				-        row_format = " ".join(
			
 
				+        row_format = (" " * indent) + " ".join(
			
 
				             ["%%-%ds" % width for width in col_widths[:-1]] + ["%s"]
			
 
				         )
			
 
				         text_segments[table.segment_index] = "\n".join(
			
--- a/executable_semantics/syntax/format_grammar_test.py
+++ b/executable_semantics/syntax/format_grammar_test.py
@@ -91,18 +91,18 @@ class TestFormatGrammar(unittest.TestCase):
 
				         self.assertEqual(
			
 
				             format_grammar._parse_segments(
			
 
				                 "content\n"
			
 
				-                "/* Table begin. */\n"
			
 
				+                "/* table-begin */\n"
			
 
				                 "{VAR} { return SIMPLE_TOKEN(VAR); }\n"
			
 
				                 "{WHILE} { return SIMPLE_TOKEN(WHILE); }\n"
			
 
				-                "/* Table end. */\n"
			
 
				+                "/* table-end */\n"
			
 
				                 "more content\n",
			
 
				                 False,
			
 
				             ),
			
 
				             (
			
 
				                 [
			
 
				-                    "content\n" "/* Table begin. */\n",
			
 
				+                    "content\n" "/* table-begin */\n",
			
 
				                     None,
			
 
				-                    "\n" "/* Table end. */\n" "more content\n",
			
 
				+                    "\n" "/* table-end */\n" "more content\n",
			
 
				                 ],
			
 
				                 {},
			
 
				                 [
			
@@ -119,18 +119,18 @@ class TestFormatGrammar(unittest.TestCase):
 
				         self.assertEqual(
			
 
				             format_grammar._parse_segments(
			
 
				                 "content\n"
			
 
				-                " /* Table begin. */\n"
			
 
				+                " /* table-begin */\n"
			
 
				                 "{VAR} { return SIMPLE_TOKEN(VAR); }\n"
			
 
				                 "{WHILE} { return SIMPLE_TOKEN(WHILE); }\n"
			
 
				-                " /* Table end. */\n"
			
 
				+                " /* table-end */\n"
			
 
				                 "more content\n",
			
 
				                 False,
			
 
				             ),
			
 
				             (
			
 
				                 [
			
 
				-                    "content\n /* Table begin. */\n",
			
 
				+                    "content\n /* table-begin */\n",
			
 
				                     None,
			
 
				-                    "\n /* Table end. */\nmore content\n",
			
 
				+                    "\n /* table-end */\nmore content\n",
			
 
				                 ],
			
 
				                 {},
			
 
				                 [
			
@@ -143,6 +143,29 @@ class TestFormatGrammar(unittest.TestCase):
 
				             ),
			
 
				         )
			
 
				 
			
 
				+    def test_table_tokens(self):
			
 
				+        self.assertEqual(
			
 
				+            format_grammar._parse_segments(
			
 
				+                "%tokens\n"
			
 
				+                "  // Comment\n"
			
 
				+                "  // table-begin\n"
			
 
				+                "  VAR\n"
			
 
				+                "  WHILE\n"
			
 
				+                "  // table-end\n"
			
 
				+                "  MORE\n",
			
 
				+                False,
			
 
				+            ),
			
 
				+            (
			
 
				+                [
			
 
				+                    "%tokens\n" "  // Comment\n" "  // table-begin\n",
			
 
				+                    None,
			
 
				+                    "\n" "  // table-end\n" "  MORE\n",
			
 
				+                ],
			
 
				+                {},
			
 
				+                [format_grammar._Table(1, "  VAR\n" "  WHILE")],
			
 
				+            ),
			
 
				+        )
			
 
				+
			
 
				     def test_format_table_defines(self):
			
 
				         text_segments = [None]
			
 
				         format_grammar._format_table_segments(
			
@@ -150,8 +173,8 @@ class TestFormatGrammar(unittest.TestCase):
 
				             [
			
 
				                 format_grammar._Table(
			
 
				                     0,
			
 
				-                    'CONTINUE "continue"\n'
			
 
				                     'DEFAULT "default"\n'
			
 
				+                    'CONTINUE "continue"\n'
			
 
				                     'DOUBLE_ARROW "=>"',
			
 
				                 )
			
 
				             ],
			
@@ -187,6 +210,25 @@ class TestFormatGrammar(unittest.TestCase):
 
				             ],
			
 
				         )
			
 
				 
			
 
				+    def test_format_table_tokens(self):
			
 
				+        text_segments = [None]
			
 
				+        format_grammar._format_table_segments(
			
 
				+            text_segments,
			
 
				+            [
			
 
				+                format_grammar._Table(
			
 
				+                    0,
			
 
				+                    "  AND\n" "  CONTINUE\n" "  BREAK",
			
 
				+                )
			
 
				+            ],
			
 
				+            False,
			
 
				+        )
			
 
				+        self.assertEqual(
			
 
				+            text_segments,
			
 
				+            [
			
 
				+                "  AND\n" "  BREAK\n" "  CONTINUE",
			
 
				+            ],
			
 
				+        )
			
 
				+
			
 
				 
			
 
				 if __name__ == "__main__":
			
 
				     unittest.main()
			
--- a/executable_semantics/syntax/lexer.lpp
+++ b/executable_semantics/syntax/lexer.lpp
@@ -28,7 +28,7 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
				  */
			
 
				 %s AFTER_OPERAND
			
 
				 
			
 
				-/* Table begin. */
			
 
				+/* table-begin */
			
 
				 AND                  "and"
			
 
				 API                  "api"
			
 
				 ARROW                "->"
			
@@ -80,7 +80,7 @@ TYPE                 "Type"
 
				 UNDERSCORE           "_"
			
 
				 VAR                  "var"
			
 
				 WHILE                "while"
			
 
				-/* Table end. */
			
 
				+/* table-end */
			
 
				 
			
 
				 /* This should be kept table-like, but isn't automatic due to spaces. */
			
 
				 identifier            [A-Za-z_][A-Za-z0-9_]*
			
@@ -122,7 +122,7 @@ string_literal        \"([^\\\"\n\v\f\r]|\\.)*\"
 
				   context.current_token_position.step();
			
 
				 %}
			
 
				 
			
 
				- /* Table begin. */
			
 
				+ /* table-begin */
			
 
				 {AND}                 { return SIMPLE_TOKEN(AND);                 }
			
 
				 {API}                 { return SIMPLE_TOKEN(API);                 }
			
 
				 {ARROW}               { return SIMPLE_TOKEN(ARROW);               }
			
@@ -150,8 +150,8 @@ string_literal        \"([^\\\"\n\v\f\r]|\\.)*\"
 
				 {IF}                  { return SIMPLE_TOKEN(IF);                  }
			
 
				 {IMPL}                { return SIMPLE_TOKEN(IMPL);                }
			
 
				 {IMPORT}              { return SIMPLE_TOKEN(IMPORT);              }
			
 
				-{LEFT_PARENTHESIS}    { return SIMPLE_TOKEN(LEFT_PARENTHESIS);    }
			
 
				 {LEFT_CURLY_BRACE}    { return SIMPLE_TOKEN(LEFT_CURLY_BRACE);    }
			
 
				+{LEFT_PARENTHESIS}    { return SIMPLE_TOKEN(LEFT_PARENTHESIS);    }
			
 
				 {LEFT_SQUARE_BRACKET} { return SIMPLE_TOKEN(LEFT_SQUARE_BRACKET); }
			
 
				 {LIBRARY}             { return SIMPLE_TOKEN(LIBRARY);             }
			
 
				 {MATCH}               { return SIMPLE_TOKEN(MATCH);               }
			
@@ -171,7 +171,7 @@ string_literal        \"([^\\\"\n\v\f\r]|\\.)*\"
 
				 {UNDERSCORE}          { return SIMPLE_TOKEN(UNDERSCORE);          }
			
 
				 {VAR}                 { return SIMPLE_TOKEN(VAR);                 }
			
 
				 {WHILE}               { return SIMPLE_TOKEN(WHILE);               }
			
 
				- /* Table end. */
			
 
				+ /* table-end */
			
 
				 
			
 
				  /* More modern Bisons provide make_EOF. */
			
 
				 <<EOF>>               { return SIMPLE_TOKEN(END_OF_FILE); }
			
--- a/executable_semantics/syntax/parser.ypp
+++ b/executable_semantics/syntax/parser.ypp
@@ -137,6 +137,7 @@
 
				 
			
 
				 %token
			
 
				   // Most tokens have their spelling defined in lexer.lpp.
			
 
				+  // table-begin
			
 
				   AND
			
 
				   API
			
 
				   ARROW
			
@@ -188,6 +189,7 @@
 
				   UNDERSCORE
			
 
				   VAR
			
 
				   WHILE
			
 
				+  // table-end
			
 
				   // Used to track EOF.
			
 
				   END_OF_FILE 0
			
 
				   // Only used for precedence.