2 года назад · 0e8e94d8bf
--- a/scripts/source_stats.py
+++ b/scripts/source_stats.py
@@ -27,6 +27,12 @@ LINE_RE = re.compile(
 
				     (?P<class_intro>\b(class|struct)\s+(?P<class_name>\w+)\b)|
			
 
				     (?P<end_open_curly>{\s*(?P<open_curly_trailing_comment>//.*)?)|
			
 
				     (?P<trailing_comment>//.*)|
			
 
				+    (?P<internal_comment>/\*.*\*/)|
			
 
				+    (?P<string_literal>"([^"]|\\")*"|'([^']|\\')*')|
			
 
				+    (?P<float_literal>\b(0[xb][0-9a-fA-F']*|[0-9][0-9']*)\.[0-9a-fA-F']*([eEpP][0-9a-fA-F']*)?)|
			
 
				+    (?P<int_literal>\b(0[xb][0-9a-fA-F']+|[0-9][0-9']*)([eEpP][0-9a-fA-F']*)?)|
			
 
				+    (?P<symbol>[\[\]{}(),.;]|[-+=!@#$%^&*/?|<>]+)|
			
 
				+    (?P<keyword>\b(auto|bool|break|case|catch|char|class|const|continue|default|do|double|else|enum|explicit|extern|false|float|for|friend|goto|if|inline|int|long|mutable|namespace|new|nullptr|operator|private|protected|public|return|short|signed|sizeof|static|struct|switch|template|this|throw|true|try|typedef|union|unsigned|using|virtual|void|while)\b)|
			
 
				     (?P<id>\b\w+\b)
			
 
				 """,
			
 
				     re.X,
			
@@ -44,6 +50,23 @@ class Stats:
 
				     comment_line_widths: Counter[int] = field(default_factory=lambda: Counter())
			
 
				     lines_with_trailing_comments: int = 0
			
 
				     classes: int = 0
			
 
				+    internal_comments: int = 0
			
 
				+    string_literals: int = 0
			
 
				+    string_literals_per_line: Counter[int] = field(
			
 
				+        default_factory=lambda: Counter()
			
 
				+    )
			
 
				+    int_literals: int = 0
			
 
				+    int_literals_per_line: Counter[int] = field(
			
 
				+        default_factory=lambda: Counter()
			
 
				+    )
			
 
				+    float_literals: int = 0
			
 
				+    float_literals_per_line: Counter[int] = field(
			
 
				+        default_factory=lambda: Counter()
			
 
				+    )
			
 
				+    symbols: int = 0
			
 
				+    symbols_per_line: Counter[int] = field(default_factory=lambda: Counter())
			
 
				+    keywords: int = 0
			
 
				+    keywords_per_line: Counter[int] = field(default_factory=lambda: Counter())
			
 
				     identifiers: int = 0
			
 
				     identifier_widths: Counter[int] = field(default_factory=lambda: Counter())
			
 
				     ids_per_line: Counter[int] = field(default_factory=lambda: Counter())
			
@@ -56,6 +79,17 @@ class Stats:
 
				         self.comment_line_widths.update(other.comment_line_widths)
			
 
				         self.lines_with_trailing_comments += other.lines_with_trailing_comments
			
 
				         self.classes += other.classes
			
 
				+        self.internal_comments += other.internal_comments
			
 
				+        self.string_literals += other.string_literals
			
 
				+        self.string_literals_per_line.update(other.string_literals_per_line)
			
 
				+        self.int_literals += other.int_literals
			
 
				+        self.int_literals_per_line.update(other.int_literals_per_line)
			
 
				+        self.float_literals += other.float_literals
			
 
				+        self.float_literals_per_line.update(other.float_literals_per_line)
			
 
				+        self.symbols += other.symbols
			
 
				+        self.symbols_per_line.update(other.symbols_per_line)
			
 
				+        self.keywords += other.keywords
			
 
				+        self.keywords_per_line.update(other.keywords_per_line)
			
 
				         self.identifiers += other.identifiers
			
 
				         self.identifier_widths.update(other.identifier_widths)
			
 
				         self.ids_per_line.update(other.ids_per_line)
			
@@ -82,6 +116,11 @@ def scan_file(file: Path) -> Stats:
 
				             else:
			
 
				                 stats.comment_line_widths[len(line)] += 1
			
 
				             continue
			
 
				+        line_string_literals = 0
			
 
				+        line_int_literals = 0
			
 
				+        line_float_literals = 0
			
 
				+        line_symbols = 0
			
 
				+        line_keywords = 0
			
 
				         line_identifiers = 0
			
 
				         for m in re.finditer(LINE_RE, line):
			
 
				             if m.group("trailing_comment"):
			
@@ -89,10 +128,23 @@ def scan_file(file: Path) -> Stats:
 
				                 break
			
 
				             if m.group("class_intro"):
			
 
				                 stats.classes += 1
			
 
				+                line_keywords += 1
			
 
				                 line_identifiers += 1
			
 
				                 stats.identifier_widths[len(m.group("class_name"))] += 1
			
 
				             elif m.group("end_open_curly"):
			
 
				-                pass
			
 
				+                line_symbols += 1
			
 
				+            elif m.group("internal_comment"):
			
 
				+                stats.internal_comments += 1
			
 
				+            elif m.group("string_literal"):
			
 
				+                line_string_literals += 1
			
 
				+            elif m.group("int_literal"):
			
 
				+                line_int_literals += 1
			
 
				+            elif m.group("float_literal"):
			
 
				+                line_float_literals += 1
			
 
				+            elif m.group("symbol"):
			
 
				+                line_symbols += 1
			
 
				+            elif m.group("keyword"):
			
 
				+                line_keywords += 1
			
 
				             else:
			
 
				                 assert m.group("id"), "Line is '%s', and match is '%s'" % (
			
 
				                     line,
			
@@ -100,6 +152,16 @@ def scan_file(file: Path) -> Stats:
 
				                 )
			
 
				                 line_identifiers += 1
			
 
				                 stats.identifier_widths[len(m.group("id"))] += 1
			
 
				+        stats.string_literals += line_string_literals
			
 
				+        stats.string_literals_per_line[line_string_literals] += 1
			
 
				+        stats.int_literals += line_int_literals
			
 
				+        stats.int_literals_per_line[line_int_literals] += 1
			
 
				+        stats.float_literals += line_float_literals
			
 
				+        stats.float_literals_per_line[line_float_literals] += 1
			
 
				+        stats.symbols += line_symbols
			
 
				+        stats.symbols_per_line[line_symbols] += 1
			
 
				+        stats.keywords += line_keywords
			
 
				+        stats.keywords_per_line[line_keywords] += 1
			
 
				         stats.identifiers += line_identifiers
			
 
				         stats.ids_per_line[line_identifiers] += 1
			
 
				     return stats
			
@@ -136,19 +198,57 @@ Comment lines: %(comment_lines)d
 
				 Empty comment lines: %(empty_comment_lines)d
			
 
				 Lines with trailing comments: %(lines_with_trailing_comments)d
			
 
				 Classes: %(classes)d
			
 
				+Internal comments: %(internal_comments)d
			
 
				+String literals: %(string_literals)d
			
 
				+Int literals: %(int_literals)d
			
 
				+Float literals: %(float_literals)d
			
 
				+Symbols: %(symbols)d
			
 
				+Keywords: %(keywords)d
			
 
				 IDs: %(identifiers)d"""
			
 
				         % asdict(stats)
			
 
				     )
			
 
				 
			
 
				+    tokens = (
			
 
				+        stats.string_literals
			
 
				+        + stats.int_literals
			
 
				+        + stats.float_literals
			
 
				+        + stats.symbols
			
 
				+        + stats.keywords
			
 
				+        + stats.identifiers
			
 
				+    )
			
 
				+    print(
			
 
				+        f"""
			
 
				+Fraction of blank lines: {stats.blank_lines / stats.lines}
			
 
				+Fraction of comment lines: {stats.comment_lines / stats.lines}
			
 
				+
			
 
				+Total counted tokens: {tokens}
			
 
				+Fraction string literals: {stats.string_literals / tokens}
			
 
				+Fraction int literals: {stats.int_literals / tokens}
			
 
				+Fraction float literals: {stats.float_literals / tokens}
			
 
				+Fraction symbols: {stats.symbols / tokens}
			
 
				+Fraction keywords: {stats.keywords / tokens}
			
 
				+Fraction IDs: {stats.identifiers / tokens}
			
 
				+    """
			
 
				+    )
			
 
				+
			
 
				     def print_histogram(
			
 
				         title: str, data: Dict[int, int], column_format: str
			
 
				     ) -> None:
			
 
				         print()
			
 
				-        print(title)
			
 
				         key_min = min(data.keys())
			
 
				         key_max = max(data.keys()) + 1
			
 
				         values = [data.get(k, 0) for k in range(key_min, key_max)]
			
 
				         keys = [column_format % k for k in range(key_min, key_max)]
			
 
				+        total = sum(values)
			
 
				+        median = key_min
			
 
				+        count = total
			
 
				+        for k in range(key_min, key_max):
			
 
				+            count -= data.get(k, 0)
			
 
				+            if count <= total / 2:
			
 
				+                median = k
			
 
				+                break
			
 
				+
			
 
				+        print(title + f" (median: {median})")
			
 
				         fig = tpl.figure()
			
 
				         fig.barh(values, keys)
			
 
				         fig.show()
			
@@ -156,6 +256,29 @@ IDs: %(identifiers)d"""
 
				     print_histogram(
			
 
				         "## Comment line widths ##", stats.comment_line_widths, "%d columns"
			
 
				     )
			
 
				+
			
 
				+    print_histogram(
			
 
				+        "## String literals per line ##",
			
 
				+        stats.string_literals_per_line,
			
 
				+        "%d literals",
			
 
				+    )
			
 
				+    print_histogram(
			
 
				+        "## Int literals per line ##",
			
 
				+        stats.int_literals_per_line,
			
 
				+        "%d literals",
			
 
				+    )
			
 
				+    print_histogram(
			
 
				+        "## Float literals per line ##",
			
 
				+        stats.float_literals_per_line,
			
 
				+        "%d literals",
			
 
				+    )
			
 
				+    print_histogram(
			
 
				+        "## Symbols per line ##", stats.symbols_per_line, "%d symbols"
			
 
				+    )
			
 
				+    print_histogram(
			
 
				+        "## Keywords per line ##", stats.keywords_per_line, "%d keywords"
			
 
				+    )
			
 
				+
			
 
				     print_histogram("## ID widths ##", stats.identifier_widths, "%d characters")
			
 
				     print_histogram("## IDs per line ##", stats.ids_per_line, "%d ids")