|
|
@@ -27,6 +27,12 @@ LINE_RE = re.compile(
|
|
|
(?P<class_intro>\b(class|struct)\s+(?P<class_name>\w+)\b)|
|
|
|
(?P<end_open_curly>{\s*(?P<open_curly_trailing_comment>//.*)?)|
|
|
|
(?P<trailing_comment>//.*)|
|
|
|
+ (?P<internal_comment>/\*.*\*/)|
|
|
|
+ (?P<string_literal>"([^"]|\\")*"|'([^']|\\')*')|
|
|
|
+ (?P<float_literal>\b(0[xb][0-9a-fA-F']*|[0-9][0-9']*)\.[0-9a-fA-F']*([eEpP][0-9a-fA-F']*)?)|
|
|
|
+ (?P<int_literal>\b(0[xb][0-9a-fA-F']+|[0-9][0-9']*)([eEpP][0-9a-fA-F']*)?)|
|
|
|
+ (?P<symbol>[\[\]{}(),.;]|[-+=!@#$%^&*/?|<>]+)|
|
|
|
+ (?P<keyword>\b(auto|bool|break|case|catch|char|class|const|continue|default|do|double|else|enum|explicit|extern|false|float|for|friend|goto|if|inline|int|long|mutable|namespace|new|nullptr|operator|private|protected|public|return|short|signed|sizeof|static|struct|switch|template|this|throw|true|try|typedef|union|unsigned|using|virtual|void|while)\b)|
|
|
|
(?P<id>\b\w+\b)
|
|
|
""",
|
|
|
re.X,
|
|
|
@@ -44,6 +50,23 @@ class Stats:
|
|
|
comment_line_widths: Counter[int] = field(default_factory=lambda: Counter())
|
|
|
lines_with_trailing_comments: int = 0
|
|
|
classes: int = 0
|
|
|
+ internal_comments: int = 0
|
|
|
+ string_literals: int = 0
|
|
|
+ string_literals_per_line: Counter[int] = field(
|
|
|
+ default_factory=lambda: Counter()
|
|
|
+ )
|
|
|
+ int_literals: int = 0
|
|
|
+ int_literals_per_line: Counter[int] = field(
|
|
|
+ default_factory=lambda: Counter()
|
|
|
+ )
|
|
|
+ float_literals: int = 0
|
|
|
+ float_literals_per_line: Counter[int] = field(
|
|
|
+ default_factory=lambda: Counter()
|
|
|
+ )
|
|
|
+ symbols: int = 0
|
|
|
+ symbols_per_line: Counter[int] = field(default_factory=lambda: Counter())
|
|
|
+ keywords: int = 0
|
|
|
+ keywords_per_line: Counter[int] = field(default_factory=lambda: Counter())
|
|
|
identifiers: int = 0
|
|
|
identifier_widths: Counter[int] = field(default_factory=lambda: Counter())
|
|
|
ids_per_line: Counter[int] = field(default_factory=lambda: Counter())
|
|
|
@@ -56,6 +79,17 @@ class Stats:
|
|
|
self.comment_line_widths.update(other.comment_line_widths)
|
|
|
self.lines_with_trailing_comments += other.lines_with_trailing_comments
|
|
|
self.classes += other.classes
|
|
|
+ self.internal_comments += other.internal_comments
|
|
|
+ self.string_literals += other.string_literals
|
|
|
+ self.string_literals_per_line.update(other.string_literals_per_line)
|
|
|
+ self.int_literals += other.int_literals
|
|
|
+ self.int_literals_per_line.update(other.int_literals_per_line)
|
|
|
+ self.float_literals += other.float_literals
|
|
|
+ self.float_literals_per_line.update(other.float_literals_per_line)
|
|
|
+ self.symbols += other.symbols
|
|
|
+ self.symbols_per_line.update(other.symbols_per_line)
|
|
|
+ self.keywords += other.keywords
|
|
|
+ self.keywords_per_line.update(other.keywords_per_line)
|
|
|
self.identifiers += other.identifiers
|
|
|
self.identifier_widths.update(other.identifier_widths)
|
|
|
self.ids_per_line.update(other.ids_per_line)
|
|
|
@@ -82,6 +116,11 @@ def scan_file(file: Path) -> Stats:
|
|
|
else:
|
|
|
stats.comment_line_widths[len(line)] += 1
|
|
|
continue
|
|
|
+ line_string_literals = 0
|
|
|
+ line_int_literals = 0
|
|
|
+ line_float_literals = 0
|
|
|
+ line_symbols = 0
|
|
|
+ line_keywords = 0
|
|
|
line_identifiers = 0
|
|
|
for m in re.finditer(LINE_RE, line):
|
|
|
if m.group("trailing_comment"):
|
|
|
@@ -89,10 +128,23 @@ def scan_file(file: Path) -> Stats:
|
|
|
break
|
|
|
if m.group("class_intro"):
|
|
|
stats.classes += 1
|
|
|
+ line_keywords += 1
|
|
|
line_identifiers += 1
|
|
|
stats.identifier_widths[len(m.group("class_name"))] += 1
|
|
|
elif m.group("end_open_curly"):
|
|
|
- pass
|
|
|
+ line_symbols += 1
|
|
|
+ elif m.group("internal_comment"):
|
|
|
+ stats.internal_comments += 1
|
|
|
+ elif m.group("string_literal"):
|
|
|
+ line_string_literals += 1
|
|
|
+ elif m.group("int_literal"):
|
|
|
+ line_int_literals += 1
|
|
|
+ elif m.group("float_literal"):
|
|
|
+ line_float_literals += 1
|
|
|
+ elif m.group("symbol"):
|
|
|
+ line_symbols += 1
|
|
|
+ elif m.group("keyword"):
|
|
|
+ line_keywords += 1
|
|
|
else:
|
|
|
assert m.group("id"), "Line is '%s', and match is '%s'" % (
|
|
|
line,
|
|
|
@@ -100,6 +152,16 @@ def scan_file(file: Path) -> Stats:
|
|
|
)
|
|
|
line_identifiers += 1
|
|
|
stats.identifier_widths[len(m.group("id"))] += 1
|
|
|
+ stats.string_literals += line_string_literals
|
|
|
+ stats.string_literals_per_line[line_string_literals] += 1
|
|
|
+ stats.int_literals += line_int_literals
|
|
|
+ stats.int_literals_per_line[line_int_literals] += 1
|
|
|
+ stats.float_literals += line_float_literals
|
|
|
+ stats.float_literals_per_line[line_float_literals] += 1
|
|
|
+ stats.symbols += line_symbols
|
|
|
+ stats.symbols_per_line[line_symbols] += 1
|
|
|
+ stats.keywords += line_keywords
|
|
|
+ stats.keywords_per_line[line_keywords] += 1
|
|
|
stats.identifiers += line_identifiers
|
|
|
stats.ids_per_line[line_identifiers] += 1
|
|
|
return stats
|
|
|
@@ -136,19 +198,57 @@ Comment lines: %(comment_lines)d
|
|
|
Empty comment lines: %(empty_comment_lines)d
|
|
|
Lines with trailing comments: %(lines_with_trailing_comments)d
|
|
|
Classes: %(classes)d
|
|
|
+Internal comments: %(internal_comments)d
|
|
|
+String literals: %(string_literals)d
|
|
|
+Int literals: %(int_literals)d
|
|
|
+Float literals: %(float_literals)d
|
|
|
+Symbols: %(symbols)d
|
|
|
+Keywords: %(keywords)d
|
|
|
IDs: %(identifiers)d"""
|
|
|
% asdict(stats)
|
|
|
)
|
|
|
|
|
|
+ tokens = (
|
|
|
+ stats.string_literals
|
|
|
+ + stats.int_literals
|
|
|
+ + stats.float_literals
|
|
|
+ + stats.symbols
|
|
|
+ + stats.keywords
|
|
|
+ + stats.identifiers
|
|
|
+ )
|
|
|
+ print(
|
|
|
+ f"""
|
|
|
+Fraction of blank lines: {stats.blank_lines / stats.lines}
|
|
|
+Fraction of comment lines: {stats.comment_lines / stats.lines}
|
|
|
+
|
|
|
+Total counted tokens: {tokens}
|
|
|
+Fraction string literals: {stats.string_literals / tokens}
|
|
|
+Fraction int literals: {stats.int_literals / tokens}
|
|
|
+Fraction float literals: {stats.float_literals / tokens}
|
|
|
+Fraction symbols: {stats.symbols / tokens}
|
|
|
+Fraction keywords: {stats.keywords / tokens}
|
|
|
+Fraction IDs: {stats.identifiers / tokens}
|
|
|
+ """
|
|
|
+ )
|
|
|
+
|
|
|
def print_histogram(
|
|
|
title: str, data: Dict[int, int], column_format: str
|
|
|
) -> None:
|
|
|
print()
|
|
|
- print(title)
|
|
|
key_min = min(data.keys())
|
|
|
key_max = max(data.keys()) + 1
|
|
|
values = [data.get(k, 0) for k in range(key_min, key_max)]
|
|
|
keys = [column_format % k for k in range(key_min, key_max)]
|
|
|
+ total = sum(values)
|
|
|
+ median = key_min
|
|
|
+ count = total
|
|
|
+ for k in range(key_min, key_max):
|
|
|
+ count -= data.get(k, 0)
|
|
|
+ if count <= total / 2:
|
|
|
+ median = k
|
|
|
+ break
|
|
|
+
|
|
|
+ print(title + f" (median: {median})")
|
|
|
fig = tpl.figure()
|
|
|
fig.barh(values, keys)
|
|
|
fig.show()
|
|
|
@@ -156,6 +256,29 @@ IDs: %(identifiers)d"""
|
|
|
print_histogram(
|
|
|
"## Comment line widths ##", stats.comment_line_widths, "%d columns"
|
|
|
)
|
|
|
+
|
|
|
+ print_histogram(
|
|
|
+ "## String literals per line ##",
|
|
|
+ stats.string_literals_per_line,
|
|
|
+ "%d literals",
|
|
|
+ )
|
|
|
+ print_histogram(
|
|
|
+ "## Int literals per line ##",
|
|
|
+ stats.int_literals_per_line,
|
|
|
+ "%d literals",
|
|
|
+ )
|
|
|
+ print_histogram(
|
|
|
+ "## Float literals per line ##",
|
|
|
+ stats.float_literals_per_line,
|
|
|
+ "%d literals",
|
|
|
+ )
|
|
|
+ print_histogram(
|
|
|
+ "## Symbols per line ##", stats.symbols_per_line, "%d symbols"
|
|
|
+ )
|
|
|
+ print_histogram(
|
|
|
+ "## Keywords per line ##", stats.keywords_per_line, "%d keywords"
|
|
|
+ )
|
|
|
+
|
|
|
print_histogram("## ID widths ##", stats.identifier_widths, "%d characters")
|
|
|
print_histogram("## IDs per line ##", stats.ids_per_line, "%d ids")
|
|
|
|