From ad437960e81a99ccb485ebf63528ee2efeda1992 Mon Sep 17 00:00:00 2001 From: Alexander Mohr Date: Tue, 10 Feb 2026 10:13:22 +0100 Subject: [PATCH] Mark two-segment CamelCase code identifiers as JUNK This prevents false author detections. Words like MeterProvider, TracerProvider, and SpanProcessor in code comments match the "created by a ..." grammar pattern, producing false author detections. The existing CamelCase JUNK rule only catches three or more segments (e.g. GetQueueReference). This adds a targeted rule for two-segment CamelCase words ending in common code suffixes (Provider, Processor, Exporter, Handler, Factory, etc.) so they are tagged as JUNK. Signed-off-by: Alexander Mohr --- src/cluecode/copyrights.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/cluecode/copyrights.py b/src/cluecode/copyrights.py index 6d17467acf..7c2d17c6bb 100644 --- a/src/cluecode/copyrights.py +++ b/src/cluecode/copyrights.py @@ -833,6 +833,11 @@ def build_detection_from_node( # Repeated CamelCasedWords (r'^([A-Z][a-z]+){3,}$', 'JUNK'), + # Two-segment CamelCase code identifiers such as MeterProvider or + # TracerProvider or SpanProcessor that are not person or company names. + # These are common in code comments and trigger false author detections. + (r'^[A-Z][a-z]+(?:Provider|Processor|Exporter|Importer|Factory|Builder|Handler|Listener|Manager|Resolver|Adapter|Iterator|Observer|Visitor|Value|Logger|Counter|Tracker|Wrapper|Reader|Writer|Sender|Client|Server|Module|Record|Socket|Worker|Batcher|Sampler)s?$', 'JUNK'), + ############################################################################ # JUNK proper ############################################################################