From e1ed67ee0f9d98eee1170e2d33d62b73ceb7f578 Mon Sep 17 00:00:00 2001 From: Tim Paine <3105306+timkpaine@users.noreply.github.com> Date: Fri, 6 Feb 2026 19:55:53 -0500 Subject: [PATCH] More wiki tweaks --- yardang/conf.py.j2 | 2 +- yardang/wiki.py | 220 +++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 215 insertions(+), 7 deletions(-) diff --git a/yardang/conf.py.j2 b/yardang/conf.py.j2 index c1d5363..4a85689 100644 --- a/yardang/conf.py.j2 +++ b/yardang/conf.py.j2 @@ -129,7 +129,7 @@ html_js_files = [ master_doc = "index" templates_path = ["_templates"] source_suffix = [".rst", ".md", *{{source_suffix}}] -exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "node_modules", "_skbuild", ".pytest_cache", "js/*", *{{exclude_patterns}}] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "node_modules", "_skbuild", ".pytest_cache", "js/*", "*.wiki", "*.wiki/*", "docs/wiki", "docs/wiki/*", *{{exclude_patterns}}] language = "{{language}}" pygments_style = "{{pygments_style}}" diff --git a/yardang/wiki.py b/yardang/wiki.py index 136e69f..c1ca3e6 100644 --- a/yardang/wiki.py +++ b/yardang/wiki.py @@ -147,13 +147,56 @@ def cleanup_markdown(content: str) -> str: # Remove anchor tags before headings (e.g., ) content = re.sub(r'\s*\n?', "", content) - # Reduce image widths by 50% for wiki (GitHub wiki renders larger) - def reduce_image_width(match): - width = int(match.group(1)) - new_width = max(16, width // 2) # Reduce by 50%, minimum 16px - return f'width="{new_width}"' + # Badge URL patterns to skip (don't resize badges) + badge_patterns = [ + r"shields\.io", + r"badge\.svg", + r"codecov\.io", + r"github\.com/.+/actions/workflows/.+/badge", + r"img\.shields\.io", + r"coveralls\.io", + r"travis-ci\.org", + r"circleci\.com", + r"appveyor\.com", + r"readthedocs\.org", + ] + + def is_badge_url(url): + """Check if URL looks like a badge image.""" + return any(re.search(pattern, url) for pattern in badge_patterns) + + # Convert large images to HTML img tags with constrained width + # Match: ![alt](url) pattern (linked images) + def resize_linked_image(match): + href = match.group(1) + alt = match.group(2) + src = match.group(3) + # Skip badges - they should stay at natural size + if is_badge_url(src): + return match.group(0) + return f'{alt}' + + content = re.sub( + r'\!\[([^\]]*)\]\(([^)]+)\)', + resize_linked_image, + content, + ) + + # Match: ![alt](url) pattern (standalone images, not inside links like [![]()]) + # Skip images that are inside markdown links (preceded by [) + def resize_standalone_image(match): + alt = match.group(1) + src = match.group(2) + # Skip badges - they should stay at natural size + if is_badge_url(src): + return match.group(0) + return f'{alt}' - content = re.sub(r'width="(\d+)"', reduce_image_width, content) + content = re.sub( + r'(? and before < # Match:
content
and expand it @@ -210,6 +253,168 @@ def fix_code_block(match): return content +def cleanup_api_docs(content: str) -> str: + """Clean up API documentation for better readability. + + Reformats dense sphinx-markdown-builder API output: + - Breaks long function signatures into multiple lines + - Removes escaped underscores in code contexts + - Improves parameter list formatting + + Args: + content: Markdown content with API documentation. + + Returns: + Cleaned API documentation content. + """ + # Remove escaped underscores in code/function contexts + # Match: word\_word patterns and unescape them + content = re.sub(r"(\w)\\_(\w)", r"\1_\2", content) + + # Format long function signatures - break parameters onto separate lines + def format_signature(match): + prefix = match.group(1) # ### module.function( + params = match.group(2) # parameters + suffix = match.group(3) # ) + + # If signature is short enough, keep it + if len(match.group(0)) < 80: + return match.group(0) + + # Parse parameters and format them + # Split on ", " but be careful about nested brackets + param_list = [] + current = "" + bracket_depth = 0 + for char in params: + if char in "([{": + bracket_depth += 1 + current += char + elif char in ")]}": + bracket_depth -= 1 + current += char + elif char == "," and bracket_depth == 0: + if current.strip(): + param_list.append(current.strip()) + current = "" + else: + current += char + if current.strip(): + param_list.append(current.strip()) + + # If few parameters, keep on one line + if len(param_list) <= 2: + return match.group(0) + + # Format with line breaks + formatted_params = ",\n ".join(param_list) + return f"{prefix}\n {formatted_params}\n{suffix}" + + # Match function/method signatures: ### name(params) + content = re.sub( + r"(###\s+[\w.]+\()((?:[^()]+|\([^()]*\))*?)(\))", + format_signature, + content, + ) + + # Clean up parameter descriptions - ensure proper list formatting + # Match: * **param** – description that may wrap + content = re.sub( + r"\*\s+\*\*(\w+)\*\*\s*[–-]\s*", + r"- **\1**: ", + content, + ) + + # Clean up "Parameters:" sections - convert to simpler format + content = re.sub( + r"\*\s+\*\*Parameters:\*\*", + "\n**Parameters:**", + content, + ) + content = re.sub( + r"\*\s+\*\*Returns:\*\*", + "\n**Returns:**", + content, + ) + content = re.sub( + r"\*\s+\*\*Raises:\*\*", + "\n**Raises:**", + content, + ) + content = re.sub( + r"\*\s+\*\*Yields:\*\*", + "\n**Yields:**", + content, + ) + content = re.sub( + r"\*\s+\*\*Arguments:\*\*", + "\n**Arguments:**", + content, + ) + content = re.sub( + r"\*\s+\*\*Throws:\*\*", + "\n**Throws:**", + content, + ) + + # Fix nested list items under Parameters/Returns etc + # Convert * * to proper nested - + content = re.sub(r"^\s*\*\s+\*\s+", " - ", content, flags=re.MULTILINE) + + # Remove orphaned list markers + content = re.sub(r"^\s*\*\s*$", "", content, flags=re.MULTILINE) + + # Clean up type annotations in returns + # Match: *type* – and convert to: (*type*) + content = re.sub( + r"\n\s+\*(\w+)\*\s*[–-]\s*\n", + r"\n - *\1*: ", + content, + ) + + # Fix "#### NOTE" / "#### WARNING" etc to be more prominent + content = re.sub(r"####\s+(NOTE|WARNING|SEE ALSO|IMPORTANT)", r"> **\1**", content) + + return content + + +def _is_api_page(filename: str, content: str) -> bool: + """Detect if a markdown file is an API documentation page. + + Args: + filename: Name of the markdown file. + content: Content of the file. + + Returns: + True if this appears to be API documentation. + """ + # Check filename patterns + api_filename_patterns = [ + "api", + "autoapi", + "reference", + ] + filename_lower = filename.lower() + if any(pattern in filename_lower for pattern in api_filename_patterns): + return True + + # Check content patterns that indicate API docs + api_content_indicators = [ + "**Parameters:**", + "* **Parameters:**", + "**Returns:**", + "* **Returns:**", + "**Raises:**", + "* **Raises:**", + "**Arguments:**", + "* **Arguments:**", + ] + + indicator_count = sum(1 for ind in api_content_indicators if ind in content) + # If multiple API-style sections, treat as API docs + return indicator_count >= 2 + + def extract_toctree_entries(content: str) -> List[Tuple[str, str]]: """Extract toctree entries from markdown content. @@ -553,6 +758,9 @@ def process_wiki_output( content = md_file.read_text(encoding="utf-8") # Clean up markdown formatting issues content = cleanup_markdown(content) + # Clean up API documentation formatting if this looks like an API page + if _is_api_page(md_file.name, content): + content = cleanup_api_docs(content) # Fix internal links for wiki fixed_content = fix_wiki_links(content, page_map) md_file.write_text(fixed_content, encoding="utf-8")