From 6ed33758ecc7588d83be502de0c6b03bfb514d12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Moreau?= Date: Tue, 17 Feb 2026 11:33:47 -0500 Subject: [PATCH 01/11] CI: allow partial publish and list skipped specs --- scripts/Build-Publish.ps1 | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/scripts/Build-Publish.ps1 b/scripts/Build-Publish.ps1 index 7e0fe6ce..811304bc 100644 --- a/scripts/Build-Publish.ps1 +++ b/scripts/Build-Publish.ps1 @@ -97,6 +97,10 @@ try { if ($missingDownloads.Count -gt 0) { if ($AllowPartial) { Write-Warning "Skipping $($missingDownloads.Count) specs with missing downloads (CDN links may be stale): $($missingDownloads -join ', ')" + Write-Host 'Skipped specs (missing downloads):' + foreach ($protocolId in $missingDownloads) { + Write-Host " - $protocolId" + } $catalog = $catalog | Where-Object { $downloadedProtocolIds.Contains($_.ProtocolId) } } else { @@ -139,7 +143,16 @@ try { $missingPublished = @($catalog | Where-Object { -not $publishedProtocolIds.Contains($_.ProtocolId) } | Select-Object -ExpandProperty ProtocolId -Unique | Sort-Object) if ($missingPublished.Count -gt 0) { - throw "Missing converted output for $($missingPublished.Count) specs: $($missingPublished -join ', ')" + if ($AllowPartial) { + Write-Warning "Skipping $($missingPublished.Count) specs with missing converted output: $($missingPublished -join ', ')" + Write-Host 'Skipped specs (missing converted output):' + foreach ($protocolId in $missingPublished) { + Write-Host " - $protocolId" + } + } + else { + throw "Missing converted output for $($missingPublished.Count) specs: $($missingPublished -join ', ')" + } } $legalSource = Join-Path (Join-Path $convPath '_legal') 'LEGAL.md' From 9e589b15b3f49c6f930f00ab92ef600c5c825bea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Moreau?= Date: Tue, 17 Feb 2026 15:06:01 -0500 Subject: [PATCH 02/11] Make section-link repair deterministic and add regression tests --- .../Invoke-OpenSpecMarkdownCleanup.ps1 | 168 ++++++++++++++---- tests/AwakeCoding.OpenSpecs.Tests.ps1 | 29 +++ 2 files changed, 165 insertions(+), 32 deletions(-) diff --git a/AwakeCoding.OpenSpecs/Private/Invoke-OpenSpecMarkdownCleanup.ps1 b/AwakeCoding.OpenSpecs/Private/Invoke-OpenSpecMarkdownCleanup.ps1 index 2baf01dc..2274fe20 100644 --- a/AwakeCoding.OpenSpecs/Private/Invoke-OpenSpecMarkdownCleanup.ps1 +++ b/AwakeCoding.OpenSpecs/Private/Invoke-OpenSpecMarkdownCleanup.ps1 @@ -1664,9 +1664,62 @@ function Repair-OpenSpecSectionGuidLinksByHeadingMatch { $newLine = [Environment]::NewLine $lineArray = $Markdown -split '\r?\n' - $titleToSection = @{} + $titleToSections = @{} + $sectionOrder = @{} $anchorIdRegex = [regex]::new('', 'IgnoreCase') + $addSectionOrder = { + param([string]$sectionId, [int]$index) + + if ([string]::IsNullOrWhiteSpace($sectionId)) { + return + } + + if (-not $sectionOrder.ContainsKey($sectionId)) { + $sectionOrder[$sectionId] = $index + } + } + + $addTitleMapping = { + param([string]$title, [string]$sectionId) + + if ([string]::IsNullOrWhiteSpace($title) -or [string]::IsNullOrWhiteSpace($sectionId)) { + return + } + + $norm = ($title -replace '\*+', '' -replace '\s+', ' ').Trim() + if ([string]::IsNullOrWhiteSpace($norm)) { + return + } + + $key = $norm.ToLowerInvariant() + if (-not $titleToSections.ContainsKey($key)) { + $titleToSections[$key] = [System.Collections.Generic.List[string]]::new() + } + + $bucket = $titleToSections[$key] + if (-not $bucket.Contains($sectionId)) { + [void]$bucket.Add($sectionId) + } + } + + # Capture document-order ranking for section anchors and numbered headings. + $orderIndex = 0 + for ($i = 0; $i -lt $lineArray.Count; $i++) { + $line = $lineArray[$i] + + if ($line -match '^\s*\s*$') { + & $addSectionOrder $Matches[1] $orderIndex + $orderIndex++ + continue + } + + if ($line -match '^\s*#{1,6}\s+(\d+(?:\.\d+)*)\s+.+$') { + & $addSectionOrder "Section_$($Matches[1])" $orderIndex + $orderIndex++ + } + } + # Collect all existing anchors $existingAnchors = [System.Collections.Generic.HashSet[string]]::new([StringComparer]::OrdinalIgnoreCase) foreach ($m in $anchorIdRegex.Matches($Markdown)) { @@ -1678,11 +1731,10 @@ function Repair-OpenSpecSectionGuidLinksByHeadingMatch { $sectionId = [string]$entry.Key $title = [string]$entry.Value if ([string]::IsNullOrWhiteSpace($sectionId) -or [string]::IsNullOrWhiteSpace($title)) { continue } - $norm = ($title -replace '\s+', ' ').Trim() - if (-not $titleToSection.ContainsKey($norm)) { $titleToSection[$norm] = $sectionId } + + & $addTitleMapping $title $sectionId $withoutNum = $title -replace '^\d+(?:\.\d+)*\s+', '' - $normWithout = ($withoutNum -replace '\s+', ' ').Trim() - if ($normWithout -and -not $titleToSection.ContainsKey($normWithout)) { $titleToSection[$normWithout] = $sectionId } + & $addTitleMapping $withoutNum $sectionId } } @@ -1694,14 +1746,11 @@ function Repair-OpenSpecSectionGuidLinksByHeadingMatch { $nextLine = if ($i + 1 -lt $lineArray.Count) { $lineArray[$i + 1].Trim() } else { '' } if ([string]::IsNullOrWhiteSpace($nextLine)) { continue } $title = if ($nextLine -match '^\s*#{1,6}\s+(?.+)$') { $Matches['title'].Trim() } else { $nextLine } - $norm = ($title -replace '\s+', ' ').Trim() - if (-not $titleToSection.ContainsKey($norm)) { $titleToSection[$norm] = $sectionId } + & $addTitleMapping $title $sectionId $withoutNum = $title -replace '^\d+(?:\.\d+)*\s+', '' - $normWithout = ($withoutNum -replace '\s+', ' ').Trim() - if ($normWithout -and -not $titleToSection.ContainsKey($normWithout)) { $titleToSection[$normWithout] = $sectionId } + & $addTitleMapping $withoutNum $sectionId $withoutParen = $title -replace '\s*\([^)]*\)\s*$', '' # "Share Control Header (TS_SHARECONTROLHEADER)" -> "Share Control Header" - $normNoParen = ($withoutParen -replace '\s+', ' ').Trim() - if ($normNoParen -and -not $titleToSection.ContainsKey($normNoParen)) { $titleToSection[$normNoParen] = $sectionId } + & $addTitleMapping $withoutParen $sectionId } } @@ -1714,36 +1763,91 @@ function Repair-OpenSpecSectionGuidLinksByHeadingMatch { $sectionNum = $hm.Groups[1].Value $sectionId = "Section_$sectionNum" $title = $hm.Groups['title'].Value.Trim() - $norm = ($title -replace '\s+', ' ').Trim() - if (-not $titleToSection.ContainsKey($norm)) { $titleToSection[$norm] = $sectionId } + & $addTitleMapping $title $sectionId $withoutNum = ($title -replace '^\d+(?:\.\d+)*\s+', '') -replace '\s*\([^)]*\)\s*$', '' - $normWithout = ($withoutNum -replace '\s+', ' ').Trim() - if ($normWithout -and -not $titleToSection.ContainsKey($normWithout)) { $titleToSection[$normWithout] = $sectionId } + & $addTitleMapping $withoutNum $sectionId } } - # Find best section for link text: exact match, prefix match, or extract "(section N.N.N)" from link text. + # Find best section for link text with deterministic tie-breaking. $findSectionForLinkText = { - param($norm, $titleToSection, $existingAnchors) - if ($titleToSection.ContainsKey($norm)) { return $titleToSection[$norm] } - # Extract section number from link text like "Share Control Header (section 2.2.8.1.1.1)" - if ($norm -match '\(section\s+(\d+(?:\.\d+)*)\)') { - $extractedId = "Section_$($Matches[1])" + param($norm, $titleToSections, $existingAnchors, $sectionOrder) + + if ([string]::IsNullOrWhiteSpace($norm)) { + return $null + } + + $normalized = ($norm -replace '\*+', '' -replace '\s+', ' ').Trim() + if ([string]::IsNullOrWhiteSpace($normalized)) { + return $null + } + + # Prefer explicit section-number references in link text. + if ($normalized -match '^(?:section\s+)?(?<num>\d+(?:\.\d+)*)$') { + $directId = "Section_$($Matches['num'])" + if ($existingAnchors.Contains($directId)) { return $directId } + } + + if ($normalized -match '\(section\s+(?<num>\d+(?:\.\d+)*)\)') { + $extractedId = "Section_$($Matches['num'])" if ($existingAnchors.Contains($extractedId)) { return $extractedId } } + + $normKey = $normalized.ToLowerInvariant() + if ($titleToSections.ContainsKey($normKey)) { + $exactSections = @($titleToSections[$normKey] | Sort-Object -Property @( + @{ Expression = { if ($sectionOrder.ContainsKey($_)) { [int]$sectionOrder[$_] } else { [int]::MaxValue } } }, + @{ Expression = { $_.Length } }, + @{ Expression = { $_ } } + )) + if ($exactSections.Count -gt 0) { + return $exactSections[0] + } + } + $candidates = @() - foreach ($key in $titleToSection.Keys) { - if ($key -eq $norm) { return $titleToSection[$key] } - if ($key.StartsWith($norm + ' ') -or $key.StartsWith($norm + '(')) { $candidates += [pscustomobject]@{ Key = $key; SectionId = $titleToSection[$key] } } - elseif ($norm.StartsWith($key + ' ') -or $norm.StartsWith($key + '(')) { $candidates += [pscustomobject]@{ Key = $key; SectionId = $titleToSection[$key] } } - elseif ($key.StartsWith($norm) -or $norm.StartsWith($key)) { $candidates += [pscustomobject]@{ Key = $key; SectionId = $titleToSection[$key] } } + foreach ($key in @($titleToSections.Keys | Sort-Object -Property @{ Expression = { $_.Length } }, @{ Expression = { $_ } })) { + $score = $null + if ($key.StartsWith($normKey + ' ') -or $key.StartsWith($normKey + '(')) { + $score = 1 + } + elseif ($normKey.StartsWith($key + ' ') -or $normKey.StartsWith($key + '(')) { + $score = 2 + } + elseif ($key.StartsWith($normKey) -or $normKey.StartsWith($key)) { + $score = 3 + } + + if ($null -eq $score) { + continue + } + + foreach ($sectionId in $titleToSections[$key]) { + $candidates += [pscustomobject]@{ + Score = $score + Key = $key + KeyLength = $key.Length + SectionId = $sectionId + } + } } + if ($candidates.Count -eq 1) { return $candidates[0].SectionId } if ($candidates.Count -gt 1) { - # Prefer shortest key (most specific match), e.g. "Status Info PDU" over "Status Info PDU Data (TS_...)" - $best = $candidates | Sort-Object -Property { $_.Key.Length } | Select-Object -First 1 + $best = $candidates | + Sort-Object -Property @( + @{ Expression = { $_.Score } }, + @{ Expression = { [Math]::Abs($_.KeyLength - $normKey.Length) } }, + @{ Expression = { $_.KeyLength } }, + @{ Expression = { if ($sectionOrder.ContainsKey($_.SectionId)) { [int]$sectionOrder[$_.SectionId] } else { [int]::MaxValue } } }, + @{ Expression = { $_.SectionId.Length } }, + @{ Expression = { $_.SectionId } }, + @{ Expression = { $_.Key } } + ) | + Select-Object -First 1 return $best.SectionId } + return $null } $guidLinkRegex = [regex]::new('\[(?<text>[^\]]+)\]\(#Section_[a-fA-F0-9]{32}\)') @@ -1751,15 +1855,15 @@ function Repair-OpenSpecSectionGuidLinksByHeadingMatch { param($m) $rawText = $m.Groups['text'].Value $norm = ($rawText -replace '\*+', '' -replace '\s+', ' ').Trim() - $sectionId = & $findSectionForLinkText $norm $titleToSection $existingAnchors - if (-not $sectionId -and $rawText.Trim() -ne $norm) { $sectionId = & $findSectionForLinkText $rawText.Trim() $titleToSection $existingAnchors } + $sectionId = & $findSectionForLinkText $norm $titleToSections $existingAnchors $sectionOrder + if (-not $sectionId -and $rawText.Trim() -ne $norm) { $sectionId = & $findSectionForLinkText $rawText.Trim() $titleToSections $existingAnchors $sectionOrder } if ($sectionId) { "[$rawText](#$sectionId)" } else { $m.Value } }) $linksRepaired = 0 foreach ($m in $guidLinkRegex.Matches($Markdown)) { $norm = ($m.Groups['text'].Value -replace '\*+', '' -replace '\s+', ' ').Trim() - $sid = & $findSectionForLinkText $norm $titleToSection $existingAnchors - if (-not $sid) { $sid = & $findSectionForLinkText $m.Groups['text'].Value.Trim() $titleToSection $existingAnchors } + $sid = & $findSectionForLinkText $norm $titleToSections $existingAnchors $sectionOrder + if (-not $sid) { $sid = & $findSectionForLinkText $m.Groups['text'].Value.Trim() $titleToSections $existingAnchors $sectionOrder } if ($sid) { $linksRepaired++ } } diff --git a/tests/AwakeCoding.OpenSpecs.Tests.ps1 b/tests/AwakeCoding.OpenSpecs.Tests.ps1 index edd53ea4..bab7c3b0 100644 --- a/tests/AwakeCoding.OpenSpecs.Tests.ps1 +++ b/tests/AwakeCoding.OpenSpecs.Tests.ps1 @@ -75,3 +75,32 @@ Describe 'Conversion report aggregation' { Remove-Item -LiteralPath $tempRoot -Recurse -Force } } + +Describe 'Section GUID link repair' { + It 'is deterministic and prefers explicit section-number targets' { + $markdown = @' +<a id="Section_2.2.1.3"></a> +## 2.2.1.3 MCS Connect Initial PDU +<a id="Section_3.2.5.3.3"></a> +## 3.2.5.3.3 MCS Connect Initial PDU +Numeric reference: [2.2.1.3](#Section_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa) +Heading reference: [MCS Connect Initial PDU](#Section_bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb) +'@ + + $module = Get-Module AwakeCoding.OpenSpecs -ErrorAction Stop + $results = 1..5 | ForEach-Object { + & $module { + param([string]$text) + Repair-OpenSpecSectionGuidLinksByHeadingMatch -Markdown $text + } $markdown + } + + $firstMarkdown = $results[0].Markdown + foreach ($item in $results) { + $item.Markdown | Should -Be $firstMarkdown + } + + $firstMarkdown | Should -Match '\[2\.2\.1\.3\]\(#Section_2\.2\.1\.3\)' + $firstMarkdown | Should -Match '\[MCS Connect Initial PDU\]\(#Section_2\.2\.1\.3\)' + } +} From 059ca95d5a6515a622422298e2c8e2791f2dd6f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Moreau?= <marcandre.moreau@gmail.com> Date: Tue, 17 Feb 2026 16:03:17 -0500 Subject: [PATCH 03/11] Remove dead private helpers and consolidate cleanup wrappers --- .../Private/ConvertFrom-OpenSpecDocx.ps1 | 20 ------ .../Get-OpenSpecGuidSectionMapFromLearn.ps1 | 69 ------------------- scripts/Convert-TocToGitHubFriendly.ps1 | 9 +-- scripts/Invoke-MarkdownCleanupTransform.ps1 | 47 +++++++++++++ scripts/Remove-FrontMatterBoilerplate.ps1 | 9 +-- scripts/Repair-GlossaryLinks.ps1 | 9 +-- scripts/Repair-SectionGuidLinks.ps1 | 9 +-- 7 files changed, 59 insertions(+), 113 deletions(-) delete mode 100644 AwakeCoding.OpenSpecs/Private/Get-OpenSpecGuidSectionMapFromLearn.ps1 create mode 100644 scripts/Invoke-MarkdownCleanupTransform.ps1 diff --git a/AwakeCoding.OpenSpecs/Private/ConvertFrom-OpenSpecDocx.ps1 b/AwakeCoding.OpenSpecs/Private/ConvertFrom-OpenSpecDocx.ps1 index 02f2491b..90af2646 100644 --- a/AwakeCoding.OpenSpecs/Private/ConvertFrom-OpenSpecDocx.ps1 +++ b/AwakeCoding.OpenSpecs/Private/ConvertFrom-OpenSpecDocx.ps1 @@ -912,26 +912,6 @@ function Get-OpenSpecOpenXmlParagraphAnchorInfo { } } -function Get-OpenSpecOpenXmlParagraphAnchors { - [CmdletBinding()] - param( - [Parameter(Mandatory)] - [System.Xml.XmlNode]$ParagraphNode, - - [Parameter(Mandatory)] - [System.Xml.XmlNamespaceManager]$NamespaceManager, - - [Parameter()] - [string]$ParagraphText, - - [Parameter()] - [string]$HeadingStyle - ) - - $info = Get-OpenSpecOpenXmlParagraphAnchorInfo -ParagraphNode $ParagraphNode -NamespaceManager $NamespaceManager -ParagraphText $ParagraphText -HeadingStyle $HeadingStyle - return @($info.Anchors) -} - function Get-OpenSpecOpenXmlParagraphInternalHyperlinks { [CmdletBinding()] param( diff --git a/AwakeCoding.OpenSpecs/Private/Get-OpenSpecGuidSectionMapFromLearn.ps1 b/AwakeCoding.OpenSpecs/Private/Get-OpenSpecGuidSectionMapFromLearn.ps1 deleted file mode 100644 index 6ef500c7..00000000 --- a/AwakeCoding.OpenSpecs/Private/Get-OpenSpecGuidSectionMapFromLearn.ps1 +++ /dev/null @@ -1,69 +0,0 @@ -<# -.SYNOPSIS - Builds a GUID-to-section map by fetching section pages from Microsoft Learn. -.DESCRIPTION - For Open Specs that have GuidToSection=0 from DOCX conversion (e.g. MS-RDPBCGR), - fetches each section page from Learn (openspecs/windows_protocols/protocolId/{guid-with-hyphens}), - parses the H1 for the section number (e.g. "2.2.1.4 Server MCS Connect Response PDU..."), - and returns a hashtable: guid_no_hyphens -> Section_N.N. -.PARAMETER ProtocolId - Protocol ID (e.g. MS-RDPBCGR). -.PARAMETER Guids - Array of 32-character hex GUIDs (no hyphens) to resolve. -.PARAMETER ThrottleSeconds - Delay between HTTP requests to avoid overloading Learn. Default 1. -.OUTPUTS - Hashtable: lowercase guid (no hyphens) -> Section_N.N -#> -function Get-OpenSpecGuidSectionMapFromLearn { - [CmdletBinding()] - param( - [Parameter(Mandatory)] - [string]$ProtocolId, - - [Parameter(Mandatory)] - [string[]]$Guids, - - [Parameter()] - [int]$ThrottleSeconds = 1 - ) - - $ErrorActionPreference = 'Stop' - - function ConvertTo-HyphenatedGuid { - param([string]$Hex32) - if ($Hex32.Length -ne 32) { return $null } - $Hex32.Substring(0, 8) + '-' + $Hex32.Substring(8, 4) + '-' + $Hex32.Substring(12, 4) + '-' + $Hex32.Substring(16, 4) + '-' + $Hex32.Substring(20, 12) - } - - $baseUrl = "https://learn.microsoft.com/en-us/openspecs/windows_protocols/$($ProtocolId.ToLowerInvariant())" - $map = @{} - $uniqueGuids = @($Guids | ForEach-Object { $_.ToLowerInvariant() } | Select-Object -Unique) - $total = $uniqueGuids.Count - $resolved = 0 - - foreach ($i in 0..($uniqueGuids.Count - 1)) { - $guidHex = $uniqueGuids[$i] - $guidHyphenated = ConvertTo-HyphenatedGuid -Hex32 $guidHex - if (-not $guidHyphenated) { continue } - $url = "$baseUrl/$guidHyphenated" - try { - $response = Invoke-WebRequest -Uri $url -UseBasicParsing -TimeoutSec 15 -ErrorAction Stop - $html = $response.Content - if ($html -match '<h1[^>]*>\s*(\d+(?:\.\d+)*)\s+' -or $html -match '(?:^|\n)#\s+(\d+(?:\.\d+)*)\s+') { - $sectionNum = $Matches[1] - $map[$guidHex] = "Section_$sectionNum" - $resolved++ - } - } - catch { - Write-Verbose "Failed to fetch $url : $_" - } - if ($ThrottleSeconds -gt 0 -and $i -lt $uniqueGuids.Count - 1) { - Start-Sleep -Seconds $ThrottleSeconds - } - } - - Write-Verbose "Resolved $resolved / $total GUIDs from Learn" - $map -} diff --git a/scripts/Convert-TocToGitHubFriendly.ps1 b/scripts/Convert-TocToGitHubFriendly.ps1 index 5da299d9..dc15351b 100644 --- a/scripts/Convert-TocToGitHubFriendly.ps1 +++ b/scripts/Convert-TocToGitHubFriendly.ps1 @@ -1,12 +1,9 @@ # Run ConvertTo-OpenSpecGitHubFriendlyToc on a spec file. param([Parameter(Mandatory)][string]$Path) $ErrorActionPreference = 'Stop' -$repoRoot = (Get-Item $PSScriptRoot).Parent.FullName -. (Join-Path $repoRoot 'AwakeCoding.OpenSpecs\Private\Invoke-OpenSpecMarkdownCleanup.ps1') -$md = Get-Content -LiteralPath $Path -Raw -Encoding UTF8 -$r = ConvertTo-OpenSpecGitHubFriendlyToc -Markdown $md +. (Join-Path $PSScriptRoot 'Invoke-MarkdownCleanupTransform.ps1') +$r = Invoke-MarkdownCleanupTransform -Path $Path -TransformFunction 'ConvertTo-OpenSpecGitHubFriendlyToc' Write-Host 'Rewritten:' $r.Rewritten if ($r.Rewritten) { - Set-Content -LiteralPath $Path -Value $r.Markdown -Encoding UTF8 -NoNewline - Write-Host 'File updated.' + Save-MarkdownCleanupTransformResult -Path $Path -Markdown $r.Markdown } diff --git a/scripts/Invoke-MarkdownCleanupTransform.ps1 b/scripts/Invoke-MarkdownCleanupTransform.ps1 new file mode 100644 index 00000000..f617fff3 --- /dev/null +++ b/scripts/Invoke-MarkdownCleanupTransform.ps1 @@ -0,0 +1,47 @@ +function Invoke-MarkdownCleanupTransform { + [CmdletBinding()] + param( + [Parameter(Mandatory)] + [string]$Path, + + [Parameter(Mandatory)] + [string]$TransformFunction + ) + + $ErrorActionPreference = 'Stop' + + $fullPath = [System.IO.Path]::GetFullPath((Resolve-Path -LiteralPath $Path).Path) + if (-not (Test-Path -LiteralPath $fullPath -PathType Leaf)) { + throw "File not found: $fullPath" + } + + $repoRoot = (Get-Item $PSScriptRoot).Parent.FullName + $cleanupScript = Join-Path $repoRoot 'AwakeCoding.OpenSpecs\Private\Invoke-OpenSpecMarkdownCleanup.ps1' + if (-not (Test-Path -LiteralPath $cleanupScript -PathType Leaf)) { + throw "Cleanup script not found: $cleanupScript" + } + + . $cleanupScript + + if (-not (Get-Command -Name $TransformFunction -CommandType Function -ErrorAction SilentlyContinue)) { + throw "Transform function not found after loading cleanup script: $TransformFunction" + } + + $markdown = Get-Content -LiteralPath $fullPath -Raw -Encoding UTF8 + & $TransformFunction -Markdown $markdown +} + +function Save-MarkdownCleanupTransformResult { + [CmdletBinding()] + param( + [Parameter(Mandatory)] + [string]$Path, + + [Parameter(Mandatory)] + [string]$Markdown + ) + + $fullPath = [System.IO.Path]::GetFullPath((Resolve-Path -LiteralPath $Path).Path) + Set-Content -LiteralPath $fullPath -Value $Markdown -Encoding UTF8 -NoNewline + Write-Host 'File updated.' +} diff --git a/scripts/Remove-FrontMatterBoilerplate.ps1 b/scripts/Remove-FrontMatterBoilerplate.ps1 index 9cc3531f..d68698d4 100644 --- a/scripts/Remove-FrontMatterBoilerplate.ps1 +++ b/scripts/Remove-FrontMatterBoilerplate.ps1 @@ -1,12 +1,9 @@ # One-off: run Remove-OpenSpecFrontMatterBoilerplate on a spec file. param([Parameter(Mandatory)][string]$Path) $ErrorActionPreference = 'Stop' -$repoRoot = (Get-Item $PSScriptRoot).Parent.FullName -. (Join-Path $repoRoot 'AwakeCoding.OpenSpecs\Private\Invoke-OpenSpecMarkdownCleanup.ps1') -$md = Get-Content -LiteralPath $Path -Raw -Encoding UTF8 -$r = Remove-OpenSpecFrontMatterBoilerplate -Markdown $md +. (Join-Path $PSScriptRoot 'Invoke-MarkdownCleanupTransform.ps1') +$r = Invoke-MarkdownCleanupTransform -Path $Path -TransformFunction 'Remove-OpenSpecFrontMatterBoilerplate' Write-Host 'Removed:' $r.Removed if ($r.Removed) { - Set-Content -LiteralPath $Path -Value $r.Markdown -Encoding UTF8 -NoNewline - Write-Host 'File updated.' + Save-MarkdownCleanupTransformResult -Path $Path -Markdown $r.Markdown } diff --git a/scripts/Repair-GlossaryLinks.ps1 b/scripts/Repair-GlossaryLinks.ps1 index dada2c32..98f27da2 100644 --- a/scripts/Repair-GlossaryLinks.ps1 +++ b/scripts/Repair-GlossaryLinks.ps1 @@ -1,12 +1,9 @@ # Run Add-OpenSpecGlossaryAnchorsAndRepairLinks on a spec file to fix gt_ GUID links. param([Parameter(Mandatory)][string]$Path) $ErrorActionPreference = 'Stop' -$repoRoot = (Get-Item $PSScriptRoot).Parent.FullName -. (Join-Path $repoRoot 'AwakeCoding.OpenSpecs\Private\Invoke-OpenSpecMarkdownCleanup.ps1') -$md = Get-Content -LiteralPath $Path -Raw -Encoding UTF8 -$r = Add-OpenSpecGlossaryAnchorsAndRepairLinks -Markdown $md +. (Join-Path $PSScriptRoot 'Invoke-MarkdownCleanupTransform.ps1') +$r = Invoke-MarkdownCleanupTransform -Path $Path -TransformFunction 'Add-OpenSpecGlossaryAnchorsAndRepairLinks' Write-Host 'AnchorsInjected:' $r.AnchorsInjected 'LinksRepaired:' $r.LinksRepaired if ($r.LinksRepaired -gt 0) { - Set-Content -LiteralPath $Path -Value $r.Markdown -Encoding UTF8 -NoNewline - Write-Host 'File updated.' + Save-MarkdownCleanupTransformResult -Path $Path -Markdown $r.Markdown } diff --git a/scripts/Repair-SectionGuidLinks.ps1 b/scripts/Repair-SectionGuidLinks.ps1 index 50c7b98a..f8ef028b 100644 --- a/scripts/Repair-SectionGuidLinks.ps1 +++ b/scripts/Repair-SectionGuidLinks.ps1 @@ -1,12 +1,9 @@ # Run Repair-OpenSpecSectionGuidLinksByHeadingMatch on a spec file. param([Parameter(Mandatory)][string]$Path) $ErrorActionPreference = 'Stop' -$repoRoot = (Get-Item $PSScriptRoot).Parent.FullName -. (Join-Path $repoRoot 'AwakeCoding.OpenSpecs\Private\Invoke-OpenSpecMarkdownCleanup.ps1') -$md = Get-Content -LiteralPath $Path -Raw -Encoding UTF8 -$r = Repair-OpenSpecSectionGuidLinksByHeadingMatch -Markdown $md +. (Join-Path $PSScriptRoot 'Invoke-MarkdownCleanupTransform.ps1') +$r = Invoke-MarkdownCleanupTransform -Path $Path -TransformFunction 'Repair-OpenSpecSectionGuidLinksByHeadingMatch' Write-Host 'LinksRepaired:' $r.LinksRepaired if ($r.LinksRepaired -gt 0) { - Set-Content -LiteralPath $Path -Value $r.Markdown -Encoding UTF8 -NoNewline - Write-Host 'File updated.' + Save-MarkdownCleanupTransformResult -Path $Path -Markdown $r.Markdown } From ed775081f794e9eb54b4b884484e7741357378b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Moreau?= <marcandre.moreau@gmail.com> Date: Tue, 17 Feb 2026 16:06:23 -0500 Subject: [PATCH 04/11] Use shared helper in Repair-MissingSectionAnchors --- scripts/Repair-MissingSectionAnchors.ps1 | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/scripts/Repair-MissingSectionAnchors.ps1 b/scripts/Repair-MissingSectionAnchors.ps1 index cdc7fdb1..2e5515e0 100644 --- a/scripts/Repair-MissingSectionAnchors.ps1 +++ b/scripts/Repair-MissingSectionAnchors.ps1 @@ -14,22 +14,11 @@ param( ) $ErrorActionPreference = 'Stop' -$fullPath = [System.IO.Path]::GetFullPath($Path) -if (-not (Test-Path -LiteralPath $fullPath -PathType Leaf)) { - Write-Error "File not found: $fullPath" -} - -$repoRoot = (Get-Item $PSScriptRoot).Parent.FullName -$privateScript = Join-Path $repoRoot 'AwakeCoding.OpenSpecs\Private\Invoke-OpenSpecMarkdownCleanup.ps1' -if (-not (Test-Path -LiteralPath $privateScript -PathType Leaf)) { - Write-Error "Cleanup script not found: $privateScript" -} - -. $privateScript -$markdown = Get-Content -LiteralPath $fullPath -Raw -Encoding UTF8 -$result = Add-OpenSpecMissingSectionAnchorsFromToc -Markdown $markdown +. (Join-Path $PSScriptRoot 'Invoke-MarkdownCleanupTransform.ps1') +$result = Invoke-MarkdownCleanupTransform -Path $Path -TransformFunction 'Add-OpenSpecMissingSectionAnchorsFromToc' if ($result.InjectedCount -gt 0) { - $result.Markdown | Set-Content -LiteralPath $fullPath -Encoding UTF8 -NoNewline + Save-MarkdownCleanupTransformResult -Path $Path -Markdown $result.Markdown + $fullPath = [System.IO.Path]::GetFullPath((Resolve-Path -LiteralPath $Path).Path) Write-Host "Injected $($result.InjectedCount) missing section anchor(s). File updated: $fullPath" } else { Write-Host "No missing section anchors to inject." From 66494cbd9591a2a1b55615390972a4bc65b7d947 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Moreau?= <marcandre.moreau@gmail.com> Date: Tue, 17 Feb 2026 16:09:38 -0500 Subject: [PATCH 05/11] Deduplicate download retry logic --- .../Invoke-OpenSpecWebDownloadWithRetry.ps1 | 38 ++++++++++++++++ .../Public/Save-OpenSpecDocument.ps1 | 45 +++---------------- 2 files changed, 44 insertions(+), 39 deletions(-) create mode 100644 AwakeCoding.OpenSpecs/Private/Invoke-OpenSpecWebDownloadWithRetry.ps1 diff --git a/AwakeCoding.OpenSpecs/Private/Invoke-OpenSpecWebDownloadWithRetry.ps1 b/AwakeCoding.OpenSpecs/Private/Invoke-OpenSpecWebDownloadWithRetry.ps1 new file mode 100644 index 00000000..5b29c843 --- /dev/null +++ b/AwakeCoding.OpenSpecs/Private/Invoke-OpenSpecWebDownloadWithRetry.ps1 @@ -0,0 +1,38 @@ +function Invoke-OpenSpecWebDownloadWithRetry { + [CmdletBinding()] + param( + [Parameter(Mandatory)] + [string]$Uri, + + [Parameter(Mandatory)] + [string]$OutFile, + + [Parameter()] + [int]$MaxRetries = 4, + + [Parameter()] + [int]$InitialDelaySeconds = 1 + ) + + $attempt = 0 + $delay = [Math]::Max(1, $InitialDelaySeconds) + while ($true) { + $attempt++ + try { + Invoke-WebRequest -Uri $Uri -OutFile $OutFile -MaximumRedirection 8 -ErrorAction Stop + return + } + catch { + $statusCode = $null + if ($_.Exception.Response -and $_.Exception.Response.StatusCode) { + $statusCode = [int]$_.Exception.Response.StatusCode + } + $transient = ($statusCode -in 429, 500, 502, 503, 504) -or (-not $statusCode) + if ($attempt -ge $MaxRetries -or -not $transient) { + throw + } + Start-Sleep -Seconds $delay + $delay = [Math]::Min($delay * 2, 16) + } + } +} diff --git a/AwakeCoding.OpenSpecs/Public/Save-OpenSpecDocument.ps1 b/AwakeCoding.OpenSpecs/Public/Save-OpenSpecDocument.ps1 index 342d85b1..d8e9f1e9 100644 --- a/AwakeCoding.OpenSpecs/Public/Save-OpenSpecDocument.ps1 +++ b/AwakeCoding.OpenSpecs/Public/Save-OpenSpecDocument.ps1 @@ -114,26 +114,7 @@ function Save-OpenSpecDocument { $downloadOne = { param($link, $destination) try { - $attempt = 0 - $maxRetries = 4 - $delay = 1 - while ($true) { - $attempt++ - try { - Invoke-WebRequest -Uri $link.Url -OutFile $destination -MaximumRedirection 8 -ErrorAction Stop - break - } - catch { - $statusCode = $null - if ($_.Exception.Response -and $_.Exception.Response.StatusCode) { - $statusCode = [int]$_.Exception.Response.StatusCode - } - $transient = ($statusCode -in 429, 500, 502, 503, 504) -or (-not $statusCode) - if ($attempt -ge $maxRetries -or -not $transient) { throw } - Start-Sleep -Seconds $delay - $delay = [Math]::Min($delay * 2, 16) - } - } + Invoke-OpenSpecWebDownloadWithRetry -Uri $link.Url -OutFile $destination [pscustomobject]@{ PSTypeName = 'AwakeCoding.OpenSpecs.DownloadResult' ProtocolId = $link.ProtocolId @@ -181,32 +162,18 @@ function Save-OpenSpecDocument { return $result } + $moduleBase = (Get-Module -Name 'AwakeCoding.OpenSpecs' | Select-Object -First 1).ModuleBase $useParallel = $Parallel -and $PSVersionTable.PSVersion.Major -ge 7 -and $toDownload.Count -gt 1 $results = if ($useParallel) { $toDownload | ForEach-Object -Parallel { $link = $_.Link $destination = $_.Destination try { - $attempt = 0 - $maxRetries = 4 - $delay = 1 - while ($true) { - $attempt++ - try { - Invoke-WebRequest -Uri $link.Url -OutFile $destination -MaximumRedirection 8 -ErrorAction Stop - break - } - catch { - $statusCode = $null - if ($_.Exception.Response -and $_.Exception.Response.StatusCode) { - $statusCode = [int]$_.Exception.Response.StatusCode - } - $transient = ($statusCode -in 429, 500, 502, 503, 504) -or (-not $statusCode) - if ($attempt -ge $maxRetries -or -not $transient) { throw } - Start-Sleep -Seconds $delay - $delay = [Math]::Min($delay * 2, 16) - } + $currentModule = Get-Module -Name 'AwakeCoding.OpenSpecs' | Select-Object -First 1 + if (-not $currentModule -and $using:moduleBase) { + Import-Module (Join-Path -Path $using:moduleBase -ChildPath 'AwakeCoding.OpenSpecs.psd1') -Force | Out-Null } + Invoke-OpenSpecWebDownloadWithRetry -Uri $link.Url -OutFile $destination [pscustomobject]@{ PSTypeName = 'AwakeCoding.OpenSpecs.DownloadResult' ProtocolId = $link.ProtocolId From bc9f07f390dbbe257c3f6a75e3c4a8e4ed7b24fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Moreau?= <marcandre.moreau@gmail.com> Date: Tue, 17 Feb 2026 16:11:22 -0500 Subject: [PATCH 06/11] Reuse Invoke-OpenSpecRequest for download retries --- .../Private/Invoke-OpenSpecRequest.ps1 | 14 ++++++- .../Invoke-OpenSpecWebDownloadWithRetry.ps1 | 38 ------------------- .../Public/Save-OpenSpecDocument.ps1 | 4 +- 3 files changed, 15 insertions(+), 41 deletions(-) delete mode 100644 AwakeCoding.OpenSpecs/Private/Invoke-OpenSpecWebDownloadWithRetry.ps1 diff --git a/AwakeCoding.OpenSpecs/Private/Invoke-OpenSpecRequest.ps1 b/AwakeCoding.OpenSpecs/Private/Invoke-OpenSpecRequest.ps1 index 1743f979..7cfe93ff 100644 --- a/AwakeCoding.OpenSpecs/Private/Invoke-OpenSpecRequest.ps1 +++ b/AwakeCoding.OpenSpecs/Private/Invoke-OpenSpecRequest.ps1 @@ -4,6 +4,8 @@ function Invoke-OpenSpecRequest { [Parameter(Mandatory)] [string]$Uri, + [string]$OutFile, + [int]$MaxRetries = 4, [int]$InitialDelaySeconds = 1, @@ -22,7 +24,17 @@ function Invoke-OpenSpecRequest { while ($true) { $attempt++ try { - return Invoke-WebRequest -Uri $Uri -Headers $headers -MaximumRedirection 8 -TimeoutSec $TimeoutSec -ErrorAction Stop + $requestParams = @{ + Uri = $Uri + Headers = $headers + MaximumRedirection = 8 + TimeoutSec = $TimeoutSec + ErrorAction = 'Stop' + } + if ($OutFile) { + $requestParams.OutFile = $OutFile + } + return Invoke-WebRequest @requestParams } catch { $statusCode = $null diff --git a/AwakeCoding.OpenSpecs/Private/Invoke-OpenSpecWebDownloadWithRetry.ps1 b/AwakeCoding.OpenSpecs/Private/Invoke-OpenSpecWebDownloadWithRetry.ps1 deleted file mode 100644 index 5b29c843..00000000 --- a/AwakeCoding.OpenSpecs/Private/Invoke-OpenSpecWebDownloadWithRetry.ps1 +++ /dev/null @@ -1,38 +0,0 @@ -function Invoke-OpenSpecWebDownloadWithRetry { - [CmdletBinding()] - param( - [Parameter(Mandatory)] - [string]$Uri, - - [Parameter(Mandatory)] - [string]$OutFile, - - [Parameter()] - [int]$MaxRetries = 4, - - [Parameter()] - [int]$InitialDelaySeconds = 1 - ) - - $attempt = 0 - $delay = [Math]::Max(1, $InitialDelaySeconds) - while ($true) { - $attempt++ - try { - Invoke-WebRequest -Uri $Uri -OutFile $OutFile -MaximumRedirection 8 -ErrorAction Stop - return - } - catch { - $statusCode = $null - if ($_.Exception.Response -and $_.Exception.Response.StatusCode) { - $statusCode = [int]$_.Exception.Response.StatusCode - } - $transient = ($statusCode -in 429, 500, 502, 503, 504) -or (-not $statusCode) - if ($attempt -ge $MaxRetries -or -not $transient) { - throw - } - Start-Sleep -Seconds $delay - $delay = [Math]::Min($delay * 2, 16) - } - } -} diff --git a/AwakeCoding.OpenSpecs/Public/Save-OpenSpecDocument.ps1 b/AwakeCoding.OpenSpecs/Public/Save-OpenSpecDocument.ps1 index d8e9f1e9..0943d8b8 100644 --- a/AwakeCoding.OpenSpecs/Public/Save-OpenSpecDocument.ps1 +++ b/AwakeCoding.OpenSpecs/Public/Save-OpenSpecDocument.ps1 @@ -114,7 +114,7 @@ function Save-OpenSpecDocument { $downloadOne = { param($link, $destination) try { - Invoke-OpenSpecWebDownloadWithRetry -Uri $link.Url -OutFile $destination + Invoke-OpenSpecRequest -Uri $link.Url -OutFile $destination | Out-Null [pscustomobject]@{ PSTypeName = 'AwakeCoding.OpenSpecs.DownloadResult' ProtocolId = $link.ProtocolId @@ -173,7 +173,7 @@ function Save-OpenSpecDocument { if (-not $currentModule -and $using:moduleBase) { Import-Module (Join-Path -Path $using:moduleBase -ChildPath 'AwakeCoding.OpenSpecs.psd1') -Force | Out-Null } - Invoke-OpenSpecWebDownloadWithRetry -Uri $link.Url -OutFile $destination + Invoke-OpenSpecRequest -Uri $link.Url -OutFile $destination | Out-Null [pscustomobject]@{ PSTypeName = 'AwakeCoding.OpenSpecs.DownloadResult' ProtocolId = $link.ProtocolId From 5f4db3f9bf5cc5985792fab48e22683eaf07dedb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Moreau?= <marcandre.moreau@gmail.com> Date: Tue, 17 Feb 2026 16:11:57 -0500 Subject: [PATCH 07/11] Make DOCX fallback order-independent in parallel --- AwakeCoding.OpenSpecs/Public/Save-OpenSpecDocument.ps1 | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/AwakeCoding.OpenSpecs/Public/Save-OpenSpecDocument.ps1 b/AwakeCoding.OpenSpecs/Public/Save-OpenSpecDocument.ps1 index 0943d8b8..3d1ee068 100644 --- a/AwakeCoding.OpenSpecs/Public/Save-OpenSpecDocument.ps1 +++ b/AwakeCoding.OpenSpecs/Public/Save-OpenSpecDocument.ps1 @@ -206,12 +206,10 @@ function Save-OpenSpecDocument { # Retry failed DOCX via RSS fallback URLs (e.g. MS-THCH, MS-MQOD with stale Learn-page links) $downloadResults = New-Object System.Collections.Generic.List[object] - $i = 0 foreach ($r in @($results)) { - $dest = $toDownload[$i].Destination + $dest = $r.Path $r = & $tryDocxFallback -result $r -destination $dest [void]$downloadResults.Add($r) - $i++ } $existsResults From 601b20a1a1565ea261286c8dea19368f6c77839b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Moreau?= <marcandre.moreau@gmail.com> Date: Tue, 17 Feb 2026 16:15:59 -0500 Subject: [PATCH 08/11] Deduplicate per-file logic in Repair-AllBrokenLinks --- scripts/Invoke-RepairAllBrokenLinksFile.ps1 | 36 +++++++++++++++++ scripts/Repair-AllBrokenLinks.ps1 | 44 ++++++--------------- 2 files changed, 48 insertions(+), 32 deletions(-) create mode 100644 scripts/Invoke-RepairAllBrokenLinksFile.ps1 diff --git a/scripts/Invoke-RepairAllBrokenLinksFile.ps1 b/scripts/Invoke-RepairAllBrokenLinksFile.ps1 new file mode 100644 index 00000000..14974616 --- /dev/null +++ b/scripts/Invoke-RepairAllBrokenLinksFile.ps1 @@ -0,0 +1,36 @@ +function Invoke-RepairAllBrokenLinksFile { + [CmdletBinding()] + param( + [Parameter(Mandatory)] + [string]$Path, + + [Parameter(Mandatory)] + [string]$CleanupPath, + + [switch]$WhatIf + ) + + if (-not (Get-Command -Name 'Repair-OpenSpecSectionGuidLinksByHeadingMatch' -CommandType Function -ErrorAction SilentlyContinue)) { + . $CleanupPath + } + + $content = Get-Content -LiteralPath $Path -Raw -Encoding UTF8 + $sectionResult = Repair-OpenSpecSectionGuidLinksByHeadingMatch -Markdown $content + $content = $sectionResult.Markdown + + $glossaryResult = Add-OpenSpecGlossaryAnchorsAndRepairLinks -Markdown $content + $content = $glossaryResult.Markdown + + $changed = ($sectionResult.LinksRepaired -gt 0) -or ($glossaryResult.AnchorsInjected -gt 0) -or ($glossaryResult.LinksRepaired -gt 0) + if ($changed -and -not $WhatIf) { + Set-Content -LiteralPath $Path -Value $content -Encoding UTF8 -NoNewline + } + + [pscustomobject]@{ + SectionRepaired = $sectionResult.LinksRepaired + GlossaryRepaired = $glossaryResult.LinksRepaired + AnchorsInjected = $glossaryResult.AnchorsInjected + Updated = $changed -and -not $WhatIf + SpecName = [System.IO.Path]::GetFileName([System.IO.Path]::GetDirectoryName($Path)) + } +} diff --git a/scripts/Repair-AllBrokenLinks.ps1 b/scripts/Repair-AllBrokenLinks.ps1 index 1b4aa72c..8b85f4ee 100644 --- a/scripts/Repair-AllBrokenLinks.ps1 +++ b/scripts/Repair-AllBrokenLinks.ps1 @@ -22,7 +22,11 @@ $cleanupPath = Join-Path $repoRoot 'AwakeCoding.OpenSpecs\Private\Invoke-OpenSpe if (-not (Test-Path -LiteralPath $cleanupPath)) { Write-Error "Cleanup script not found: $cleanupPath" } -. $cleanupPath +$helperPath = Join-Path $PSScriptRoot 'Invoke-RepairAllBrokenLinksFile.ps1' +if (-not (Test-Path -LiteralPath $helperPath)) { + Write-Error "Helper script not found: $helperPath" +} +. $helperPath $resolved = [System.IO.Path]::GetFullPath($Path) if (-not (Test-Path -LiteralPath $resolved -PathType Container)) { @@ -42,24 +46,8 @@ $whatIfArg = $WhatIf if ($useParallel) { $results = $specFiles | ForEach-Object -Parallel { - . $using:cleanupPath - $mdPath = $_ - $content = Get-Content -LiteralPath $mdPath -Raw -Encoding UTF8 - $sectionResult = Repair-OpenSpecSectionGuidLinksByHeadingMatch -Markdown $content - $content = $sectionResult.Markdown - $glossaryResult = Add-OpenSpecGlossaryAnchorsAndRepairLinks -Markdown $content - $content = $glossaryResult.Markdown - $changed = ($sectionResult.LinksRepaired -gt 0) -or ($glossaryResult.AnchorsInjected -gt 0) -or ($glossaryResult.LinksRepaired -gt 0) - if ($changed -and -not $using:whatIfArg) { - Set-Content -LiteralPath $mdPath -Value $content -Encoding UTF8 -NoNewline - } - [pscustomobject]@{ - SectionRepaired = $sectionResult.LinksRepaired - GlossaryRepaired = $glossaryResult.LinksRepaired - AnchorsInjected = $glossaryResult.AnchorsInjected - Updated = $changed -and -not $using:whatIfArg - SpecName = [System.IO.Path]::GetFileName([System.IO.Path]::GetDirectoryName($mdPath)) - } + . $using:helperPath + Invoke-RepairAllBrokenLinksFile -Path $_ -CleanupPath $using:cleanupPath -WhatIf:$using:whatIfArg } -ThrottleLimit $ThrottleLimit $totalSection = ($results | Measure-Object -Property SectionRepaired -Sum).Sum @@ -74,21 +62,13 @@ else { $totalGlossary = 0 $updated = 0 foreach ($mdPath in $specFiles) { - $content = Get-Content -LiteralPath $mdPath -Raw -Encoding UTF8 - $sectionResult = Repair-OpenSpecSectionGuidLinksByHeadingMatch -Markdown $content - $content = $sectionResult.Markdown - $totalSection += $sectionResult.LinksRepaired - - $glossaryResult = Add-OpenSpecGlossaryAnchorsAndRepairLinks -Markdown $content - $content = $glossaryResult.Markdown - $totalGlossary += $glossaryResult.LinksRepaired + $result = Invoke-RepairAllBrokenLinksFile -Path $mdPath -CleanupPath $cleanupPath -WhatIf:$WhatIf + $totalSection += $result.SectionRepaired + $totalGlossary += $result.GlossaryRepaired - $changed = ($sectionResult.LinksRepaired -gt 0) -or ($glossaryResult.AnchorsInjected -gt 0) -or ($glossaryResult.LinksRepaired -gt 0) - if ($changed -and -not $WhatIf) { - Set-Content -LiteralPath $mdPath -Value $content -Encoding UTF8 -NoNewline + if ($result.Updated) { $updated++ - $rel = [System.IO.Path]::GetFileName([System.IO.Path]::GetDirectoryName($mdPath)) - Write-Host "Updated: $rel (Section:$($sectionResult.LinksRepaired) Glossary:$($glossaryResult.LinksRepaired)+$($glossaryResult.AnchorsInjected))" + Write-Host "Updated: $($result.SpecName) (Section:$($result.SectionRepaired) Glossary:$($result.GlossaryRepaired)+$($result.AnchorsInjected))" } } } From f2b800106a49fbfaaec8079c1fa7fb715f957e33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Moreau?= <marcandre.moreau@gmail.com> Date: Tue, 17 Feb 2026 16:44:36 -0500 Subject: [PATCH 09/11] Fix parallel download helper visibility --- AwakeCoding.OpenSpecs/Public/Save-OpenSpecDocument.ps1 | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/AwakeCoding.OpenSpecs/Public/Save-OpenSpecDocument.ps1 b/AwakeCoding.OpenSpecs/Public/Save-OpenSpecDocument.ps1 index 3d1ee068..db900770 100644 --- a/AwakeCoding.OpenSpecs/Public/Save-OpenSpecDocument.ps1 +++ b/AwakeCoding.OpenSpecs/Public/Save-OpenSpecDocument.ps1 @@ -163,15 +163,18 @@ function Save-OpenSpecDocument { } $moduleBase = (Get-Module -Name 'AwakeCoding.OpenSpecs' | Select-Object -First 1).ModuleBase + $openSpecRequestPath = if ($moduleBase) { Join-Path -Path $moduleBase -ChildPath 'Private\Invoke-OpenSpecRequest.ps1' } else { $null } $useParallel = $Parallel -and $PSVersionTable.PSVersion.Major -ge 7 -and $toDownload.Count -gt 1 $results = if ($useParallel) { $toDownload | ForEach-Object -Parallel { $link = $_.Link $destination = $_.Destination try { - $currentModule = Get-Module -Name 'AwakeCoding.OpenSpecs' | Select-Object -First 1 - if (-not $currentModule -and $using:moduleBase) { - Import-Module (Join-Path -Path $using:moduleBase -ChildPath 'AwakeCoding.OpenSpecs.psd1') -Force | Out-Null + if (-not (Get-Command -Name 'Invoke-OpenSpecRequest' -CommandType Function -ErrorAction SilentlyContinue)) { + if (-not $using:openSpecRequestPath -or -not (Test-Path -LiteralPath $using:openSpecRequestPath -PathType Leaf)) { + throw 'Invoke-OpenSpecRequest helper script not found for parallel download.' + } + . $using:openSpecRequestPath } Invoke-OpenSpecRequest -Uri $link.Url -OutFile $destination | Out-Null [pscustomobject]@{ From 815bcabdf3dceacd2975a9339bd47e2c204dc199 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Moreau?= <marcandre.moreau@gmail.com> Date: Tue, 17 Feb 2026 16:48:03 -0500 Subject: [PATCH 10/11] Drop PDF source conversion path --- AGENTS.md | 2 +- .../Private/ConvertFrom-OpenSpecPdf.ps1 | 56 ------------------- .../Private/Get-OpenSpecToolchain.ps1 | 5 -- .../Public/Convert-OpenSpecToMarkdown.ps1 | 13 ++--- .../Invoke-OpenSpecConversionPipeline.ps1 | 7 ++- 5 files changed, 12 insertions(+), 71 deletions(-) delete mode 100644 AwakeCoding.OpenSpecs/Private/ConvertFrom-OpenSpecPdf.ps1 diff --git a/AGENTS.md b/AGENTS.md index bd2b6fde..c42e8ab7 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -60,5 +60,5 @@ When you add a new exported function, add its name to the `$expected` array in t ## Project-specific rules - Do not remove or rename exported functions without updating `AwakeCoding.OpenSpecs.psd1` and the exports test. -- Conversion: DOCX is handled in-module via OpenXML; PDF uses external `docling` or `markitdown` when available (see `AwakeCoding.OpenSpecs/Private/Get-OpenSpecToolchain.ps1`). Output is textual (tables, ASCII), not image-based. +- Conversion: DOCX is handled in-module via OpenXML. PDF is not used as a conversion source. Output is textual (tables, ASCII), not image-based. - For bulk or CI conversions, use `-Parallel -ThrottleLimit N` with `Convert-OpenSpecToMarkdown` or `Invoke-OpenSpecConversionPipeline` (PowerShell 7 only). diff --git a/AwakeCoding.OpenSpecs/Private/ConvertFrom-OpenSpecPdf.ps1 b/AwakeCoding.OpenSpecs/Private/ConvertFrom-OpenSpecPdf.ps1 deleted file mode 100644 index 1d49efd4..00000000 --- a/AwakeCoding.OpenSpecs/Private/ConvertFrom-OpenSpecPdf.ps1 +++ /dev/null @@ -1,56 +0,0 @@ -function ConvertFrom-OpenSpecPdf { - [CmdletBinding()] - param( - [Parameter(Mandatory)] - [string]$InputPath, - - [Parameter(Mandatory)] - [string]$OutputPath, - - [Parameter(Mandatory)] - [object]$Toolchain - ) - - $outputDirectory = Split-Path -Path $OutputPath -Parent - if (-not (Test-Path -LiteralPath $outputDirectory)) { - [void](New-Item -Path $outputDirectory -ItemType Directory -Force) - } - - $notes = New-Object System.Collections.Generic.List[string] - - if ($Toolchain.HasDocling) { - $doclingArguments = @('--to', 'md', '--output', $outputDirectory, $InputPath) - & $Toolchain.DoclingPath @doclingArguments - if ($LASTEXITCODE -eq 0) { - $candidate = Join-Path -Path $outputDirectory -ChildPath ("{0}.md" -f [System.IO.Path]::GetFileNameWithoutExtension($InputPath)) - if (Test-Path -LiteralPath $candidate) { - Move-Item -LiteralPath $candidate -Destination $OutputPath -Force - return [pscustomobject]@{ - PSTypeName = 'AwakeCoding.OpenSpecs.ConversionStep' - Strategy = 'docling-pdf' - OutputPath = $OutputPath - Notes = @('Converted with docling CLI.') - } - } - } - - $notes.Add('docling was detected but did not produce expected markdown output.') - } - - if ($Toolchain.HasMarkItDown) { - $markitdownArguments = @($InputPath, '--output', $OutputPath) - & $Toolchain.MarkItDownPath @markitdownArguments - if ($LASTEXITCODE -eq 0 -and (Test-Path -LiteralPath $OutputPath)) { - return [pscustomobject]@{ - PSTypeName = 'AwakeCoding.OpenSpecs.ConversionStep' - Strategy = 'markitdown-pdf' - OutputPath = $OutputPath - Notes = @('Converted with markitdown CLI.') - } - } - - $notes.Add('markitdown was detected but conversion failed.') - } - - throw ("Unable to convert PDF '{0}' to Markdown. {1}" -f $InputPath, ($notes -join ' ')) -} diff --git a/AwakeCoding.OpenSpecs/Private/Get-OpenSpecToolchain.ps1 b/AwakeCoding.OpenSpecs/Private/Get-OpenSpecToolchain.ps1 index 2805282f..14619e9b 100644 --- a/AwakeCoding.OpenSpecs/Private/Get-OpenSpecToolchain.ps1 +++ b/AwakeCoding.OpenSpecs/Private/Get-OpenSpecToolchain.ps1 @@ -1,7 +1,6 @@ function Get-OpenSpecToolchain { [CmdletBinding()] param( - [switch]$RequirePdfConverter, [switch]$RequireDocxConverter ) @@ -22,10 +21,6 @@ function Get-OpenSpecToolchain { HasOpenXml = $null -ne $openXmlModule } - if ($RequirePdfConverter -and -not ($toolchain.HasDocling -or $toolchain.HasMarkItDown)) { - throw 'No PDF converter detected. Install docling or markitdown.' - } - if ($RequireDocxConverter -and -not $toolchain.HasOpenXml) { throw 'No DOCX converter detected. Install the OpenXML PowerShell module.' } diff --git a/AwakeCoding.OpenSpecs/Public/Convert-OpenSpecToMarkdown.ps1 b/AwakeCoding.OpenSpecs/Public/Convert-OpenSpecToMarkdown.ps1 index 2073096a..c505f767 100644 --- a/AwakeCoding.OpenSpecs/Public/Convert-OpenSpecToMarkdown.ps1 +++ b/AwakeCoding.OpenSpecs/Public/Convert-OpenSpecToMarkdown.ps1 @@ -8,7 +8,7 @@ function Convert-OpenSpecToMarkdown { [string]$OutputPath = (Join-Path -Path (Get-Location) -ChildPath 'converted-specs'), - [ValidateSet('Auto', 'DOCX', 'PDF')] + [ValidateSet('Auto', 'DOCX')] [string]$SourceFormat = 'Auto', [switch]$Force, @@ -84,6 +84,11 @@ function Convert-OpenSpecToMarkdown { $SourceFormat } + if ($resolvedFormat -eq 'PDF') { + Write-Error "PDF source conversion is not supported. Use DOCX input for '$sourcePath'." + continue + } + if ($resolvedFormat -eq 'Unknown') { Write-Error "Unable to infer source format for '$sourcePath'." continue @@ -136,12 +141,6 @@ function Convert-OpenSpecToMarkdown { $mediaDirectory = Join-Path -Path $specDirectory -ChildPath 'media' $conversionStep = ConvertFrom-OpenSpecDocx -InputPath $sourcePath -OutputPath $rawMarkdownPath -Toolchain $toolchain -MediaOutputDirectory $mediaDirectory } - elseif ($resolvedFormat -eq 'PDF') { - $toolchain = Get-OpenSpecToolchain -RequirePdfConverter - $rawMarkdownPath = Join-Path -Path $artifactDirectory -ChildPath 'raw-pdf.md' - $conversionStep = ConvertFrom-OpenSpecPdf -InputPath $sourcePath -OutputPath $rawMarkdownPath -Toolchain $toolchain - } - $rawMarkdown = Get-Content -LiteralPath $conversionStep.OutputPath -Raw $normalized = ConvertTo-OpenSpecTextLayout -Markdown $rawMarkdown $sourceLinkMetadata = if ($conversionStep.PSObject.Properties['LinkMetadata']) { $conversionStep.LinkMetadata } else { $null } diff --git a/AwakeCoding.OpenSpecs/Public/Invoke-OpenSpecConversionPipeline.ps1 b/AwakeCoding.OpenSpecs/Public/Invoke-OpenSpecConversionPipeline.ps1 index f4e52567..10b062d3 100644 --- a/AwakeCoding.OpenSpecs/Public/Invoke-OpenSpecConversionPipeline.ps1 +++ b/AwakeCoding.OpenSpecs/Public/Invoke-OpenSpecConversionPipeline.ps1 @@ -5,7 +5,7 @@ function Invoke-OpenSpecConversionPipeline { [string]$Query, - [ValidateSet('PDF', 'DOCX', 'Both')] + [ValidateSet('DOCX', 'Both')] [string]$Format = 'DOCX', [string]$DownloadPath = (Join-Path -Path (Get-Location) -ChildPath 'downloads-convert'), @@ -32,6 +32,9 @@ function Invoke-OpenSpecConversionPipeline { Save-OpenSpecDocument -Query $Query -Format $Format -OutputPath $DownloadPath -Force:$Force } - $toConvert = $downloadResults | Where-Object { $_.Status -in 'Downloaded', 'Exists' } + $toConvert = $downloadResults | Where-Object { $_.Status -in 'Downloaded', 'Exists' -and $_.Format -eq 'DOCX' } + if (-not @($toConvert)) { + throw 'No DOCX files are available for conversion. PDF source conversion is not supported.' + } $toConvert | Convert-OpenSpecToMarkdown -OutputPath $OutputPath -Force:$Force -Parallel:$Parallel -ThrottleLimit $ThrottleLimit -RemoveDocumentIndex:$RemoveDocumentIndex } From 9a6c0ad4af539fd5dab79c45f789eeab4e609f44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Moreau?= <marcandre.moreau@gmail.com> Date: Wed, 18 Feb 2026 11:17:54 -0500 Subject: [PATCH 11/11] Stabilize section link resolution determinism --- .../Private/ConvertFrom-OpenSpecDocx.ps1 | 168 ++++++++++++++++-- .../Invoke-OpenSpecMarkdownCleanup.ps1 | 85 +++++++-- tests/AwakeCoding.OpenSpecs.Tests.ps1 | 58 ++++++ 3 files changed, 276 insertions(+), 35 deletions(-) diff --git a/AwakeCoding.OpenSpecs/Private/ConvertFrom-OpenSpecDocx.ps1 b/AwakeCoding.OpenSpecs/Private/ConvertFrom-OpenSpecDocx.ps1 index 90af2646..a7b9db59 100644 --- a/AwakeCoding.OpenSpecs/Private/ConvertFrom-OpenSpecDocx.ps1 +++ b/AwakeCoding.OpenSpecs/Private/ConvertFrom-OpenSpecDocx.ps1 @@ -263,18 +263,84 @@ function ConvertFrom-OpenSpecDocxWithOpenXml { } } } - $titleToSection = @{} + $getSectionSortKey = { + param([string]$sectionId) + + if ($sectionId -match '^Section_(?<num>\d+(?:\.\d+)*)$') { + $parts = @($Matches['num'] -split '\.' | ForEach-Object { + if ($_ -match '^\d+$') { [int]$_ } else { 0 } + }) + $padded = @($parts | ForEach-Object { '{0:D8}' -f $_ }) + return ('0|' + ($padded -join '.')) + } + + return ('1|' + $sectionId.ToLowerInvariant()) + } + + $sectionEntries = New-Object System.Collections.Generic.List[object] foreach ($entry in $linkMetadata.SectionToTitle.GetEnumerator()) { - $key = [string]$entry.Key - $val = ([string]$entry.Value -replace '\s+', ' ').Trim() - if (-not [string]::IsNullOrWhiteSpace($val)) { - $titleToSection[$val] = $key - $withoutNum = ($val -replace '^\d+(?:\.\d+)*\s+', '').Trim() - if ($withoutNum -and -not $titleToSection.ContainsKey($withoutNum)) { - $titleToSection[$withoutNum] = $key + $sectionId = [string]$entry.Key + $title = ([string]$entry.Value -replace '\s+', ' ').Trim() + if ([string]::IsNullOrWhiteSpace($sectionId) -or [string]::IsNullOrWhiteSpace($title)) { + continue + } + + $titleWithoutNum = ($title -replace '^\d+(?:\.\d+)*\s+', '').Trim() + [void]$sectionEntries.Add([pscustomobject]@{ + SectionId = $sectionId + TitleNormalized = $title + TitleWithoutNumber = $titleWithoutNum + SortKey = (& $getSectionSortKey $sectionId) + }) + } + + $orderedSectionEntries = @($sectionEntries | Sort-Object -Property @( + @{ Expression = { $_.SortKey } }, + @{ Expression = { $_.SectionId.ToLowerInvariant() } }, + @{ Expression = { $_.TitleNormalized.ToLowerInvariant() } } + )) + + $sectionIdSet = [System.Collections.Generic.HashSet[string]]::new([System.StringComparer]::OrdinalIgnoreCase) + $titleToSections = @{} + $titleWithoutNumToSections = @{} + + foreach ($entry in $orderedSectionEntries) { + [void]$sectionIdSet.Add($entry.SectionId) + + $titleKey = $entry.TitleNormalized.ToLowerInvariant() + if (-not $titleToSections.ContainsKey($titleKey)) { + $titleToSections[$titleKey] = New-Object System.Collections.Generic.List[string] + } + if (-not $titleToSections[$titleKey].Contains($entry.SectionId)) { + [void]$titleToSections[$titleKey].Add($entry.SectionId) + } + + if (-not [string]::IsNullOrWhiteSpace($entry.TitleWithoutNumber)) { + $withoutNumKey = $entry.TitleWithoutNumber.ToLowerInvariant() + if (-not $titleWithoutNumToSections.ContainsKey($withoutNumKey)) { + $titleWithoutNumToSections[$withoutNumKey] = New-Object System.Collections.Generic.List[string] + } + if (-not $titleWithoutNumToSections[$withoutNumKey].Contains($entry.SectionId)) { + [void]$titleWithoutNumToSections[$withoutNumKey].Add($entry.SectionId) } } } + + $findUniqueSection = { + param([System.Collections.Generic.List[string]]$candidateSections) + + if ($null -eq $candidateSections) { + return $null + } + + $candidates = @($candidateSections | Sort-Object) + if ($candidates.Count -eq 1) { + return $candidates[0] + } + + return $null + } + $sectionGuidRegex = [regex]::new('^(?:[Ss]ection_)?([a-f0-9]{32})$') $internalLinksArray = $linkMetadata.InternalHyperlinks.ToArray() foreach ($link in $internalLinksArray) { @@ -285,18 +351,86 @@ function ConvertFrom-OpenSpecDocxWithOpenXml { $guid = $m.Groups[1].Value.ToLowerInvariant() if ($linkMetadata.GuidToSection.ContainsKey($guid)) { continue } $matchedSection = $null - if ($titleToSection.ContainsKey($text)) { - $matchedSection = $titleToSection[$text] + + if ($text -match '^(?:section\s+)?(?<num>\d+(?:\.\d+)*)$') { + $directSection = "Section_$($Matches['num'])" + if ($sectionIdSet.Contains($directSection)) { + $matchedSection = $directSection + } } - else { - foreach ($tit in $titleToSection.Keys) { - if ($tit -eq $text) { $matchedSection = $titleToSection[$tit]; break } - $textEsc = [Management.Automation.WildcardPattern]::Escape($text) - $titEsc = [Management.Automation.WildcardPattern]::Escape($tit) - if ($tit -like "*$textEsc*" -and $text.Length -ge 8) { $matchedSection = $titleToSection[$tit]; break } - if ($text -like "*$titEsc*" -and $tit.Length -ge 8) { $matchedSection = $titleToSection[$tit]; break } + + $textKey = $text.ToLowerInvariant() + if (-not $matchedSection -and $titleToSections.ContainsKey($textKey)) { + $matchedSection = & $findUniqueSection $titleToSections[$textKey] + } + + $textWithoutNum = ($text -replace '^\d+(?:\.\d+)*\s+', '').Trim() + if (-not $matchedSection -and -not [string]::IsNullOrWhiteSpace($textWithoutNum)) { + $textWithoutNumKey = $textWithoutNum.ToLowerInvariant() + if ($titleWithoutNumToSections.ContainsKey($textWithoutNumKey)) { + $matchedSection = & $findUniqueSection $titleWithoutNumToSections[$textWithoutNumKey] } } + + if (-not $matchedSection -and $text.Length -ge 8) { + $fuzzyCandidates = New-Object System.Collections.Generic.List[object] + foreach ($entry in $orderedSectionEntries) { + $candidateTitle = $entry.TitleNormalized + if ([string]::IsNullOrWhiteSpace($candidateTitle) -or $candidateTitle.Length -lt 8) { + continue + } + + $containsText = $candidateTitle.IndexOf($text, [System.StringComparison]::OrdinalIgnoreCase) -ge 0 + $containsCandidate = $text.IndexOf($candidateTitle, [System.StringComparison]::OrdinalIgnoreCase) -ge 0 + if (-not $containsText -and -not $containsCandidate) { + continue + } + + $score = if ($containsText -and $containsCandidate) { + 0 + } + elseif ($containsText) { + 1 + } + else { + 2 + } + + [void]$fuzzyCandidates.Add([pscustomobject]@{ + Score = $score + LengthDelta = [Math]::Abs($candidateTitle.Length - $text.Length) + SortKey = $entry.SortKey + SectionId = $entry.SectionId + }) + } + + if ($fuzzyCandidates.Count -gt 0) { + $orderedCandidates = @($fuzzyCandidates | Sort-Object -Property @( + @{ Expression = { $_.Score } }, + @{ Expression = { $_.LengthDelta } }, + @{ Expression = { $_.SortKey } }, + @{ Expression = { $_.SectionId } } + )) + + $best = $orderedCandidates[0] + $isUniqueBest = $true + if ($orderedCandidates.Count -gt 1) { + $second = $orderedCandidates[1] + if ( + $second.Score -eq $best.Score -and + $second.LengthDelta -eq $best.LengthDelta -and + $second.SortKey -eq $best.SortKey + ) { + $isUniqueBest = $false + } + } + + if ($isUniqueBest) { + $matchedSection = $best.SectionId + } + } + } + if ($matchedSection) { $linkMetadata.GuidToSection[$guid] = $matchedSection } diff --git a/AwakeCoding.OpenSpecs/Private/Invoke-OpenSpecMarkdownCleanup.ps1 b/AwakeCoding.OpenSpecs/Private/Invoke-OpenSpecMarkdownCleanup.ps1 index 2274fe20..480ef4c6 100644 --- a/AwakeCoding.OpenSpecs/Private/Invoke-OpenSpecMarkdownCleanup.ps1 +++ b/AwakeCoding.OpenSpecs/Private/Invoke-OpenSpecMarkdownCleanup.ps1 @@ -1124,18 +1124,13 @@ function Resolve-OpenSpecGuidSectionAnchors { # these with the Section_X.Y.Z form fixes both issues. $guidToSection = @{} $sourceMapCount = 0 - if ($GuidToSectionMap) { - foreach ($entry in $GuidToSectionMap.GetEnumerator()) { - $guid = ([string]$entry.Key).ToLowerInvariant() - $section = [string]$entry.Value - if ([string]::IsNullOrWhiteSpace($guid) -or [string]::IsNullOrWhiteSpace($section)) { - continue - } - if (-not $guidToSection.ContainsKey($guid)) { - $guidToSection[$guid] = $section - $sourceMapCount++ - } - } + $existingSections = [System.Collections.Generic.HashSet[string]]::new([System.StringComparer]::OrdinalIgnoreCase) + + foreach ($m in [regex]::Matches($result, '<a\s+id="(?<section>Section_\d+(?:\.\d+)*)"\s*></a>', [System.Text.RegularExpressions.RegexOptions]::IgnoreCase)) { + [void]$existingSections.Add($m.Groups['section'].Value) + } + foreach ($m in [regex]::Matches($result, '^\s*#{1,6}\s+(?<num>\d+(?:\.\d+)*)\b', [System.Text.RegularExpressions.RegexOptions]'IgnoreCase, Multiline')) { + [void]$existingSections.Add("Section_$($m.Groups['num'].Value)") } # Order 1: GUID anchor followed by Section anchor (most common) @@ -1162,6 +1157,20 @@ function Resolve-OpenSpecGuidSectionAnchors { } } + if ($GuidToSectionMap) { + foreach ($entry in $GuidToSectionMap.GetEnumerator()) { + $guid = ([string]$entry.Key).ToLowerInvariant() + $section = [string]$entry.Value + if ([string]::IsNullOrWhiteSpace($guid) -or [string]::IsNullOrWhiteSpace($section)) { + continue + } + if (-not $guidToSection.ContainsKey($guid)) { + $guidToSection[$guid] = $section + $sourceMapCount++ + } + } + } + if ($guidToSection.Count -eq 0) { return [pscustomobject]@{ Markdown = $result @@ -1172,15 +1181,38 @@ function Resolve-OpenSpecGuidSectionAnchors { # Rewrite all link targets that reference GUID-based section anchors. # Matches both (#Section_GUID) and (#section_GUID) forms. $rewriteCounter = @{ Value = 0 } + $numericPreferenceCounter = @{ Value = 0 } $result = [regex]::Replace( $result, - '\(#[Ss]ection_(?<guid>[0-9a-f]{32})\)', + '\[(?<text>[^\]]+)\]\(#[Ss]ection_(?<guid>[0-9a-f]{32})\)', { param($m) $guid = $m.Groups['guid'].Value.ToLowerInvariant() - if ($guidToSection.ContainsKey($guid)) { + $text = ($m.Groups['text'].Value -replace '\*+', '' -replace '\s+', ' ').Trim() + + $preferredSection = $null + if ($text -match '^(?:section\s+)?(?<num>\d+(?:\.\d+)*)$') { + $numericSection = "Section_$($Matches['num'])" + if ($existingSections.Contains($numericSection)) { + $preferredSection = $numericSection + $numericPreferenceCounter.Value++ + } + } + elseif ($text -match '\(section\s+(?<num>\d+(?:\.\d+)*)\)') { + $numericSection = "Section_$($Matches['num'])" + if ($existingSections.Contains($numericSection)) { + $preferredSection = $numericSection + $numericPreferenceCounter.Value++ + } + } + + if (-not $preferredSection -and $guidToSection.ContainsKey($guid)) { + $preferredSection = $guidToSection[$guid] + } + + if ($preferredSection) { $rewriteCounter.Value++ - "(#$($guidToSection[$guid]))" + "[$($m.Groups['text'].Value)](#$preferredSection)" } else { $m.Value @@ -1196,6 +1228,7 @@ function Resolve-OpenSpecGuidSectionAnchors { Count = $rewriteCount MappedAnchors = $guidToSection.Count SourceMappedAnchors = $sourceMapCount + NumericPreferenceRewrites = $numericPreferenceCounter.Value Reason = 'GUID-based section anchors were resolved to section number anchors.' }) } @@ -1279,16 +1312,32 @@ function Repair-OpenSpecSectionNumberLinks { # In-document links like [5.3.8](#Section_guid) often have no guid->section mapping # (Word bookmark pair missing in converted output). When the link text is a section # number, rewrite to [5.3.8](#Section_5.3.8) so they resolve to our injected anchors. + $availableSections = [System.Collections.Generic.HashSet[string]]::new([System.StringComparer]::OrdinalIgnoreCase) + foreach ($m in [regex]::Matches($result, '<a\s+id="(?<section>Section_\d+(?:\.\d+)*)"\s*></a>', [System.Text.RegularExpressions.RegexOptions]::IgnoreCase)) { + [void]$availableSections.Add($m.Groups['section'].Value) + } + foreach ($m in [regex]::Matches($result, '^\s*#{1,6}\s+(?<num>\d+(?:\.\d+)*)\b', [System.Text.RegularExpressions.RegexOptions]'IgnoreCase, Multiline')) { + [void]$availableSections.Add("Section_$($m.Groups['num'].Value)") + } + $pattern = [regex]::new( - '\[(?<num>\d+(?:\.\d+)*)\]\(#Section_[a-f0-9]{32}\)', + '\[(?<num>\d+(?:\.\d+)*)\]\(#Section_(?<guid>[a-f0-9]{32})\)', [System.Text.RegularExpressions.RegexOptions]::IgnoreCase ) - $rewriteCount = $pattern.Matches($result).Count + $rewriteCounter = @{ Value = 0 } $result = $pattern.Replace($result, { param($m) $num = $m.Groups['num'].Value - "[$num](#Section_$num)" + $targetSection = "Section_$num" + if ($availableSections.Contains($targetSection)) { + $rewriteCounter.Value++ + "[$num](#$targetSection)" + } + else { + $m.Value + } }) + $rewriteCount = $rewriteCounter.Value if ($rewriteCount -gt 0) { [void]$issues.Add([pscustomobject]@{ diff --git a/tests/AwakeCoding.OpenSpecs.Tests.ps1 b/tests/AwakeCoding.OpenSpecs.Tests.ps1 index bab7c3b0..7c0b57a7 100644 --- a/tests/AwakeCoding.OpenSpecs.Tests.ps1 +++ b/tests/AwakeCoding.OpenSpecs.Tests.ps1 @@ -104,3 +104,61 @@ Heading reference: [MCS Connect Initial PDU](#Section_bbbbbbbbbbbbbbbbbbbbbbbbbb $firstMarkdown | Should -Match '\[MCS Connect Initial PDU\]\(#Section_2\.2\.1\.3\)' } } + +Describe 'GUID anchor resolution precedence' { + It 'prefers explicit numeric section text over conflicting source metadata' { + $markdown = @' +<a id="section_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"></a> +<a id="Section_2.2.9"></a> +<a id="Section_2.2.11"></a> +From pair: [2.2.9](#Section_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa) +From source-map fallback: [2.2.11](#Section_bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb) +'@ + + $sourceMap = @{ + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa = 'Section_2.2.8' + bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb = 'Section_9.9.9' + } + + $module = Get-Module AwakeCoding.OpenSpecs -ErrorAction Stop + $result = & $module { + param([string]$text, [object]$map) + Resolve-OpenSpecGuidSectionAnchors -Markdown $text -GuidToSectionMap $map + } $markdown $sourceMap + + $result.Markdown | Should -Match '\[2\.2\.9\]\(#Section_2\.2\.9\)' + $result.Markdown | Should -Match '\[2\.2\.11\]\(#Section_2\.2\.11\)' + $result.Markdown | Should -Not -Match '\[2\.2\.9\]\(#Section_2\.2\.8\)' + $result.Markdown | Should -Not -Match '\[2\.2\.11\]\(#Section_9\.9\.9\)' + + $issue = $result.Issues | Where-Object Type -eq 'GuidAnchorResolved' | Select-Object -First 1 + $issue | Should -Not -BeNullOrEmpty + $issue.NumericPreferenceRewrites | Should -Be 2 + } +} + +Describe 'Section number link strict repair' { + It 'rewrites only when the referenced numeric section exists in the document' { + $markdown = @' +<a id="Section_5.3.8"></a> +Known anchor: [5.3.8](#Section_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa) +Unknown anchor: [7.7.7](#Section_bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb) +## 8.1 Heading-derived section +Heading anchor: [8.1](#Section_cccccccccccccccccccccccccccccccc) +'@ + + $module = Get-Module AwakeCoding.OpenSpecs -ErrorAction Stop + $result = & $module { + param([string]$text) + Repair-OpenSpecSectionNumberLinks -Markdown $text + } $markdown + + $result.Markdown | Should -Match '\[5\.3\.8\]\(#Section_5\.3\.8\)' + $result.Markdown | Should -Match '\[8\.1\]\(#Section_8\.1\)' + $result.Markdown | Should -Match '\[7\.7\.7\]\(#Section_bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\)' + + $issue = $result.Issues | Where-Object Type -eq 'SectionNumberLinksRepaired' | Select-Object -First 1 + $issue | Should -Not -BeNullOrEmpty + $issue.Count | Should -Be 2 + } +}