diff --git a/.github/workflows/convert-and-publish.yml b/.github/workflows/convert-and-publish.yml
index bb40d92f..75fa8553 100644
--- a/.github/workflows/convert-and-publish.yml
+++ b/.github/workflows/convert-and-publish.yml
@@ -23,50 +23,16 @@ jobs:
Set-PSRepository -Name PSGallery -InstallationPolicy Trusted
Install-Module -Name OpenXML -Force -Scope CurrentUser
- - name: Import module and convert all specs
+ - name: Build publish tree and Windows_Protocols.zip
shell: pwsh
working-directory: ${{ github.workspace }}
- run: |
- Import-Module .\AwakeCoding.OpenSpecs -Force
- Get-OpenSpecCatalog |
- Save-OpenSpecDocument -Format DOCX -OutputPath ./downloads-convert -Force |
- Where-Object { $_.Status -in 'Downloaded', 'Exists' } |
- Convert-OpenSpecToMarkdown -OutputPath ./converted-specs -Force -Parallel -ThrottleLimit 4
-
- - name: Build publish directory and index
- shell: pwsh
- working-directory: ${{ github.workspace }}
- run: |
- Import-Module .\AwakeCoding.OpenSpecs -Force
- $converted = Join-Path $PWD 'converted-specs'
- $publish = Join-Path $PWD 'publish'
- New-Item -Path $publish -ItemType Directory -Force | Out-Null
- Get-ChildItem -LiteralPath $converted -Directory | ForEach-Object {
- $name = $_.Name
- $md = Join-Path $_.FullName "$name.md"
- if (-not (Test-Path -LiteralPath $md)) { $md = Join-Path $_.FullName 'index.md' }
- if (-not (Test-Path -LiteralPath $md)) { return }
- $dest = Join-Path $publish $name
- New-Item -Path $dest -ItemType Directory -Force | Out-Null
- Copy-Item -LiteralPath $md -Destination (Join-Path $dest 'index.md') -Force
- $media = Join-Path $_.FullName 'media'
- if (Test-Path -LiteralPath $media -PathType Container) {
- Copy-Item -LiteralPath $media -Destination $dest -Recurse -Force
- }
- }
- Update-OpenSpecIndex -Path $publish
-
- - name: Zip publish contents
- shell: pwsh
- working-directory: ${{ github.workspace }}
- run: |
- Compress-Archive -Path .\publish\* -DestinationPath .\publish.zip -Force
+ run: .\scripts\Build-Publish.ps1
- name: Upload publish artifact
uses: actions/upload-artifact@v4
with:
name: publish
- path: publish.zip
+ path: Windows_Protocols.zip
- name: Push to orphaned publish branch
shell: pwsh
diff --git a/AGENTS.md b/AGENTS.md
index 1f844450..bd2b6fde 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -4,7 +4,13 @@ Instructions for AI agents working in this repository. For user-facing usage and
## Project summary
-This repo is a PowerShell module that discovers and downloads Microsoft Open Specifications (Windows Protocols) from Learn and converts DOCX/PDF documents to strict GFM Markdown. There is no separate build: the module is `AwakeCoding.OpenSpecs.psd1` + `AwakeCoding.OpenSpecs.psm1` plus dot-sourced `Public/*.ps1` and `Private/*.ps1` on load. Target runtimes are PowerShell 5.1 and 7 (PSEditions Desktop and Core).
+This repo is a PowerShell module that discovers and downloads Microsoft Open Specifications (Windows Protocols) from Learn and converts DOCX/PDF documents to strict GFM Markdown. There is no separate build: the module is `AwakeCoding.OpenSpecs.psd1` + `AwakeCoding.OpenSpecs.psm1` plus dot-sourced `Public/*.ps1` and `Private/*.ps1` on load.
+
+## PowerShell version (required)
+
+- **PowerShell 7 only.** Use the latest stable PowerShell 7 (pwsh) at all times. This is mandatory.
+- **Windows PowerShell (5.1) compatibility is not a goal and is forbidden.** Do not add workarounds, conditional logic, or compatibility shims for Windows PowerShell. Code must assume PowerShell 7+ exclusively.
+- Run all scripts, tests, and module commands with `pwsh`, not `powershell.exe`. CI, local development, and any tooling must target PowerShell 7.
## File and directory structure
@@ -43,7 +49,7 @@ Tests use Pester 5. From repo root:
Invoke-Pester ./tests
```
-Use PowerShell 7 when possible for consistency with CI. Some tests are tagged `Live` and hit the network (Find-OpenSpec, Get-OpenSpecDownloadLink). To skip them:
+Use PowerShell 7 (required; see above). Some tests are tagged `Live` and hit the network (Find-OpenSpec, Get-OpenSpecDownloadLink). To skip them:
```powershell
Invoke-Pester ./tests -Tag '!Live'
@@ -55,4 +61,4 @@ When you add a new exported function, add its name to the `$expected` array in t
- Do not remove or rename exported functions without updating `AwakeCoding.OpenSpecs.psd1` and the exports test.
- Conversion: DOCX is handled in-module via OpenXML; PDF uses external `docling` or `markitdown` when available (see `AwakeCoding.OpenSpecs/Private/Get-OpenSpecToolchain.ps1`). Output is textual (tables, ASCII), not image-based.
-- For bulk or CI conversions, use `-Parallel -ThrottleLimit N` on PowerShell 7 with `Convert-OpenSpecToMarkdown` or `Invoke-OpenSpecConversionPipeline`.
+- For bulk or CI conversions, use `-Parallel -ThrottleLimit N` with `Convert-OpenSpecToMarkdown` or `Invoke-OpenSpecConversionPipeline` (PowerShell 7 only).
diff --git a/AwakeCoding.OpenSpecs/Private/ConvertFrom-OpenSpecDocx.ps1 b/AwakeCoding.OpenSpecs/Private/ConvertFrom-OpenSpecDocx.ps1
index 2fce8e68..9a2859f9 100644
--- a/AwakeCoding.OpenSpecs/Private/ConvertFrom-OpenSpecDocx.ps1
+++ b/AwakeCoding.OpenSpecs/Private/ConvertFrom-OpenSpecDocx.ps1
@@ -92,6 +92,24 @@ function ConvertFrom-OpenSpecDocxWithOpenXml {
$relationshipMap = Get-OpenSpecOpenXmlRelationshipMap -Archive $archive
$lines = New-Object System.Collections.Generic.List[string]
$emittedAnchors = [System.Collections.Generic.HashSet[string]]::new([System.StringComparer]::OrdinalIgnoreCase)
+ $linkMetadata = [ordered]@{
+ GuidToSection = @{}
+ SectionToTitle = @{}
+ TocAlias = @{}
+ GuidToGlossarySlug = @{}
+ InternalHyperlinks = New-Object System.Collections.Generic.List[object]
+ Stats = [ordered]@{
+ ParagraphCount = 0
+ HeadingCount = 0
+ BookmarkCount = 0
+ InternalHyperlinkCount = 0
+ GuidSectionMapCount = 0
+ TocAliasCount = 0
+ GlossaryGuidMapCount = 0
+ }
+ }
+ $inGlossary = $false
+ $glossaryHeadingLevel = 0
# Resolve media output directory for image extraction.
$resolvedMediaDir = $null
@@ -101,10 +119,12 @@ function ConvertFrom-OpenSpecDocxWithOpenXml {
foreach ($child in $body.ChildNodes) {
if ($child.LocalName -eq 'p') {
+ $linkMetadata.Stats.ParagraphCount++
$text = ConvertFrom-OpenSpecOpenXmlParagraph -ParagraphNode $child -NamespaceManager $nsmgr -RelationshipMap $relationshipMap -Archive $archive -MediaOutputDirectory $resolvedMediaDir
$styleNode = $child.SelectSingleNode('./w:pPr/w:pStyle', $nsmgr)
$style = if ($styleNode -and $styleNode.Attributes) { $styleNode.GetAttribute('val', 'http://schemas.openxmlformats.org/wordprocessingml/2006/main') } else { '' }
- $anchors = Get-OpenSpecOpenXmlParagraphAnchors -ParagraphNode $child -NamespaceManager $nsmgr -ParagraphText $text -HeadingStyle $style
+ $anchorInfo = Get-OpenSpecOpenXmlParagraphAnchorInfo -ParagraphNode $child -NamespaceManager $nsmgr -ParagraphText $text -HeadingStyle $style
+ $anchors = @($anchorInfo.Anchors)
foreach ($anchor in $anchors) {
if ([string]::IsNullOrWhiteSpace($anchor)) {
@@ -117,6 +137,13 @@ function ConvertFrom-OpenSpecDocxWithOpenXml {
$lines.Add((''))
}
+ $linkMetadata.Stats.BookmarkCount += @($anchorInfo.BookmarkNames).Count
+
+ $internalLinks = Get-OpenSpecOpenXmlParagraphInternalHyperlinks -ParagraphNode $child -NamespaceManager $nsmgr
+ foreach ($internalLink in $internalLinks) {
+ [void]$linkMetadata.InternalHyperlinks.Add($internalLink)
+ }
+ $linkMetadata.Stats.InternalHyperlinkCount += @($internalLinks).Count
$numberingNode = $child.SelectSingleNode('./w:pPr/w:numPr', $nsmgr)
if ([string]::IsNullOrWhiteSpace($text)) {
@@ -128,11 +155,42 @@ function ConvertFrom-OpenSpecDocxWithOpenXml {
if ($style -match '^Heading(?<level>[1-6])$') {
$level = [int]$Matches['level']
+ $linkMetadata.Stats.HeadingCount++
# Strip bold from heading text — the heading style (#) already implies bold.
# Keep italic and code formatting if present.
$headingText = ($text -replace '\*\*(?!\*)', '').Trim()
$lines.Add((('{0} ' -f ('#' * $level)) + $headingText))
$lines.Add('')
+
+ $isGlossaryHeading = $headingText -match '(?i)^\d+(?:\.\d+)*\s+Glossary$'
+ if ($isGlossaryHeading) {
+ $inGlossary = $true
+ $glossaryHeadingLevel = $level
+ }
+ elseif ($inGlossary -and $level -le $glossaryHeadingLevel) {
+ $inGlossary = $false
+ }
+
+ $sectionAnchor = $anchorInfo.SectionAnchor
+ if (-not [string]::IsNullOrWhiteSpace($sectionAnchor)) {
+ if (-not $linkMetadata.SectionToTitle.ContainsKey($sectionAnchor)) {
+ $linkMetadata.SectionToTitle[$sectionAnchor] = $headingText
+ }
+
+ foreach ($bookmarkName in @($anchorInfo.BookmarkNames)) {
+ if ($bookmarkName -match '(?i)^section_(?<guid>[a-f0-9]{32})$') {
+ $guid = $Matches['guid'].ToLowerInvariant()
+ if (-not $linkMetadata.GuidToSection.ContainsKey($guid)) {
+ $linkMetadata.GuidToSection[$guid] = $sectionAnchor
+ }
+ }
+ elseif ($bookmarkName -match '^_Toc\d+$') {
+ if (-not $linkMetadata.TocAlias.ContainsKey($bookmarkName)) {
+ $linkMetadata.TocAlias[$bookmarkName] = $sectionAnchor
+ }
+ }
+ }
+ }
}
elseif ($numberingNode) {
$lines.Add(('- ' + $text.Trim()))
@@ -141,6 +199,22 @@ function ConvertFrom-OpenSpecDocxWithOpenXml {
$lines.Add($text.Trim())
$lines.Add('')
}
+
+ if ($inGlossary) {
+ $defMatch = [regex]::Match($text, '^\s*\*\*(?<term>[^*]+)\*\*\s*:\s*')
+ if ($defMatch.Success) {
+ $term = $defMatch.Groups['term'].Value.Trim()
+ $slug = Get-OpenSpecGlossarySlugFromTerm -Term $term
+ foreach ($bookmarkName in @($anchorInfo.BookmarkNames)) {
+ if ($bookmarkName -match '(?i)^gt_(?<guid>[a-f0-9\-]{36})$') {
+ $guid = $Matches['guid'].ToLowerInvariant()
+ if (-not $linkMetadata.GuidToGlossarySlug.ContainsKey($guid)) {
+ $linkMetadata.GuidToGlossarySlug[$guid] = $slug
+ }
+ }
+ }
+ }
+ }
}
elseif ($child.LocalName -eq 'tbl') {
$tableLines = ConvertFrom-OpenSpecOpenXmlTable -TableNode $child -NamespaceManager $nsmgr -RelationshipMap $relationshipMap -Archive $archive -MediaOutputDirectory $resolvedMediaDir
@@ -157,6 +231,11 @@ function ConvertFrom-OpenSpecDocxWithOpenXml {
throw 'OpenXml conversion produced empty markdown output.'
}
+ $linkMetadata.Stats.GuidSectionMapCount = $linkMetadata.GuidToSection.Count
+ $linkMetadata.Stats.TocAliasCount = $linkMetadata.TocAlias.Count
+ $linkMetadata.Stats.GlossaryGuidMapCount = $linkMetadata.GuidToGlossarySlug.Count
+ $notes.Add("Link metadata captured: guidToSection=$($linkMetadata.Stats.GuidSectionMapCount), tocAlias=$($linkMetadata.Stats.TocAliasCount), guidToGlossarySlug=$($linkMetadata.Stats.GlossaryGuidMapCount), internalLinks=$($linkMetadata.Stats.InternalHyperlinkCount).")
+
$markdown | Set-Content -LiteralPath $OutputPath -Encoding UTF8
}
finally {
@@ -165,11 +244,21 @@ function ConvertFrom-OpenSpecDocxWithOpenXml {
}
}
+ $linkMetadataOut = [ordered]@{
+ GuidToSection = $linkMetadata.GuidToSection
+ SectionToTitle = $linkMetadata.SectionToTitle
+ TocAlias = $linkMetadata.TocAlias
+ GuidToGlossarySlug = $linkMetadata.GuidToGlossarySlug
+ InternalHyperlinks = @($linkMetadata.InternalHyperlinks.ToArray())
+ Stats = $linkMetadata.Stats
+ }
+
return [pscustomobject]@{
PSTypeName = 'AwakeCoding.OpenSpecs.ConversionStep'
Strategy = 'openxml-docx'
OutputPath = $OutputPath
Notes = $notes.ToArray()
+ LinkMetadata = $linkMetadataOut
}
}
@@ -679,7 +768,7 @@ function ConvertFrom-OpenSpecOpenXmlRunText {
return ($parts.ToArray() -join '')
}
-function Get-OpenSpecOpenXmlParagraphAnchors {
+function Get-OpenSpecOpenXmlParagraphAnchorInfo {
[CmdletBinding()]
param(
[Parameter(Mandatory)]
@@ -696,6 +785,8 @@ function Get-OpenSpecOpenXmlParagraphAnchors {
)
$anchors = New-Object System.Collections.Generic.List[string]
+ $bookmarkNames = New-Object System.Collections.Generic.List[string]
+ $sectionAnchor = $null
$bookmarkNodes = $ParagraphNode.SelectNodes('.//w:bookmarkStart', $NamespaceManager)
foreach ($bookmarkNode in $bookmarkNodes) {
@@ -709,6 +800,7 @@ function Get-OpenSpecOpenXmlParagraphAnchors {
}
$anchors.Add($bookmarkName)
+ $bookmarkNames.Add($bookmarkName)
}
if ($HeadingStyle -match '^Heading[1-6]$') {
@@ -718,7 +810,82 @@ function Get-OpenSpecOpenXmlParagraphAnchors {
}
}
- return @($anchors.ToArray() | Select-Object -Unique)
+ [pscustomobject]@{
+ Anchors = @($anchors.ToArray() | Select-Object -Unique)
+ BookmarkNames = @($bookmarkNames.ToArray() | Select-Object -Unique)
+ SectionAnchor = $sectionAnchor
+ }
+}
+
+function Get-OpenSpecOpenXmlParagraphAnchors {
+    <#
+    .SYNOPSIS
+        Returns only the anchor names for a paragraph.
+    .DESCRIPTION
+        Forwards all four arguments to Get-OpenSpecOpenXmlParagraphAnchorInfo and
+        emits the Anchors property of the object it returns, wrapped as an array.
+    #>
+    [CmdletBinding()]
+    param(
+        [Parameter(Mandatory)]
+        [System.Xml.XmlNode]$ParagraphNode,
+
+        [Parameter(Mandatory)]
+        [System.Xml.XmlNamespaceManager]$NamespaceManager,
+
+        [Parameter()]
+        [string]$ParagraphText,
+
+        [Parameter()]
+        [string]$HeadingStyle
+    )
+
+    # Splat the same four named arguments the caller gave us.
+    $anchorInfoArgs = @{
+        ParagraphNode    = $ParagraphNode
+        NamespaceManager = $NamespaceManager
+        ParagraphText    = $ParagraphText
+        HeadingStyle     = $HeadingStyle
+    }
+    $anchorInfo = Get-OpenSpecOpenXmlParagraphAnchorInfo @anchorInfoArgs
+    return @($anchorInfo.Anchors)
+}
+
+function Get-OpenSpecOpenXmlParagraphInternalHyperlinks {
+    <#
+    .SYNOPSIS
+        Collects the in-document hyperlinks (w:hyperlink with a w:anchor) of a paragraph.
+    .PARAMETER ParagraphNode
+        The w:p node to search (descendants are searched with './/').
+    .PARAMETER NamespaceManager
+        Namespace manager in which the 'w' prefix is bound.
+    .OUTPUTS
+        One PSCustomObject per hyperlink with Anchor (the w:anchor value) and
+        Text (the concatenated w:t text, whitespace-collapsed and trimmed).
+        Hyperlinks whose anchor is empty or whitespace are skipped.
+    #>
+    [CmdletBinding()]
+    param(
+        [Parameter(Mandatory)]
+        [System.Xml.XmlNode]$ParagraphNode,
+
+        [Parameter(Mandatory)]
+        [System.Xml.XmlNamespaceManager]$NamespaceManager
+    )
+
+    $wordNamespace = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'
+    $links = New-Object System.Collections.Generic.List[object]
+    $hyperlinkNodes = $ParagraphNode.SelectNodes('.//w:hyperlink[@w:anchor]', $NamespaceManager)
+    foreach ($hyperlinkNode in $hyperlinkNodes) {
+        $anchor = $hyperlinkNode.GetAttribute('anchor', $wordNamespace)
+        if ([string]::IsNullOrWhiteSpace($anchor)) {
+            continue
+        }
+
+        $textNodes = $hyperlinkNode.SelectNodes('.//w:t', $NamespaceManager)
+        $parts = New-Object System.Collections.Generic.List[string]
+        foreach ($textNode in $textNodes) {
+            # Keep whitespace-only runs: a separate w:t holding just a space is how
+            # word gaps between runs are carried. Dropping them would fuse adjacent
+            # words; the whitespace is normalized after the join instead.
+            if (-not [string]::IsNullOrEmpty($textNode.InnerText)) {
+                [void]$parts.Add($textNode.InnerText)
+            }
+        }
+        $text = (($parts.ToArray() -join '') -replace '\s+', ' ').Trim()
+
+        [void]$links.Add([pscustomobject]@{
+            Anchor = $anchor
+            Text   = $text
+        })
+    }
+
+    return @($links.ToArray())
+}
+
+function Get-OpenSpecGlossarySlugFromTerm {
+    <#
+    .SYNOPSIS
+        Turns a glossary term into a lowercase, hyphen-separated slug prefixed with "gt_".
+    .DESCRIPTION
+        Whitespace runs become hyphens, characters other than word characters and
+        hyphens are dropped, repeated hyphens collapse, and a leading or trailing
+        hyphen is removed. If nothing is left, the slug falls back to "term".
+    #>
+    [CmdletBinding()]
+    param(
+        [Parameter(Mandatory)]
+        [string]$Term
+    )
+
+    # Apply the normalization steps one at a time, in the same order as before.
+    $normalized = $Term -replace '\s+', '-'
+    $normalized = $normalized -replace '[^\w\-]', ''
+    $normalized = $normalized -replace '-+', '-'
+    $normalized = $normalized -replace '^-|-$', ''
+    $normalized = $normalized.ToLowerInvariant()
+
+    if ([string]::IsNullOrWhiteSpace($normalized)) {
+        $normalized = 'term'
+    }
+
+    return "gt_$normalized"
}
function Get-OpenSpecSectionAnchorFromHeadingText {
diff --git a/AwakeCoding.OpenSpecs/Private/Get-OpenSpecGuidSectionMapFromLearn.ps1 b/AwakeCoding.OpenSpecs/Private/Get-OpenSpecGuidSectionMapFromLearn.ps1
new file mode 100644
index 00000000..6ef500c7
--- /dev/null
+++ b/AwakeCoding.OpenSpecs/Private/Get-OpenSpecGuidSectionMapFromLearn.ps1
@@ -0,0 +1,69 @@
+<#
+.SYNOPSIS
+ Builds a GUID-to-section map by fetching section pages from Microsoft Learn.
+.DESCRIPTION
+ For Open Specs that have GuidToSection=0 from DOCX conversion (e.g. MS-RDPBCGR),
+ fetches each section page from Learn (openspecs/windows_protocols/protocolId/{guid-with-hyphens}),
+ parses the H1 for the section number (e.g. "2.2.1.4 Server MCS Connect Response PDU..."),
+ and returns a hashtable: guid_no_hyphens -> Section_N.N.
+.PARAMETER ProtocolId
+ Protocol ID (e.g. MS-RDPBCGR).
+.PARAMETER Guids
+ Array of 32-character hex GUIDs (no hyphens) to resolve.
+.PARAMETER ThrottleSeconds
+ Delay between HTTP requests to avoid overloading Learn. Default 1.
+.OUTPUTS
+ Hashtable: lowercase guid (no hyphens) -> Section_N.N
+#>
+function Get-OpenSpecGuidSectionMapFromLearn {
+    [CmdletBinding()]
+    param(
+        [Parameter(Mandatory)]
+        [string]$ProtocolId,
+
+        [Parameter(Mandatory)]
+        [string[]]$Guids,
+
+        [Parameter()]
+        [int]$ThrottleSeconds = 1
+    )
+
+    $ErrorActionPreference = 'Stop'
+
+    # Inserts hyphens at the 8-4-4-4-12 positions; returns $null for anything that
+    # is not exactly 32 characters long.
+    function ConvertTo-HyphenatedGuid {
+        param([string]$Hex32)
+        if ($Hex32.Length -ne 32) { return $null }
+        $Hex32.Substring(0, 8) + '-' + $Hex32.Substring(8, 4) + '-' + $Hex32.Substring(12, 4) + '-' + $Hex32.Substring(16, 4) + '-' + $Hex32.Substring(20, 12)
+    }
+
+    $baseUrl = "https://learn.microsoft.com/en-us/openspecs/windows_protocols/$($ProtocolId.ToLowerInvariant())"
+    $map = @{}
+    $uniqueGuids = @($Guids | ForEach-Object { $_.ToLowerInvariant() } | Select-Object -Unique)
+    $total = $uniqueGuids.Count
+    $resolved = 0
+
+    # A counted loop instead of 0..($total - 1): the range operator counts
+    # downwards (0, -1) when the list is empty and would index from the end.
+    for ($i = 0; $i -lt $total; $i++) {
+        $guidHex = $uniqueGuids[$i]
+        $guidHyphenated = ConvertTo-HyphenatedGuid -Hex32 $guidHex
+        if (-not $guidHyphenated) { continue }
+        $url = "$baseUrl/$guidHyphenated"
+        try {
+            $response = Invoke-WebRequest -Uri $url -UseBasicParsing -TimeoutSec 15 -ErrorAction Stop
+            $html = $response.Content
+            # The section number is the leading "N.N.N" of the page's H1 element;
+            # the second alternative accepts a Markdown-style "# N.N.N" heading.
+            if ($html -match '<h1[^>]*>\s*(\d+(?:\.\d+)*)\s+' -or $html -match '(?:^|\n)#\s+(\d+(?:\.\d+)*)\s+') {
+                $sectionNum = $Matches[1]
+                $map[$guidHex] = "Section_$sectionNum"
+                $resolved++
+            }
+        }
+        catch {
+            # Best effort: an unreachable page leaves that GUID unmapped.
+            Write-Verbose "Failed to fetch $url : $_"
+        }
+        # Pause between requests, but not after the last one.
+        if ($ThrottleSeconds -gt 0 -and $i -lt $total - 1) {
+            Start-Sleep -Seconds $ThrottleSeconds
+        }
+    }
+
+    Write-Verbose "Resolved $resolved / $total GUIDs from Learn"
+    $map
+}
diff --git a/AwakeCoding.OpenSpecs/Private/Invoke-OpenSpecMarkdownCleanup.ps1 b/AwakeCoding.OpenSpecs/Private/Invoke-OpenSpecMarkdownCleanup.ps1
index 8617fea9..40d56686 100644
--- a/AwakeCoding.OpenSpecs/Private/Invoke-OpenSpecMarkdownCleanup.ps1
+++ b/AwakeCoding.OpenSpecs/Private/Invoke-OpenSpecMarkdownCleanup.ps1
@@ -5,12 +5,49 @@ function Invoke-OpenSpecMarkdownCleanup {
[string]$Markdown,
[Parameter(Mandatory)]
- [string]$CurrentProtocolId
+ [string]$CurrentProtocolId,
+
+ [Parameter()]
+ [object]$SourceLinkMetadata,
+
+ [switch]$RemoveDocumentIndex = $true
)
$issues = New-Object System.Collections.Generic.List[object]
$result = $Markdown
+ if ($RemoveDocumentIndex) {
+ $indexResult = Remove-OpenSpecDocumentIndex -Markdown $result
+ $result = $indexResult.Markdown
+ if ($indexResult.Removed) {
+ [void]$issues.Add([pscustomobject]@{
+ Type = 'DocumentIndexRemoved'
+ Severity = 'Info'
+ Reason = 'Back-of-document index section was removed (page numbers are not meaningful in Markdown).'
+ })
+ }
+ }
+
+ $titleResult = Set-OpenSpecDocumentTitle -Markdown $result -CurrentProtocolId $CurrentProtocolId
+ $result = $titleResult.Markdown
+ if ($titleResult.Normalized) {
+ [void]$issues.Add([pscustomobject]@{
+ Type = 'DocumentTitleNormalized'
+ Severity = 'Info'
+ Reason = 'Document title was normalized to a single H1 heading.'
+ })
+ }
+
+ $frontMatterResult = Remove-OpenSpecFrontMatterBoilerplate -Markdown $result
+ $result = $frontMatterResult.Markdown
+ if ($frontMatterResult.Removed) {
+ [void]$issues.Add([pscustomobject]@{
+ Type = 'FrontMatterBoilerplateRemoved'
+ Severity = 'Info'
+ Reason = 'IP notice, revision history, and support boilerplate were removed after the title; last updated date retained when present.'
+ })
+ }
+
$tableResult = ConvertFrom-OpenSpecHtmlTables -Markdown $result
$result = $tableResult.Markdown
foreach ($issue in $tableResult.Issues) { [void]$issues.Add($issue) }
@@ -31,16 +68,84 @@ function Invoke-OpenSpecMarkdownCleanup {
$result = $tocResult.Markdown
foreach ($issue in $tocResult.Issues) { [void]$issues.Add($issue) }
- $guidResult = Resolve-OpenSpecGuidSectionAnchors -Markdown $result
+ $sourceGuidToSection = if ($SourceLinkMetadata -and $SourceLinkMetadata.PSObject.Properties['GuidToSection']) { $SourceLinkMetadata.GuidToSection } else { $null }
+ $guidResult = Resolve-OpenSpecGuidSectionAnchors -Markdown $result -GuidToSectionMap $sourceGuidToSection
$result = $guidResult.Markdown
foreach ($issue in $guidResult.Issues) { [void]$issues.Add($issue) }
+ $crossSpecResult = Repair-OpenSpecCrossSpecLinks -Markdown $result -CurrentProtocolId $CurrentProtocolId
+ $result = $crossSpecResult.Markdown
+ foreach ($issue in $crossSpecResult.Issues) { [void]$issues.Add($issue) }
+
+ $sectionNumResult = Repair-OpenSpecSectionNumberLinks -Markdown $result
+ $result = $sectionNumResult.Markdown
+ foreach ($issue in $sectionNumResult.Issues) { [void]$issues.Add($issue) }
+
$mathResult = ConvertTo-OpenSpecNormalizedMathText -Markdown $result
$result = $mathResult.Markdown
foreach ($issue in $mathResult.Issues) { [void]$issues.Add($issue) }
$result = Convert-OpenSpecInlineHtmlToMarkdown -Text $result
$result = Remove-OpenSpecStandaloneTableTagLines -Text $result
+
+ $anchorResult = Add-OpenSpecSectionAnchors -Markdown $result
+ $result = $anchorResult.Markdown
+ if ($anchorResult.InjectedCount -gt 0) {
+ [void]$issues.Add([pscustomobject]@{
+ Type = 'SectionAnchorsInjected'
+ Severity = 'Info'
+ Count = $anchorResult.InjectedCount
+ Reason = 'Section anchor tags were added so TOC and in-document links resolve correctly.'
+ })
+ }
+
+ $tocAnchorResult = Add-OpenSpecMissingSectionAnchorsFromToc -Markdown $result
+ $result = $tocAnchorResult.Markdown
+ if ($tocAnchorResult.InjectedCount -gt 0) {
+ [void]$issues.Add([pscustomobject]@{
+ Type = 'MissingSectionAnchorsFromToc'
+ Severity = 'Info'
+ Count = $tocAnchorResult.InjectedCount
+ Reason = 'Missing section anchors were injected using TOC titles so linked section numbers resolve.'
+ })
+ }
+
+ $sourceSectionToTitle = if ($SourceLinkMetadata -and $SourceLinkMetadata.PSObject.Properties['SectionToTitle']) { $SourceLinkMetadata.SectionToTitle } else { $null }
+ $guidByHeadingResult = Repair-OpenSpecSectionGuidLinksByHeadingMatch -Markdown $result -SectionToTitleMap $sourceSectionToTitle
+ $result = $guidByHeadingResult.Markdown
+ if ($guidByHeadingResult.LinksRepaired -gt 0) {
+ [void]$issues.Add([pscustomobject]@{
+ Type = 'SectionGuidLinksRepairedByHeading'
+ Severity = 'Info'
+ Count = $guidByHeadingResult.LinksRepaired
+ Reason = 'Section GUID links were rewritten to section numbers by matching link text to headings.'
+ })
+ }
+
+ $sourceGuidToGlossarySlug = if ($SourceLinkMetadata -and $SourceLinkMetadata.PSObject.Properties['GuidToGlossarySlug']) { $SourceLinkMetadata.GuidToGlossarySlug } else { $null }
+ $glossaryResult = Add-OpenSpecGlossaryAnchorsAndRepairLinks -Markdown $result -GuidToGlossarySlugMap $sourceGuidToGlossarySlug
+ $result = $glossaryResult.Markdown
+ if ($glossaryResult.AnchorsInjected -gt 0 -or $glossaryResult.LinksRepaired -gt 0) {
+ [void]$issues.Add([pscustomobject]@{
+ Type = 'GlossaryAnchorsAndLinks'
+ Severity = 'Info'
+ AnchorsInjected = $glossaryResult.AnchorsInjected
+ LinksRepaired = $glossaryResult.LinksRepaired
+ SourceMapLinksRepaired = if ($glossaryResult.PSObject.Properties['SourceMapLinksRepaired']) { $glossaryResult.SourceMapLinksRepaired } else { 0 }
+ Reason = 'Glossary term anchors were added and #gt_ links were rewritten so they resolve.'
+ })
+ }
+
+ $tocGitHubResult = ConvertTo-OpenSpecGitHubFriendlyToc -Markdown $result
+ $result = $tocGitHubResult.Markdown
+ if ($tocGitHubResult.Rewritten) {
+ [void]$issues.Add([pscustomobject]@{
+ Type = 'TocGitHubFriendly'
+ Severity = 'Info'
+ Reason = 'Table of contents was rewritten as collapsible sections for better GitHub rendering.'
+ })
+ }
+
$newLine = [Environment]::NewLine
$result = [regex]::Replace($result, "(`r?`n){3,}", "$newLine$newLine")
@@ -739,6 +844,97 @@ function ConvertTo-OpenSpecNormalizedTocLinks {
}
}
+function ConvertTo-OpenSpecGitHubFriendlyToc {
+    <#
+    .SYNOPSIS
+        Rewrites the flat "Table of Contents" link list as collapsible groups.
+    .DESCRIPTION
+        Finds the line that is exactly "Table of Contents", collects the following
+        lines of the form "[N.N Title](#Section_N.N)", groups them by their first
+        section-number component, and emits one details/summary element per group
+        containing a nested bullet list of the original links.
+        The input is returned unchanged when no TOC title or no TOC entry is found.
+    .PARAMETER Markdown
+        The full Markdown document.
+    .OUTPUTS
+        PSCustomObject with Markdown (string) and Rewritten (bool).
+    #>
+    [CmdletBinding()]
+    param(
+        [Parameter(Mandatory)]
+        [string]$Markdown
+    )
+
+    $newLine = [Environment]::NewLine
+    # num/title are read below; the anchor target is matched but not used.
+    $tocLineRegex = [regex]::new('^\s*\[(?<num>\d+(?:\.\d+)*)\s+(?<title>[^\]]*)\]\(#Section_(?<anchor>\d+(?:\.\d+)*)\)\s*$')
+    $lines = $Markdown -split '\r?\n'
+    $tocTitleIndex = -1
+    for ($i = 0; $i -lt $lines.Count; $i++) {
+        if ($lines[$i].Trim() -ceq 'Table of Contents') {
+            $tocTitleIndex = $i
+            break
+        }
+    }
+    if ($tocTitleIndex -lt 0) {
+        return [pscustomobject]@{ Markdown = $Markdown; Rewritten = $false }
+    }
+
+    # The TOC ends at the last entry line before the first non-blank, non-entry
+    # line that follows at least one entry.
+    $tocEndIndex = -1
+    for ($i = $tocTitleIndex + 1; $i -lt $lines.Count; $i++) {
+        if ($tocLineRegex.IsMatch($lines[$i])) {
+            $tocEndIndex = $i
+        } elseif ($lines[$i].Trim() -ne '' -and $tocEndIndex -ge 0) {
+            break
+        }
+    }
+    if ($tocEndIndex -lt $tocTitleIndex) {
+        return [pscustomobject]@{ Markdown = $Markdown; Rewritten = $false }
+    }
+
+    $entries = [System.Collections.Generic.List[object]]::new()
+    for ($i = $tocTitleIndex + 1; $i -le $tocEndIndex; $i++) {
+        $line = $lines[$i]
+        $m = $tocLineRegex.Match($line)
+        if ($m.Success) {
+            [void]$entries.Add([pscustomobject]@{
+                SectionNum = $m.Groups['num'].Value
+                Title      = $m.Groups['title'].Value.Trim()
+                FullLink   = $line.Trim()
+            })
+        }
+    }
+    if ($entries.Count -eq 0) {
+        return [pscustomobject]@{ Markdown = $Markdown; Rewritten = $false }
+    }
+
+    # Group by the first number component; the first entry seen in a group
+    # supplies the group's summary title.
+    $topLevelToTitle = @{}
+    $groups = @{}
+    foreach ($e in $entries) {
+        $first = $e.SectionNum -replace '\..*$', ''
+        if (-not $topLevelToTitle.ContainsKey($first)) {
+            $topLevelToTitle[$first] = $e.Title
+        }
+        if (-not $groups.ContainsKey($first)) {
+            $groups[$first] = [System.Collections.Generic.List[object]]::new()
+        }
+        [void]$groups[$first].Add($e)
+    }
+
+    $sb = [System.Text.StringBuilder]::new()
+    [void]$sb.AppendLine('Table of Contents')
+    [void]$sb.AppendLine()
+    $firstKeys = $groups.Keys | Sort-Object { [int]$_ }
+    foreach ($key in $firstKeys) {
+        $title = $topLevelToTitle[$key]
+        [void]$sb.AppendLine('<details>')
+        [void]$sb.AppendLine("<summary>$key $title</summary>")
+        # Blank line so the list below is parsed as Markdown inside the HTML block.
+        [void]$sb.AppendLine()
+        foreach ($e in $groups[$key]) {
+            # Two spaces per nesting level: a child bullet must be indented at least
+            # as far as the parent's content ("- " is two columns) to nest.
+            $indent = '  ' * (($e.SectionNum -split '\.').Count - 1)
+            [void]$sb.AppendLine("$indent- $($e.FullLink)")
+        }
+        [void]$sb.AppendLine('</details>')
+        [void]$sb.AppendLine()
+    }
+    $newToc = $sb.ToString().TrimEnd($newLine.ToCharArray())
+
+    $afterStart = $tocEndIndex + 1
+    $after = if ($afterStart -lt $lines.Count) { $newLine + ($lines[$afterStart..($lines.Count - 1)] -join $newLine) } else { '' }
+
+    if ($tocTitleIndex -gt 0) {
+        $before = ($lines[0..($tocTitleIndex - 1)] -join $newLine).TrimEnd()
+        $result = $before + $newLine + $newLine + $newToc + $after
+    }
+    else {
+        # The title is the first line: there is nothing before it. Slicing with
+        # 0..-1 here would pick up the title itself plus the document's last line.
+        $result = $newToc + $after
+    }
+
+    [pscustomobject]@{
+        Markdown  = $result
+        Rewritten = $true
+    }
+}
+
function ConvertTo-OpenSpecNormalizedEncodedBracketUrls {
[CmdletBinding()]
param(
@@ -825,7 +1021,10 @@ function Resolve-OpenSpecGuidSectionAnchors {
[CmdletBinding()]
param(
[Parameter(Mandatory)]
- [string]$Markdown
+ [string]$Markdown,
+
+ [Parameter()]
+ [object]$GuidToSectionMap
)
$issues = New-Object System.Collections.Generic.List[object]
@@ -843,6 +1042,20 @@ function Resolve-OpenSpecGuidSectionAnchors {
# lowercase "section_" while the hyperlink uses "Section_"). Replacing
# these with the Section_X.Y.Z form fixes both issues.
$guidToSection = @{}
+ $sourceMapCount = 0
+ if ($GuidToSectionMap) {
+ foreach ($entry in $GuidToSectionMap.GetEnumerator()) {
+ $guid = ([string]$entry.Key).ToLowerInvariant()
+ $section = [string]$entry.Value
+ if ([string]::IsNullOrWhiteSpace($guid) -or [string]::IsNullOrWhiteSpace($section)) {
+ continue
+ }
+ if (-not $guidToSection.ContainsKey($guid)) {
+ $guidToSection[$guid] = $section
+ $sourceMapCount++
+ }
+ }
+ }
# Order 1: GUID anchor followed by Section anchor (most common)
$pairRegex1 = [regex]::new(
@@ -901,6 +1114,7 @@ function Resolve-OpenSpecGuidSectionAnchors {
Severity = 'Info'
Count = $rewriteCount
MappedAnchors = $guidToSection.Count
+ SourceMappedAnchors = $sourceMapCount
Reason = 'GUID-based section anchors were resolved to section number anchors.'
})
}
@@ -911,6 +1125,105 @@ function Resolve-OpenSpecGuidSectionAnchors {
}
}
+function Repair-OpenSpecCrossSpecLinks {
+    <#
+    .SYNOPSIS
+        Rewrites GUID-anchored links that refer to another spec into relative paths.
+    .DESCRIPTION
+        For every link of the form [text](#Section_<32 hex>), a protocol ID is taken
+        from the link text when it is itself an MS-/MC- identifier, otherwise from
+        the first [MS-...] / [MC-...] token on the same line. When that ID differs
+        from CurrentProtocolId the link becomes [text](../ID/ID.md). Links with no
+        identifiable protocol, or that name the current protocol, are left alone.
+    .PARAMETER Markdown
+        The Markdown text to process.
+    .PARAMETER CurrentProtocolId
+        Protocol ID of the document being cleaned (compared case-insensitively).
+    .OUTPUTS
+        PSCustomObject with Markdown (string) and Issues (array; one
+        'CrossSpecLinksRepaired' entry when at least one link was rewritten).
+    #>
+    [CmdletBinding()]
+    param(
+        [Parameter(Mandatory)]
+        [string]$Markdown,
+
+        [Parameter(Mandatory)]
+        [string]$CurrentProtocolId
+    )
+
+    $issues = New-Object System.Collections.Generic.List[object]
+    $result = $Markdown
+    $rewriteCount = 0
+
+    # Links like ](#Section_<32hex>) are Word bookmark IDs. When they point to another
+    # spec (cross-reference), the GUID is not in the current document, so they break.
+    # Rewrite them to ](../ProtocolId/ProtocolId.md) using [MS-XXX] from the link text
+    # or from the same line (e.g. References: "[MS-RDPBCGR] ... \"[Title](#Section_guid)\"").
+    # The hex class accepts both cases, matching Repair-OpenSpecSectionNumberLinks.
+    $pattern = '\[([^\]]+)\]\(#Section_([a-fA-F0-9]{32})\)'
+    # Not named $matches: that is the automatic variable the -match operator
+    # below overwrites.
+    $linkMatches = [regex]::Matches($result, $pattern)
+    $currentIdUpper = $CurrentProtocolId.ToUpperInvariant()
+
+    # Walk from the end of the text backwards so earlier match offsets stay valid
+    # while the string is being edited.
+    foreach ($m in ($linkMatches | Sort-Object -Property { $_.Index } -Descending)) {
+        $linkText = $m.Groups[1].Value
+        $nlIdx = $result.LastIndexOf("`n", [Math]::Min($m.Index, $result.Length - 1))
+        $lineStart = if ($nlIdx -ge 0) { $nlIdx + 1 } else { 0 }
+        $lineEndIdx = $result.IndexOf("`n", $m.Index)
+        $lineEnd = if ($lineEndIdx -ge 0) { $lineEndIdx } else { $result.Length }
+        $line = $result.Substring($lineStart, $lineEnd - $lineStart)
+
+        $protocolId = $null
+        if ($linkText -match '^(MS|MC)-[A-Z0-9\-]+$') {
+            $protocolId = $linkText
+        }
+        elseif ($line -match '\[(MS-[A-Z0-9\-]+|MC-[A-Z0-9\-]+)\]') {
+            $protocolId = $Matches[1]
+        }
+
+        if ($protocolId -and $protocolId.ToUpperInvariant() -ne $currentIdUpper) {
+            $replacement = "[$linkText](../$protocolId/$protocolId.md)"
+            $result = $result.Substring(0, $m.Index) + $replacement + $result.Substring($m.Index + $m.Length)
+            $rewriteCount++
+        }
+    }
+
+    if ($rewriteCount -gt 0) {
+        [void]$issues.Add([pscustomobject]@{
+            Type     = 'CrossSpecLinksRepaired'
+            Severity = 'Info'
+            Count    = $rewriteCount
+            Reason   = 'Cross-spec references (GUID anchors) were rewritten to relative spec paths.'
+        })
+    }
+
+    [pscustomobject]@{
+        Markdown = $result
+        Issues   = $issues.ToArray()
+    }
+}
+
+function Repair-OpenSpecSectionNumberLinks {
+    <#
+    .SYNOPSIS
+        Rewrites [N.N.N](#Section_<32 hex>) links to [N.N.N](#Section_N.N.N).
+    .DESCRIPTION
+        Only links whose text is purely a dotted section number are touched; the
+        number in the link text becomes the anchor target.
+    .PARAMETER Markdown
+        The Markdown text to process.
+    .OUTPUTS
+        PSCustomObject with Markdown (string) and Issues (array; one
+        'SectionNumberLinksRepaired' entry when at least one link was rewritten).
+    #>
+    [CmdletBinding()]
+    param(
+        [Parameter(Mandatory)]
+        [string]$Markdown
+    )
+
+    $issues = New-Object System.Collections.Generic.List[object]
+    $result = $Markdown
+
+    # In-document links like [5.3.8](#Section_guid) often have no guid->section mapping
+    # (Word bookmark pair missing in converted output). When the link text is a section
+    # number, rewrite to [5.3.8](#Section_5.3.8) so they resolve to our injected anchors.
+    $pattern = [regex]::new(
+        '\[(?<num>\d+(?:\.\d+)*)\]\(#Section_[a-f0-9]{32}\)',
+        [System.Text.RegularExpressions.RegexOptions]::IgnoreCase
+    )
+    # Count before replacing; the replacement output no longer matches the pattern.
+    $rewriteCount = $pattern.Matches($result).Count
+    $result = $pattern.Replace($result, {
+        param($m)
+        $num = $m.Groups['num'].Value
+        "[$num](#Section_$num)"
+    })
+
+    if ($rewriteCount -gt 0) {
+        [void]$issues.Add([pscustomobject]@{
+            Type     = 'SectionNumberLinksRepaired'
+            Severity = 'Info'
+            Count    = $rewriteCount
+            Reason   = 'In-document section links (GUID anchors) were rewritten to section number anchors.'
+        })
+    }
+
+    [pscustomobject]@{
+        Markdown = $result
+        Issues   = $issues.ToArray()
+    }
+}
+
function Resolve-OpenSpecLinkTarget {
[CmdletBinding()]
param(
@@ -998,3 +1311,499 @@ function Remove-OpenSpecStandaloneTableTagLines {
return $result
}
+
+ function Remove-OpenSpecDocumentIndex {
+     # Removes the back-of-document index: everything from the first H1 heading of the form
+     # "# N Index" to the end of the text, any anchor-only lines left dangling just before it,
+     # and the matching "[N Index](#Section_N)" table-of-contents line.
+     #
+     # Parameters:
+     #   Markdown - the Markdown text to process.
+     # Returns an object with:
+     #   Markdown - the resulting text (unchanged when no index heading is found).
+     #   Removed  - $true when an index heading was found and cut.
+     [CmdletBinding()]
+     param(
+         [Parameter(Mandatory)]
+         [string]$Markdown
+     )
+
+     $result = $Markdown
+     $removed = $false
+
+     # Match the back-of-document index section: heading "# N Index" (e.g. "# 8 Index", "# 9 Index").
+     # Do not match "Index of Security Parameters" or other subsections.
+     $indexHeadingRegex = [regex]::new('(?m)^# \d+ Index\s*$')
+     $match = $indexHeadingRegex.Match($result)
+     if ($match.Success) {
+         $result = $result.Substring(0, $match.Index).TrimEnd()
+         $removed = $true
+
+         # Remove anchor-only line(s) that only served the index heading. The pattern is pinned
+         # to the absolute end of the text with \z; a multiline '$' would also match at every
+         # line end and strip content from the middle of the document.
+         $trailingAnchorRegex = [regex]::new('\r?\n(?:\s*<a\s+id="[^"]*"\s*>\s*</a>)+\s*\z')
+         $result = $trailingAnchorRegex.Replace($result, '')
+
+         # Remove the "N Index" TOC entry so we don't leave a dead link. Only horizontal
+         # whitespace is allowed around the entry so neighbouring blank lines (paragraph
+         # separators) are preserved.
+         $result = [regex]::Replace($result, '(?m)^[ \t]*\[\d+ Index\]\(#Section_\d+\)[ \t]*(?:\r?\n|\z)', '')
+     }
+
+     [pscustomobject]@{
+         Markdown = $result
+         Removed = $removed
+     }
+ }
+
+ function Set-OpenSpecDocumentTitle {
+     # Collapses the two-paragraph bold title block
+     #     **[MS-XXX]:**
+     #
+     #     **Full Title**
+     # into a single H1 line "# [MS-XXX]: Full Title". Only the first occurrence is replaced.
+     #
+     # Parameters:
+     #   Markdown          - the Markdown text to process.
+     #   CurrentProtocolId - protocol id expected in the bold label (with or without brackets).
+     # Returns an object with:
+     #   Markdown   - the resulting text.
+     #   Normalized - $true when the title block was found and replaced.
+     [CmdletBinding()]
+     param(
+         [Parameter(Mandatory)]
+         [string]$Markdown,
+
+         [Parameter(Mandatory)]
+         [string]$CurrentProtocolId
+     )
+
+     $result = $Markdown
+     $normalized = $false
+
+     # The protocol id is escaped so it is matched literally inside the pattern.
+     # The named group 'title' is required: it is read via $match.Groups['title'] below.
+     $escapedId = [regex]::Escape($CurrentProtocolId)
+     $titlePattern = [regex]::new(
+         '^\s*\*\*(?:\[' + $escapedId + '\]|' + $escapedId + ')\s*:\s*\*\*\s*\r?\n\r?\n\*\*(?<title>[^*]+)\*\*',
+         [System.Text.RegularExpressions.RegexOptions]::Multiline
+     )
+     $match = $titlePattern.Match($result)
+     if ($match.Success) {
+         $title = $match.Groups['title'].Value.Trim()
+         $replacement = "# [$CurrentProtocolId]: $title"
+         # Splice by index so only this one match is replaced.
+         $result = $result.Substring(0, $match.Index) + $replacement + $result.Substring($match.Index + $match.Length)
+         $normalized = $true
+     }
+
+     [pscustomobject]@{
+         Markdown = $result
+         Normalized = $normalized
+     }
+ }
+
+ function Remove-OpenSpecFrontMatterBoilerplate {
+     # Cuts the front-matter block that starts at "Intellectual Property Rights Notice" and runs
+     # up to (not including) "Table of Contents". If the cut block contains table cells holding
+     # a date (d/m/yyyy-style digits), the last such date is kept as a "Last updated: ..." line.
+     # Returns an object with Markdown (resulting text) and Removed ($true when a block was cut).
+     [CmdletBinding()]
+     param(
+         [Parameter(Mandatory)]
+         [string]$Markdown
+     )
+
+     $boilerplate = [regex]::Match(
+         $Markdown,
+         '(?s)(\r?\n)(Intellectual Property Rights Notice.*?)(\r?\n\r?\n)(Table of Contents)',
+         [System.Text.RegularExpressions.RegexOptions]::IgnoreCase
+     )
+
+     # Nothing to strip: hand the input back untouched.
+     if (-not $boilerplate.Success) {
+         return [pscustomobject]@{
+             Markdown = $Markdown
+             Removed = $false
+         }
+     }
+
+     # Groups: 1 = newline before the block, 2 = the block itself,
+     #         3 = blank line before the TOC heading, 4 = the TOC heading text.
+     $dateCells = [regex]::Matches($boilerplate.Groups[2].Value, '\|\s*(\d{1,2}/\d{1,2}/\d{4})\s*\|')
+
+     if ($dateCells.Count -gt 0) {
+         # The last date cell in the block is taken as the most recent revision.
+         $latest = $dateCells[$dateCells.Count - 1].Groups[1].Value
+         $bridge = "Last updated: $latest" + [Environment]::NewLine + [Environment]::NewLine
+     }
+     else {
+         $bridge = $boilerplate.Groups[3].Value
+     }
+
+     $before = $Markdown.Substring(0, $boilerplate.Index)
+     $after = $Markdown.Substring($boilerplate.Index + $boilerplate.Length)
+     $middle = $boilerplate.Groups[1].Value + $bridge + $boilerplate.Groups[4].Value
+
+     [pscustomobject]@{
+         Markdown = $before + $middle + $after
+         Removed = $true
+     }
+ }
+
+ function Add-OpenSpecSectionAnchors {
+     # Inserts an HTML anchor line (<a id="Section_N.N"></a>) immediately before every heading
+     # that starts with a section number, so links of the form (#Section_N.N) resolve.
+     # A heading whose previous output line is already that exact anchor is left alone, which
+     # makes the function safe to run more than once.
+     #
+     # Parameters:
+     #   Markdown - the Markdown text to process.
+     # Returns an object with:
+     #   Markdown      - the text with anchors added, lines joined with [Environment]::NewLine.
+     #   InjectedCount - number of anchor lines inserted.
+     [CmdletBinding()]
+     param(
+         [Parameter(Mandatory)]
+         [string]$Markdown
+     )
+
+     $newLine = [Environment]::NewLine
+     $lines = [System.Collections.Generic.List[string]]::new()
+     $injectedCount = 0
+
+     # Heading pattern: optional leading whitespace, 1-6 hashes, space, section number (e.g. 1, 1.1, 2.2.2.2.1.1.1), space, rest.
+     $headingRegex = [regex]::new('^\s*(#{1,6})\s+(\d+(?:\.\d+)*)\s+(.+)$')
+
+     foreach ($line in ($Markdown -split '\r?\n')) {
+         $headingMatch = $headingRegex.Match($line)
+         if ($headingMatch.Success) {
+             $anchorId = "Section_$($headingMatch.Groups[2].Value)"
+             # The anchor element must actually carry the id; an empty line would inject nothing
+             # useful and make the "already present" check below compare against ''.
+             $anchorLine = "<a id=`"$anchorId`"></a>"
+             $prevLine = if ($lines.Count -gt 0) { $lines[$lines.Count - 1].Trim() } else { '' }
+             if ($prevLine -ne $anchorLine) {
+                 [void]$lines.Add($anchorLine)
+                 $injectedCount++
+             }
+         }
+         [void]$lines.Add($line)
+     }
+
+     [pscustomobject]@{
+         Markdown = $lines -join $newLine
+         InjectedCount = $injectedCount
+     }
+ }
+
+ function Add-OpenSpecMissingSectionAnchorsFromToc {
+     # Uses table-of-contents lines of the form [N.N Title](#Section_N.N) to add anchors for
+     # sections that have no <a id="Section_N.N"></a> line yet. For each such TOC entry the
+     # first not-yet-assigned line whose text matches the title receives the anchor:
+     #   - a heading line matches when its text contains the title (case-insensitive);
+     #   - a non-heading line matches only when it equals the title exactly (case-sensitive).
+     #
+     # Parameters:
+     #   Markdown - the Markdown text to process.
+     # Returns an object with:
+     #   Markdown      - input unchanged when nothing is missing; otherwise the text with
+     #                   anchors inserted, lines joined with [Environment]::NewLine.
+     #   InjectedCount - number of anchor lines inserted.
+     [CmdletBinding()]
+     param(
+         [Parameter(Mandatory)]
+         [string]$Markdown
+     )
+
+     $newLine = [Environment]::NewLine
+     $lines = [System.Collections.Generic.List[string]]::new([string[]]($Markdown -split '\r?\n'))
+     $injectedCount = 0
+
+     # TOC line: [N.N Title](#Section_N.N). Named groups num/title/sec are read below.
+     $tocEntryRegex = [regex]::new('^\s*\[(?<num>\d+(?:\.\d+)*)\s+(?<title>[^\]]*)\]\(#Section_(?<sec>\d+(?:\.\d+)*)\)\s*$')
+     # Anchor-only line; group 1 is the full id including the "Section_" prefix.
+     $anchorLineRegex = [regex]::new('^\s*<a\s+id="(Section_[^"]+)"\s*>\s*</a>\s*$')
+     $headingRegex = [regex]::new('^\s*#{1,6}\s+(.+)$')
+
+     # Single pass: collect TOC entries and the set of anchors that already exist.
+     $tocEntries = [System.Collections.Generic.List[object]]::new()
+     $existingAnchors = [System.Collections.Generic.HashSet[string]]::new([StringComparer]::OrdinalIgnoreCase)
+     foreach ($line in $lines) {
+         $tocMatch = $tocEntryRegex.Match($line)
+         # Only trust entries whose visible number agrees with the link target.
+         if ($tocMatch.Success -and $tocMatch.Groups['num'].Value -eq $tocMatch.Groups['sec'].Value) {
+             [void]$tocEntries.Add([pscustomobject]@{ SectionNum = $tocMatch.Groups['num'].Value; Title = $tocMatch.Groups['title'].Value.Trim() })
+         }
+         $anchorMatch = $anchorLineRegex.Match($line)
+         if ($anchorMatch.Success) {
+             [void]$existingAnchors.Add($anchorMatch.Groups[1].Value)
+         }
+     }
+
+     # Missing: (sectionNum, title) from TOC where anchor is missing. Keep TOC order, drop duplicates.
+     $missingList = [System.Collections.Generic.List[object]]::new()
+     $seen = [System.Collections.Generic.HashSet[string]]::new()
+     foreach ($e in $tocEntries) {
+         $id = "Section_$($e.SectionNum)"
+         if (-not $existingAnchors.Contains($id) -and $seen.Add($id)) {
+             [void]$missingList.Add([pscustomobject]@{ SectionNum = $e.SectionNum; Title = $e.Title })
+         }
+     }
+     if ($missingList.Count -eq 0) {
+         return [pscustomobject]@{ Markdown = $Markdown; InjectedCount = 0 }
+     }
+
+     # Assign each missing section to the first line (in doc order) that matches its title.
+     $lineIndexToSection = @{}
+     $assignedLines = [System.Collections.Generic.HashSet[int]]::new()
+     foreach ($entry in $missingList) {
+         $title = $entry.Title
+         if ([string]::IsNullOrWhiteSpace($title)) { continue }
+         for ($i = 0; $i -lt $lines.Count; $i++) {
+             if ($assignedLines.Contains($i)) { continue }
+             $line = $lines[$i]
+             $headingMatch = $headingRegex.Match($line)
+             if ($headingMatch.Success) {
+                 # Literal substring test: titles may contain '[', '?' or '*', which a
+                 # -like pattern would interpret as wildcards.
+                 $content = $headingMatch.Groups[1].Value.Trim()
+                 $isHit = $content.IndexOf($title, [StringComparison]::OrdinalIgnoreCase) -ge 0
+             }
+             else {
+                 $isHit = $line.Trim() -ceq $title
+             }
+             if (-not $isHit) { continue }
+             $lineIndexToSection[$i] = $entry.SectionNum
+             [void]$assignedLines.Add($i)
+             break
+         }
+     }
+
+     # Insert anchors in reverse line order so indices stay valid.
+     foreach ($idx in ($lineIndexToSection.Keys | Sort-Object -Descending)) {
+         $anchorId = "Section_$($lineIndexToSection[$idx])"
+         $lines.Insert($idx, "<a id=`"$anchorId`"></a>")
+         $injectedCount++
+     }
+
+     [pscustomobject]@{
+         Markdown = $lines -join $newLine
+         InjectedCount = $injectedCount
+     }
+ }
+
+ function Repair-OpenSpecSectionGuidLinksByHeadingMatch {
+     # Rewrites links of the form [text](#Section_GUID) (32 hex digits) to the anchor of the
+     # section whose title best matches the link text.
+     #
+     # A title -> anchor-id lookup is built from three sources, first registration wins:
+     #   1. SectionToTitleMap entries (key = anchor id, value = title), if supplied;
+     #   2. every anchor-only line <a id="Section_N.N"></a> paired with the line that follows it;
+     #   3. every heading that starts with a section number.
+     # Titles are registered whitespace-normalized, and also without a leading section number
+     # and/or without a trailing "(...)" suffix.
+     #
+     # Parameters:
+     #   Markdown          - the Markdown text to process.
+     #   SectionToTitleMap - optional dictionary-like object (must support GetEnumerator()).
+     # Returns an object with:
+     #   Markdown      - the text with resolvable links rewritten; others are left as-is.
+     #   LinksRepaired - number of links rewritten.
+     [CmdletBinding()]
+     param(
+         [Parameter(Mandatory)]
+         [string]$Markdown,
+
+         [Parameter()]
+         [object]$SectionToTitleMap
+     )
+
+     $ignoreCase = [System.Text.RegularExpressions.RegexOptions]::IgnoreCase
+     $lineArray = $Markdown -split '\r?\n'
+     $titleToSection = @{}
+     $leadingNumber = '^\d+(?:\.\d+)*\s+'
+     $trailingParen = '\s*\([^)]*\)\s*$' # "Share Control Header (TS_SHARECONTROLHEADER)" -> "Share Control Header"
+
+     # Registers a whitespace-normalized title unless it is empty or already known.
+     $registerTitle = {
+         param([string]$candidate, [string]$targetId)
+         $key = ($candidate -replace '\s+', ' ').Trim()
+         if ($key -and -not $titleToSection.ContainsKey($key)) { $titleToSection[$key] = $targetId }
+     }
+
+     # Collect all existing anchor ids (any id, not only Section_*).
+     $existingAnchors = [System.Collections.Generic.HashSet[string]]::new([StringComparer]::OrdinalIgnoreCase)
+     foreach ($m in [regex]::Matches($Markdown, '<a\s+id="([^"]+)"\s*>\s*</a>', $ignoreCase)) {
+         [void]$existingAnchors.Add($m.Groups[1].Value)
+     }
+
+     # Source 1: caller-supplied map.
+     if ($SectionToTitleMap) {
+         foreach ($entry in $SectionToTitleMap.GetEnumerator()) {
+             $sectionId = [string]$entry.Key
+             $title = [string]$entry.Value
+             if ([string]::IsNullOrWhiteSpace($sectionId) -or [string]::IsNullOrWhiteSpace($title)) { continue }
+             & $registerTitle $title $sectionId
+             & $registerTitle ($title -replace $leadingNumber, '') $sectionId
+         }
+     }
+
+     # Source 2: lines with existing Section_N.N anchors + following line (heading or plain title).
+     $anchorLineRegex = [regex]::new('^\s*<a\s+id="(Section_\d+(?:\.\d+)*)"\s*>\s*</a>\s*$', $ignoreCase)
+     $anyHeadingRegex = [regex]::new('^\s*#{1,6}\s+(?<title>.+)$')
+     for ($i = 0; $i -lt $lineArray.Count; $i++) {
+         $anchorMatch = $anchorLineRegex.Match($lineArray[$i])
+         if (-not $anchorMatch.Success) { continue }
+         $sectionId = $anchorMatch.Groups[1].Value
+         $nextLine = if ($i + 1 -lt $lineArray.Count) { $lineArray[$i + 1].Trim() } else { '' }
+         if ([string]::IsNullOrWhiteSpace($nextLine)) { continue }
+         $headingMatch = $anyHeadingRegex.Match($nextLine)
+         $title = if ($headingMatch.Success) { $headingMatch.Groups['title'].Value.Trim() } else { $nextLine }
+         & $registerTitle $title $sectionId
+         & $registerTitle ($title -replace $leadingNumber, '') $sectionId
+         & $registerTitle ($title -replace $trailingParen, '') $sectionId
+     }
+
+     # Source 3: ALL headings that start with section number (e.g. ## 2.2.8.1.1.1 Share Control Header).
+     $headingNumRegex = [regex]::new('^\s*#{1,6}\s+(\d+(?:\.\d+)*)\s+(?<title>.+)$')
+     foreach ($line in $lineArray) {
+         $hm = $headingNumRegex.Match($line)
+         if (-not $hm.Success) { continue }
+         $sectionId = "Section_$($hm.Groups[1].Value)"
+         $title = $hm.Groups['title'].Value.Trim()
+         & $registerTitle $title $sectionId
+         & $registerTitle (($title -replace $leadingNumber, '') -replace $trailingParen, '') $sectionId
+     }
+
+     # Find best section for link text: exact match, "(section N.N.N)" embedded in the text,
+     # or prefix match in either direction (shortest matching title wins).
+     $findSectionForLinkText = {
+         param($norm, $titleToSection, $existingAnchors)
+         # Empty text would prefix-match every title below; treat it as unresolvable.
+         if ([string]::IsNullOrWhiteSpace($norm)) { return $null }
+         if ($titleToSection.ContainsKey($norm)) { return $titleToSection[$norm] }
+         # Extract section number from link text like "Share Control Header (section 2.2.8.1.1.1)"
+         if ($norm -match '\(section\s+(\d+(?:\.\d+)*)\)') {
+             $extractedId = "Section_$($Matches[1])"
+             if ($existingAnchors.Contains($extractedId)) { return $extractedId }
+         }
+         $candidates = [System.Collections.Generic.List[object]]::new()
+         foreach ($key in $titleToSection.Keys) {
+             if ($key -eq $norm) { return $titleToSection[$key] }
+             if ($key.StartsWith($norm) -or $norm.StartsWith($key)) {
+                 [void]$candidates.Add([pscustomobject]@{ Key = $key; SectionId = $titleToSection[$key] })
+             }
+         }
+         if ($candidates.Count -eq 1) { return $candidates[0].SectionId }
+         if ($candidates.Count -gt 1) {
+             # Prefer shortest key (most specific match), e.g. "Status Info PDU" over "Status Info PDU Data (TS_...)"
+             $best = $candidates | Sort-Object -Property { $_.Key.Length } | Select-Object -First 1
+             return $best.SectionId
+         }
+         return $null
+     }
+
+     # Rewrite and count in a single pass. The counter lives in a hashtable so the evaluator
+     # script block can mutate it.
+     $stats = @{ Repaired = 0 }
+     $guidLinkRegex = [regex]::new('\[(?<text>[^\]]+)\]\(#Section_[a-fA-F0-9]{32}\)')
+     $result = $guidLinkRegex.Replace($Markdown, {
+         param($m)
+         $rawText = $m.Groups['text'].Value
+         $trimmed = $rawText.Trim()
+         # Strip emphasis markers and collapse whitespace before looking the text up.
+         $norm = ($rawText -replace '\*+', '' -replace '\s+', ' ').Trim()
+         $sectionId = & $findSectionForLinkText $norm $titleToSection $existingAnchors
+         if (-not $sectionId -and $trimmed -ne $norm) {
+             $sectionId = & $findSectionForLinkText $trimmed $titleToSection $existingAnchors
+         }
+         if ($sectionId) {
+             $stats.Repaired++
+             "[$rawText](#$sectionId)"
+         }
+         else {
+             $m.Value
+         }
+     })
+
+     [pscustomobject]@{
+         Markdown = $result
+         LinksRepaired = $stats.Repaired
+     }
+ }
+
+ function Add-OpenSpecGlossaryAnchorsAndRepairLinks {
+     # Two steps over the glossary of a converted spec:
+     #   1. Inside the "N.N Glossary" section, insert <a id="gt_slug"></a> before each
+     #      definition line of the form "**term**: ..." (slug = lower-cased term, whitespace
+     #      turned into '-', other non-word characters dropped).
+     #   2. Rewrite links [text](#gt_GUID) to [text](#gt_slug), resolving the slug from
+     #      GuidToGlossarySlugMap first and from the link text otherwise.
+     #
+     # Parameters:
+     #   Markdown              - the Markdown text to process.
+     #   GuidToGlossarySlugMap - optional dictionary-like object (must support GetEnumerator())
+     #                           mapping glossary GUIDs to target slugs.
+     # Returns an object with:
+     #   Markdown               - resulting text, lines joined with [Environment]::NewLine.
+     #   AnchorsInjected        - number of anchor lines inserted.
+     #   LinksRepaired          - number of glossary links rewritten.
+     #   SourceMapLinksRepaired - how many of those were resolved via GuidToGlossarySlugMap.
+     [CmdletBinding()]
+     param(
+         [Parameter(Mandatory)]
+         [string]$Markdown,
+
+         [Parameter()]
+         [object]$GuidToGlossarySlugMap
+     )
+
+     $newLine = [Environment]::NewLine
+     $lineArray = [System.Collections.Generic.List[string]]::new([string[]]($Markdown -split '\r?\n'))
+     $termToSlug = @{}
+     $insertedSlugs = @{}
+     $injectedCount = 0
+
+     # Find the Glossary section: heading like "## 1.1 Glossary".
+     $glossaryHeadingRegex = [regex]::new('^\s*(#{1,6})\s+(?<num>\d+(?:\.\d+)*)\s+Glossary\s*$')
+     $anyHeadingRegex = [regex]::new('^\s*(#+)\s+.+$')
+     $glossaryDefRegex = [regex]::new('^\s*\*\*(?<term>[^*]+)\*\*\s*:\s*')
+     $glossaryAnchorRegex = [regex]::new('^\s*<a\s+id="gt_[^"]+"\s*>\s*</a>\s*$')
+
+     $i = 0
+     $inGlossary = $false
+     $glossaryLevel = 0
+     while ($i -lt $lineArray.Count) {
+         $line = $lineArray[$i]
+         $headMatch = $glossaryHeadingRegex.Match($line)
+         if ($headMatch.Success) {
+             $inGlossary = $true
+             $glossaryLevel = $headMatch.Groups[1].Value.Length
+             $i++
+             continue
+         }
+         if ($inGlossary) {
+             # A heading at the same or a shallower level ends the glossary section.
+             $headOnly = $anyHeadingRegex.Match($line)
+             if ($headOnly.Success -and $headOnly.Groups[1].Value.Length -le $glossaryLevel) {
+                 $inGlossary = $false
+             }
+         }
+         if ($inGlossary) {
+             $defMatch = $glossaryDefRegex.Match($line)
+             if ($defMatch.Success) {
+                 $term = $defMatch.Groups['term'].Value.Trim()
+                 $slug = $term -replace '\s+', '-' -replace '[^\w\-]', '' -replace '-+', '-' -replace '^-|-$', ''
+                 $slug = $slug.ToLowerInvariant()
+                 if ([string]::IsNullOrWhiteSpace($slug)) { $slug = "term-$i" }
+                 $slug = "gt_$slug"
+                 $prevLine = if ($i -gt 0) { $lineArray[$i - 1].Trim() } else { '' }
+                 $alreadyHasAnchor = $glossaryAnchorRegex.IsMatch($prevLine)
+                 if (-not $insertedSlugs.ContainsKey($slug) -and -not $alreadyHasAnchor) {
+                     $insertedSlugs[$slug] = $true
+                     $lineArray.Insert($i, "<a id=`"$slug`"></a>")
+                     $injectedCount++
+                     # Step over the inserted anchor so $i points at the definition line again.
+                     $i++
+                 }
+                 $normalizedTerm = $term.Trim()
+                 $termToSlug[$normalizedTerm] = $slug
+                 if ($term -match '^(.+?)\s+\(([^)]+)\)\s*$') {
+                     $abbrev = $Matches[2].Trim()
+                     $termBeforeParen = $Matches[1].Trim()
+                     $termToSlug[$abbrev] = $slug
+                     $termToSlug[$termBeforeParen] = $slug
+                     if ($abbrev.Length -gt 0 -and -not $abbrev.EndsWith('s')) {
+                         $termToSlug["$abbrev`s"] = $slug
+                     }
+                     # Plural phrasing used in body links: "Message Authentication Codes (MAC)", "input method editors (IMEs)", "Multipoint Communication Services (MCS)".
+                     if (-not $termBeforeParen.EndsWith('s')) {
+                         $termToSlug["$termBeforeParen`s ($abbrev)"] = $slug
+                         $abbrevPlural = if ($abbrev.EndsWith('s')) { $abbrev } else { "$abbrev`s" }
+                         $termToSlug["$termBeforeParen`s ($abbrevPlural)"] = $slug
+                     }
+                 }
+                 if (-not $normalizedTerm.EndsWith('s') -and $normalizedTerm.Length -gt 1) {
+                     $termToSlug["$normalizedTerm`s"] = $slug
+                 }
+             }
+         }
+         $i++
+     }
+
+     $result = $lineArray -join $newLine
+
+     # Rewrite [text](#gt_guid) to [text](#gt_slug) using source map first (deterministic), then link text -> slug map.
+     # Both hex cases are accepted; the captured GUID is lower-cased before lookup.
+     $linkRegex = [regex]::new('\[(?<text>[^\]]+)\]\(#gt_(?<guid>[a-fA-F0-9\-]{36})\)')
+     $sourceGuidToSlug = @{}
+     if ($GuidToGlossarySlugMap) {
+         foreach ($entry in $GuidToGlossarySlugMap.GetEnumerator()) {
+             $guid = ([string]$entry.Key).ToLowerInvariant()
+             $slug = [string]$entry.Value
+             if ([string]::IsNullOrWhiteSpace($guid) -or [string]::IsNullOrWhiteSpace($slug)) { continue }
+             if (-not $sourceGuidToSlug.ContainsKey($guid)) { $sourceGuidToSlug[$guid] = $slug }
+         }
+     }
+     # Case-insensitive fallback: build lower-key map so link text "RSA" / "rsa" resolve when abbrev is "RSA".
+     $slugByLower = @{}
+     foreach ($k in $termToSlug.Keys) {
+         $lower = $k.ToLowerInvariant()
+         if (-not $slugByLower.ContainsKey($lower)) { $slugByLower[$lower] = $termToSlug[$k] }
+     }
+
+     # Counters are kept in a hashtable so the evaluator script block can update them while
+     # rewriting, instead of re-scanning all matches afterwards.
+     $stats = @{ Repaired = 0; FromSourceMap = 0 }
+     $result = $linkRegex.Replace($result, {
+         param($m)
+         $rawText = $m.Groups['text'].Value
+         $normalized = ($rawText -replace '\*+', '').Trim()
+         $guid = $m.Groups['guid'].Value.ToLowerInvariant()
+         $slug = $null
+         if ($sourceGuidToSlug.ContainsKey($guid)) {
+             $slug = $sourceGuidToSlug[$guid]
+             $stats.FromSourceMap++
+         }
+         elseif ($termToSlug.ContainsKey($normalized)) {
+             $slug = $termToSlug[$normalized]
+         }
+         elseif ($termToSlug.ContainsKey($rawText.Trim())) {
+             $slug = $termToSlug[$rawText.Trim()]
+         }
+         elseif ($slugByLower.ContainsKey($normalized.ToLowerInvariant())) {
+             $slug = $slugByLower[$normalized.ToLowerInvariant()]
+         }
+         if ($slug) {
+             $stats.Repaired++
+             "[$rawText](#$slug)"
+         }
+         else {
+             $m.Value
+         }
+     })
+
+     [pscustomobject]@{
+         Markdown = $result
+         AnchorsInjected = $injectedCount
+         LinksRepaired = $stats.Repaired
+         SourceMapLinksRepaired = $stats.FromSourceMap
+     }
+ }
diff --git a/AwakeCoding.OpenSpecs/Public/Convert-OpenSpecToMarkdown.ps1 b/AwakeCoding.OpenSpecs/Public/Convert-OpenSpecToMarkdown.ps1
index 5be2c2e7..da8f2b73 100644
--- a/AwakeCoding.OpenSpecs/Public/Convert-OpenSpecToMarkdown.ps1
+++ b/AwakeCoding.OpenSpecs/Public/Convert-OpenSpecToMarkdown.ps1
@@ -15,7 +15,9 @@ function Convert-OpenSpecToMarkdown {
[switch]$Parallel,
- [int]$ThrottleLimit = 4
+ [int]$ThrottleLimit = 4,
+
+ [switch]$RemoveDocumentIndex = $true
)
begin {
@@ -49,9 +51,10 @@ function Convert-OpenSpecToMarkdown {
$outputPathArg = $OutputPath
$forceArg = $Force
$sourceFormatArg = $SourceFormat
+ $removeIndexArg = $RemoveDocumentIndex
$items | ForEach-Object -Parallel {
Import-Module (Join-Path $using:moduleBase 'AwakeCoding.OpenSpecs.psd1') -Force | Out-Null
- Convert-OpenSpecToMarkdown -Path $_.Path -OutputPath $using:outputPathArg -Force:$using:forceArg -SourceFormat $using:sourceFormatArg
+ Convert-OpenSpecToMarkdown -Path $_.Path -OutputPath $using:outputPathArg -Force:$using:forceArg -SourceFormat $using:sourceFormatArg -RemoveDocumentIndex:$using:removeIndexArg
} -ThrottleLimit $ThrottleLimit
return
}
@@ -141,7 +144,8 @@ function Convert-OpenSpecToMarkdown {
$rawMarkdown = Get-Content -LiteralPath $conversionStep.OutputPath -Raw
$normalized = ConvertTo-OpenSpecTextLayout -Markdown $rawMarkdown
- $cleaned = Invoke-OpenSpecMarkdownCleanup -Markdown $normalized.Markdown -CurrentProtocolId $protocolId
+ $sourceLinkMetadata = if ($conversionStep.PSObject.Properties['LinkMetadata']) { $conversionStep.LinkMetadata } else { $null }
+ $cleaned = Invoke-OpenSpecMarkdownCleanup -Markdown $normalized.Markdown -CurrentProtocolId $protocolId -RemoveDocumentIndex:$RemoveDocumentIndex -SourceLinkMetadata $sourceLinkMetadata
$allIssues = @()
if ($normalized.Issues) {
@@ -201,8 +205,14 @@ function Convert-OpenSpecToMarkdown {
HasDocling = $toolchain.HasDocling
HasMarkItDown = $toolchain.HasMarkItDown
}
+ SourceLinkMetadataPath = if ($sourceLinkMetadata) { (Join-Path -Path $artifactDirectory -ChildPath 'source-link-metadata.json') } else { $null }
} | ConvertTo-Json -Depth 8 | Set-Content -LiteralPath $sourceManifestPath -Encoding UTF8
+ if ($sourceLinkMetadata) {
+ $sourceLinkMetadataPath = Join-Path -Path $artifactDirectory -ChildPath 'source-link-metadata.json'
+ $sourceLinkMetadata | ConvertTo-Json -Depth 10 | Set-Content -LiteralPath $sourceLinkMetadataPath -Encoding UTF8
+ }
+
$reportPath = Join-Path -Path $artifactDirectory -ChildPath 'conversion-report.json'
[pscustomobject]@{
ProtocolId = $protocolId
diff --git a/AwakeCoding.OpenSpecs/Public/Get-OpenSpecCatalog.ps1 b/AwakeCoding.OpenSpecs/Public/Get-OpenSpecCatalog.ps1
index 1d0f49f8..56c23c75 100644
--- a/AwakeCoding.OpenSpecs/Public/Get-OpenSpecCatalog.ps1
+++ b/AwakeCoding.OpenSpecs/Public/Get-OpenSpecCatalog.ps1
@@ -1,7 +1,18 @@
+ # Learn page that lists the Windows Protocols reference documents; recorded as the
+ # SourcePage of every reference entry that Get-OpenSpecCatalog adds.
+ $script:OpenSpecReferenceDocsUri = 'https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-winprotlp/1593dc07-6116-4e9e-8aeb-85c7438fab0a'
+
+ # Reference specs (MS-DTYP, MS-ERREF, MS-LCID, MS-UCODEREF) listed on the page above.
+ # Each entry carries ProtocolId, Title, Slug and SpecPageUrl; Get-OpenSpecCatalog appends
+ # them only when -IncludeReferenceSpecs is given and the ProtocolId is not already present.
+ $script:OpenSpecReferenceSpecs = @(
+ [pscustomobject]@{ ProtocolId = 'MS-DTYP'; Title = 'Windows Data Types'; Slug = 'ms-dtyp'; SpecPageUrl = 'https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-dtyp/cca27429-5689-4a16-b2b4-9325d93e4ba2' }
+ [pscustomobject]@{ ProtocolId = 'MS-ERREF'; Title = 'Windows Error Codes'; Slug = 'ms-erref'; SpecPageUrl = 'https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-erref/1bc92ddf-b79e-413c-bbaa-99a5281a6c90' }
+ [pscustomobject]@{ ProtocolId = 'MS-LCID'; Title = 'Windows Language Code Identifier (LCID) Reference'; Slug = 'ms-lcid'; SpecPageUrl = 'https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-lcid/70feba9f-294e-491e-b6eb-56532684c37f' }
+ [pscustomobject]@{ ProtocolId = 'MS-UCODEREF'; Title = 'Windows Protocols Unicode Reference'; Slug = 'ms-ucoderef'; SpecPageUrl = 'https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-ucoderef/4a045e08-fc29-4f22-baf4-16f38c2825fb' }
+ )
+
function Get-OpenSpecCatalog {
[CmdletBinding()]
param(
- [string]$Uri = 'https://learn.microsoft.com/en-us/openspecs/windows_protocols/MS-WINPROTLP/e36c976a-6263-42a8-b119-7a3cc41ddd2a'
+ [string]$Uri = 'https://learn.microsoft.com/en-us/openspecs/windows_protocols/MS-WINPROTLP/e36c976a-6263-42a8-b119-7a3cc41ddd2a',
+ [switch]$IncludeReferenceSpecs
)
$response = Invoke-OpenSpecRequest -Uri $Uri
@@ -79,5 +90,21 @@ function Get-OpenSpecCatalog {
}
}
+ if ($IncludeReferenceSpecs) {
+ foreach ($ref in $script:OpenSpecReferenceSpecs) {
+ if ($seen.Add($ref.ProtocolId)) {
+ $entries.Add([pscustomobject]@{
+ PSTypeName = 'AwakeCoding.OpenSpecs.Entry'
+ ProtocolId = $ref.ProtocolId
+ Title = $ref.Title
+ Description = ''
+ SpecPageUrl = $ref.SpecPageUrl
+ Slug = $ref.Slug
+ SourcePage = $script:OpenSpecReferenceDocsUri
+ })
+ }
+ }
+ }
+
$entries
}
diff --git a/AwakeCoding.OpenSpecs/Public/Invoke-OpenSpecConversionPipeline.ps1 b/AwakeCoding.OpenSpecs/Public/Invoke-OpenSpecConversionPipeline.ps1
index c0d052c2..f4e52567 100644
--- a/AwakeCoding.OpenSpecs/Public/Invoke-OpenSpecConversionPipeline.ps1
+++ b/AwakeCoding.OpenSpecs/Public/Invoke-OpenSpecConversionPipeline.ps1
@@ -16,7 +16,9 @@ function Invoke-OpenSpecConversionPipeline {
[switch]$Parallel,
- [int]$ThrottleLimit = 4
+ [int]$ThrottleLimit = 4,
+
+ [switch]$RemoveDocumentIndex = $true
)
if (-not $ProtocolId -and -not $Query) {
@@ -31,5 +33,5 @@ function Invoke-OpenSpecConversionPipeline {
}
$toConvert = $downloadResults | Where-Object { $_.Status -in 'Downloaded', 'Exists' }
- $toConvert | Convert-OpenSpecToMarkdown -OutputPath $OutputPath -Force:$Force -Parallel:$Parallel -ThrottleLimit $ThrottleLimit
+ $toConvert | Convert-OpenSpecToMarkdown -OutputPath $OutputPath -Force:$Force -Parallel:$Parallel -ThrottleLimit $ThrottleLimit -RemoveDocumentIndex:$RemoveDocumentIndex
}
diff --git a/AwakeCoding.OpenSpecs/Public/Save-OpenSpecDocument.ps1 b/AwakeCoding.OpenSpecs/Public/Save-OpenSpecDocument.ps1
index 63e1cbb9..02599a37 100644
--- a/AwakeCoding.OpenSpecs/Public/Save-OpenSpecDocument.ps1
+++ b/AwakeCoding.OpenSpecs/Public/Save-OpenSpecDocument.ps1
@@ -17,7 +17,10 @@ function Save-OpenSpecDocument {
[switch]$AllVersions,
- [switch]$Force
+ [switch]$Force,
+
+ [switch]$Parallel,
+ [int]$ThrottleLimit = 8
)
begin {
@@ -45,8 +48,15 @@ function Save-OpenSpecDocument {
}
if ($item.ProtocolId) {
- foreach ($link in (Get-OpenSpecDownloadLink -ProtocolId $item.ProtocolId -Format $Format -AllVersions:$AllVersions -IncludePrevious:$IncludePrevious)) {
- [void]$links.Add($link)
+ if ($item.SpecPageUrl) {
+ foreach ($link in (Get-OpenSpecDownloadLink -InputObject $item -Format $Format -AllVersions:$AllVersions -IncludePrevious:$IncludePrevious)) {
+ [void]$links.Add($link)
+ }
+ }
+ else {
+ foreach ($link in (Get-OpenSpecDownloadLink -ProtocolId $item.ProtocolId -Format $Format -AllVersions:$AllVersions -IncludePrevious:$IncludePrevious)) {
+ [void]$links.Add($link)
+ }
}
}
}
@@ -73,6 +83,7 @@ function Save-OpenSpecDocument {
}
}
+ $toDownload = [System.Collections.Generic.List[object]]::new()
foreach ($link in $links) {
$fileName = $link.FileName
if ([string]::IsNullOrWhiteSpace($fileName)) {
@@ -94,15 +105,17 @@ function Save-OpenSpecDocument {
continue
}
- if (-not $PSCmdlet.ShouldProcess($link.Url, "Download to $destination")) {
- continue
+ if ($PSCmdlet.ShouldProcess($link.Url, "Download to $destination")) {
+ [void]$toDownload.Add([pscustomobject]@{ Link = $link; Destination = $destination })
}
+ }
+ $downloadOne = {
+ param($link, $destination)
try {
$attempt = 0
$maxRetries = 4
$delay = 1
-
while ($true) {
$attempt++
try {
@@ -114,17 +127,12 @@ function Save-OpenSpecDocument {
if ($_.Exception.Response -and $_.Exception.Response.StatusCode) {
$statusCode = [int]$_.Exception.Response.StatusCode
}
-
$transient = ($statusCode -in 429, 500, 502, 503, 504) -or (-not $statusCode)
- if ($attempt -ge $maxRetries -or -not $transient) {
- throw
- }
-
+ if ($attempt -ge $maxRetries -or -not $transient) { throw }
Start-Sleep -Seconds $delay
$delay = [Math]::Min($delay * 2, 16)
}
}
-
[pscustomobject]@{
PSTypeName = 'AwakeCoding.OpenSpecs.DownloadResult'
ProtocolId = $link.ProtocolId
@@ -148,5 +156,61 @@ function Save-OpenSpecDocument {
}
}
}
+
+ $useParallel = $Parallel -and $PSVersionTable.PSVersion.Major -ge 7 -and $toDownload.Count -gt 1
+ if ($useParallel) {
+ $toDownload | ForEach-Object -Parallel {
+ $link = $_.Link
+ $destination = $_.Destination
+ try {
+ $attempt = 0
+ $maxRetries = 4
+ $delay = 1
+ while ($true) {
+ $attempt++
+ try {
+ Invoke-WebRequest -Uri $link.Url -OutFile $destination -MaximumRedirection 8 -ErrorAction Stop
+ break
+ }
+ catch {
+ $statusCode = $null
+ if ($_.Exception.Response -and $_.Exception.Response.StatusCode) {
+ $statusCode = [int]$_.Exception.Response.StatusCode
+ }
+ $transient = ($statusCode -in 429, 500, 502, 503, 504) -or (-not $statusCode)
+ if ($attempt -ge $maxRetries -or -not $transient) { throw }
+ Start-Sleep -Seconds $delay
+ $delay = [Math]::Min($delay * 2, 16)
+ }
+ }
+ [pscustomobject]@{
+ PSTypeName = 'AwakeCoding.OpenSpecs.DownloadResult'
+ ProtocolId = $link.ProtocolId
+ Format = $link.Format
+ Url = $link.Url
+ Path = $destination
+ Status = 'Downloaded'
+ Size = (Get-Item -LiteralPath $destination).Length
+ }
+ }
+ catch {
+ [pscustomobject]@{
+ PSTypeName = 'AwakeCoding.OpenSpecs.DownloadResult'
+ ProtocolId = $link.ProtocolId
+ Format = $link.Format
+ Url = $link.Url
+ Path = $destination
+ Status = 'Failed'
+ Error = $_.Exception.Message
+ Size = $null
+ }
+ }
+ } -ThrottleLimit $ThrottleLimit
+ }
+ else {
+ foreach ($item in $toDownload) {
+ & $downloadOne -link $item.Link -destination $item.Destination
+ }
+ }
}
}
diff --git a/AwakeCoding.OpenSpecs/Public/Update-OpenSpecIndex.ps1 b/AwakeCoding.OpenSpecs/Public/Update-OpenSpecIndex.ps1
index 671e63aa..4f3a82a4 100644
--- a/AwakeCoding.OpenSpecs/Public/Update-OpenSpecIndex.ps1
+++ b/AwakeCoding.OpenSpecs/Public/Update-OpenSpecIndex.ps1
@@ -4,9 +4,13 @@ function Update-OpenSpecIndex {
[Parameter(Mandatory)]
[string]$Path,
+ [string]$Title = 'Microsoft Open Specifications',
+
[switch]$UseCatalogTitles = $true,
- [switch]$IncludeDescription = $false
+ [switch]$IncludeDescription = $false,
+
+ [string[]]$OverviewProtocolIds = @()
)
if (-not (Test-Path -LiteralPath $Path)) {
@@ -36,6 +40,9 @@ function Update-OpenSpecIndex {
$specName = $dir.Name
$mdFile = Join-Path -Path $dir.FullName -ChildPath "$specName.md"
+ if (-not (Test-Path -LiteralPath $mdFile)) {
+ $mdFile = Join-Path -Path $dir.FullName -ChildPath 'README.md'
+ }
if (-not (Test-Path -LiteralPath $mdFile)) {
$mdFile = Join-Path -Path $dir.FullName -ChildPath 'index.md'
}
@@ -45,18 +52,18 @@ function Update-OpenSpecIndex {
}
$mdFileName = [System.IO.Path]::GetFileName($mdFile)
- $title = ''
+ $entryTitle = ''
$description = ''
$catalogEntry = $catalogMap[$specName]
if ($catalogEntry) {
- $title = $catalogEntry.Title
+ $entryTitle = $catalogEntry.Title
if ($IncludeDescription -and $catalogEntry.Description) {
$description = $catalogEntry.Description
}
}
- if ([string]::IsNullOrWhiteSpace($title)) {
+ if ([string]::IsNullOrWhiteSpace($entryTitle)) {
$lines = Get-Content -LiteralPath $mdFile -TotalCount 30 -ErrorAction SilentlyContinue
$protocolLabelRegex = [regex]::new('^\*\*\[?(?:MS|MC)-[A-Z0-9-]+\]?\s*:\s*\*\*$', 'IgnoreCase')
$boldLineRegex = [regex]::new('^\*\*(.+)\*\*$')
@@ -73,45 +80,75 @@ function Update-OpenSpecIndex {
if ($candidate -like "*$pat*") { $isBoilerplate = $true; break }
}
if (-not $isBoilerplate -and $candidate.Length -gt 2) {
- $title = $candidate
+ $entryTitle = $candidate
break
}
}
}
}
- if ([string]::IsNullOrWhiteSpace($title)) {
- $title = $specName
+ if ([string]::IsNullOrWhiteSpace($entryTitle)) {
+ $entryTitle = $specName
}
[void]$entries.Add([pscustomobject]@{
Name = $specName
- Title = $title
+ Title = $entryTitle
Description = $description
Link = "$specName/$mdFileName"
})
}
+ $overviewIds = [System.Collections.Generic.HashSet[string]]::new([System.StringComparer]::OrdinalIgnoreCase)
+ foreach ($id in $OverviewProtocolIds) { [void]$overviewIds.Add($id.Trim()) }
+
+ $overviewEntries = @($entries | Where-Object { $overviewIds.Contains($_.Name) })
+ $specEntries = @($entries | Where-Object { -not $overviewIds.Contains($_.Name) })
+
$sb = New-Object System.Text.StringBuilder
- [void]$sb.AppendLine('# Microsoft Open Specifications')
+ [void]$sb.AppendLine("# $Title")
[void]$sb.AppendLine()
- [void]$sb.AppendLine("$($entries.Count) protocol specifications converted to Markdown.")
+ $totalCount = $entries.Count
+ if ($overviewEntries.Count -gt 0 -and $specEntries.Count -gt 0) {
+ [void]$sb.AppendLine("$totalCount documents converted to Markdown (overview and protocol specifications).")
+ }
+ else {
+ [void]$sb.AppendLine("$totalCount protocol specifications converted to Markdown.")
+ }
[void]$sb.AppendLine()
- if ($IncludeDescription) {
- [void]$sb.AppendLine('| Protocol | Title | Description |')
- [void]$sb.AppendLine('|---|---|---|')
- foreach ($entry in $entries) {
- $descEscaped = ($entry.Description -replace '\|', ', ' -replace '\r?\n', ' ').Trim()
- [void]$sb.AppendLine("| [$($entry.Name)]($($entry.Link)) | $($entry.Title) | $descEscaped |")
+ $writeTable = {
+ param($list, $includeDesc)
+ if ($includeDesc) {
+ [void]$sb.AppendLine('| Protocol | Title | Description |')
+ [void]$sb.AppendLine('|---|---|---|')
+ foreach ($entry in $list) {
+ $descEscaped = ($entry.Description -replace '\|', ', ' -replace '\r?\n', ' ').Trim()
+ [void]$sb.AppendLine("| [$($entry.Name)]($($entry.Link)) | $($entry.Title) | $descEscaped |")
+ }
+ }
+ else {
+ [void]$sb.AppendLine('| Protocol | Title |')
+ [void]$sb.AppendLine('|---|---|')
+ foreach ($entry in $list) {
+ [void]$sb.AppendLine("| [$($entry.Name)]($($entry.Link)) | $($entry.Title) |")
+ }
}
}
- else {
- [void]$sb.AppendLine('| Protocol | Title |')
- [void]$sb.AppendLine('|---|---|')
- foreach ($entry in $entries) {
- [void]$sb.AppendLine("| [$($entry.Name)]($($entry.Link)) | $($entry.Title) |")
+
+ if ($overviewEntries.Count -gt 0) {
+ [void]$sb.AppendLine('## Overview')
+ [void]$sb.AppendLine()
+ & $writeTable $overviewEntries $IncludeDescription
+ [void]$sb.AppendLine()
+ }
+
+ if ($specEntries.Count -gt 0) {
+ if ($overviewEntries.Count -gt 0) {
+ [void]$sb.AppendLine('## Protocol specifications')
+ [void]$sb.AppendLine()
}
+ & $writeTable $specEntries $IncludeDescription
}
$readmePath = Join-Path -Path $Path -ChildPath 'README.md'
diff --git a/README.md b/README.md
index 7157c469..a9efeea1 100644
--- a/README.md
+++ b/README.md
@@ -28,13 +28,13 @@ These folders are tracked with `.gitkeep`, while their contents are ignored via
## Cmdlets
-- `Get-OpenSpecCatalog` - Gets Windows Protocol technical document entries from the Learn catalog page.
+- `Get-OpenSpecCatalog` - Gets Windows Protocol technical document entries from the Learn catalog page. Use `-IncludeReferenceSpecs` to also include reference docs (MS-DTYP, MS-ERREF, MS-LCID, MS-UCODEREF) from [Reference Documents](https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-winprotlp/1593dc07-6116-4e9e-8aeb-85c7438fab0a).
- `Find-OpenSpec` - Filters catalog entries by query or protocol ID.
- `Get-OpenSpecVersion` - Resolves latest (or all) version rows for a spec page.
- `Get-OpenSpecDownloadLink` - Gets download URLs for PDF and/or DOCX.
- `Save-OpenSpecDocument` - Downloads selected documents (accepts pipeline from `Get-OpenSpecCatalog` or `Get-OpenSpecDownloadLink`).
- `Test-OpenSpecDownload` - End-to-end validation for a set of protocol IDs.
-- `Convert-OpenSpecToMarkdown` - Converts downloaded DOCX/PDF files to Markdown (supports `-Parallel -ThrottleLimit N` on PowerShell 7+).
+- `Convert-OpenSpecToMarkdown` - Converts downloaded DOCX/PDF files to Markdown (supports `-Parallel -ThrottleLimit N` on PowerShell 7+). By default removes the back-of-document index section (page numbers are not meaningful in Markdown); use `-RemoveDocumentIndex:$false` to keep it.
- `Invoke-OpenSpecConversionPipeline` - Download + convert in one step; use `-Parallel -ThrottleLimit N` to run conversions in parallel.
- `Get-OpenSpecConversionReport` - Reads conversion report artifacts from a converted-specs output tree.
- `Test-OpenSpecMarkdownFidelity` - Runs lightweight fidelity checks on generated Markdown (headings, tables, anchors, TOC links).
@@ -79,6 +79,13 @@ Test-OpenSpecMarkdownFidelity -OutputPath $ConvertedPath
# Generate an index README for the converted specs (e.g. for publish branch)
Update-OpenSpecIndex -Path $ConvertedPath -UseCatalogTitles
Update-OpenSpecIndex -Path $ConvertedPath -UseCatalogTitles -IncludeDescription
+Update-OpenSpecIndex -Path $ConvertedPath -Title 'RDP Specifications' -UseCatalogTitles -IncludeDescription # custom title
+Update-OpenSpecIndex -Path $ConvertedPath -Title 'RDP Specifications' -UseCatalogTitles -IncludeDescription -OverviewProtocolIds MS-RDSOD # overview first
+
+# Include overview documents (e.g. Remote Desktop Services Overview MS-RDSOD; not in main catalog)
+$overview = [pscustomobject]@{ ProtocolId='MS-RDSOD'; SpecPageUrl='https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-rdsod/072543f9-4bd4-4dc6-ab97-9a04bf9d2c6a' }
+Get-OpenSpecDownloadLink -InputObject $overview -Format DOCX | Save-OpenSpecDocument -OutputPath $DownloadPath -Force
+Convert-OpenSpecToMarkdown -Path "$DownloadPath/[MS-RDSOD]-230313.docx" -OutputPath $ConvertedPath -Force
# Compare converted markdown structure to live Learn pages
Compare-OpenSpecToLiveHtml -OutputPath $ConvertedPath -ProtocolId MS-RDPEWA,MS-RDPBCGR
diff --git a/scripts/Build-Publish.ps1 b/scripts/Build-Publish.ps1
index 31e13025..ed442ccf 100644
--- a/scripts/Build-Publish.ps1
+++ b/scripts/Build-Publish.ps1
@@ -2,8 +2,17 @@
.SYNOPSIS
Builds the publish tree the same way convert-and-publish.yml does, for local validation.
.DESCRIPTION
- Downloads all Open Specs DOCX, converts to markdown, builds the publish directory,
- and generates the README index. Use this to validate the build locally before pushing.
+ Downloads all Open Specs DOCX, converts to markdown, repairs broken links,
+ builds the publish directory, generates the README index, and optionally creates
+ Windows_Protocols.zip (Microsoft publishes a PDF zip with the same name; this is the markdown equivalent).
+ Use -Filter for faster local iteration (e.g. -Filter 'MS-RDP' for RDP-related specs).
+.EXAMPLE
+ .\Build-Publish.ps1
+ .\Build-Publish.ps1 -ZipPath '' # skip zip, publish folder only
+.EXAMPLE
+ .\Build-Publish.ps1 -Filter 'MS-RDP' # RDP-related specs only (faster local iteration)
+.EXAMPLE
+ .\Build-Publish.ps1 -Filter 'MS-RDP','MS-NLMP','MS-KILE' # RDP + auth specs
#>
[CmdletBinding()]
param(
@@ -11,7 +20,10 @@ param(
[string]$DownloadsPath = 'downloads-convert',
[string]$ConvertedPath = 'converted-specs',
[string]$PublishPath = 'publish',
- [int]$ThrottleLimit = 4,
+ [string]$ZipPath = 'Windows_Protocols.zip',
+ [string]$IndexTitle = 'Microsoft Open Specifications',
+ [string[]]$Filter = @(),
+ [int]$ThrottleLimit = 8,
[switch]$SkipOpenXmlInstall
)
@@ -36,8 +48,25 @@ try {
Import-Module (Join-Path $root 'AwakeCoding.OpenSpecs') -Force
Write-Host 'Downloading DOCX files...'
- $downloadResults = Get-OpenSpecCatalog |
- Save-OpenSpecDocument -Format DOCX -OutputPath $dlPath -Force |
+ $catalog = Get-OpenSpecCatalog -IncludeReferenceSpecs
+ $patterns = @()  # -like patterns derived from -Filter; empty means "keep the whole catalog"
+ if ($Filter.Count -gt 0) {
+     $patterns = @($Filter | Where-Object { $_ } | ForEach-Object {
+         if ($_ -match '[*?\[\]]') { $_ } else { "$_*" }  # entries without wildcard characters act as prefixes
+     })
+ }
+ if ($patterns.Count -gt 0) {
+     $catalog = @($catalog | Where-Object {
+         $protocolId = $_.ProtocolId  # must not be named $pid: $PID is a constant automatic variable and assigning to it throws
+         foreach ($pattern in $patterns) {
+             if ($protocolId -like $pattern) { return $true }
+         }
+         return $false
+     })  # @(...) keeps .Count meaningful when the filter matches zero or one entry
+     Write-Host "Filter ($($Filter -join ', ')) -> $($catalog.Count) specs"
+ }
+ $downloadResults = $catalog |
+     Save-OpenSpecDocument -Format DOCX -OutputPath $dlPath -Force -Parallel -ThrottleLimit $ThrottleLimit |
Where-Object { $_.Status -in 'Downloaded', 'Exists' }
$toConvert = @($downloadResults)
@@ -46,20 +75,23 @@ try {
Write-Host 'Converting to markdown (parallel)...'
$toConvert | Convert-OpenSpecToMarkdown -OutputPath $convPath -Force -Parallel -ThrottleLimit $ThrottleLimit | Out-Null
+ Write-Host 'Repairing broken links...'
+ $repairScript = Join-Path $root 'scripts\Repair-AllBrokenLinks.ps1'
+ & $repairScript -Path $convPath -Parallel -ThrottleLimit $ThrottleLimit
+
Write-Host 'Building publish directory...'
New-Item -Path $pubPath -ItemType Directory -Force | Out-Null
Get-ChildItem -LiteralPath $convPath -Directory | ForEach-Object {
$name = $_.Name
$md = Join-Path $_.FullName "$name.md"
- if (-not (Test-Path -LiteralPath $md)) {
- $md = Join-Path $_.FullName 'index.md'
- }
+ if (-not (Test-Path -LiteralPath $md)) { $md = Join-Path $_.FullName 'README.md' }
+ if (-not (Test-Path -LiteralPath $md)) { $md = Join-Path $_.FullName 'index.md' }
if (-not (Test-Path -LiteralPath $md)) { return }
$dest = Join-Path $pubPath $name
New-Item -Path $dest -ItemType Directory -Force | Out-Null
- Copy-Item -LiteralPath $md -Destination (Join-Path $dest 'index.md') -Force
+ Copy-Item -LiteralPath $md -Destination $dest -Force
$media = Join-Path $_.FullName 'media'
if (Test-Path -LiteralPath $media -PathType Container) {
@@ -68,7 +100,14 @@ try {
}
Write-Host 'Updating index (README.md)...'
- Update-OpenSpecIndex -Path $pubPath
+ Update-OpenSpecIndex -Path $pubPath -Title $IndexTitle
+
+ if ($ZipPath) {
+ $zipFull = if ([System.IO.Path]::IsPathRooted($ZipPath)) { $ZipPath } else { Join-Path $root $ZipPath }
+ Write-Host "Creating $zipFull ..."
+ Compress-Archive -Path (Join-Path $pubPath '*') -DestinationPath $zipFull -Force
+ Write-Host "Zip created: $zipFull"
+ }
$entryCount = (Get-Content (Join-Path $pubPath 'README.md') | Select-String '^\| \[.*\]').Count
Write-Host "Done. Publish folder: $pubPath ($entryCount specs)"
diff --git a/scripts/Convert-TocToGitHubFriendly.ps1 b/scripts/Convert-TocToGitHubFriendly.ps1
new file mode 100644
index 00000000..5da299d9
--- /dev/null
+++ b/scripts/Convert-TocToGitHubFriendly.ps1
@@ -0,0 +1,12 @@
+# Applies ConvertTo-OpenSpecGitHubFriendlyToc to one spec file and saves it when the function reports Rewritten.
+param([Parameter(Mandatory)][string]$Path)
+$ErrorActionPreference = 'Stop'
+$moduleParent = Split-Path -Parent $PSScriptRoot
+. (Join-Path -Path $moduleParent -ChildPath 'AwakeCoding.OpenSpecs\Private\Invoke-OpenSpecMarkdownCleanup.ps1')
+$original = Get-Content -LiteralPath $Path -Raw -Encoding UTF8
+$tocResult = ConvertTo-OpenSpecGitHubFriendlyToc -Markdown $original
+Write-Host 'Rewritten:' $tocResult.Rewritten
+if ($tocResult.Rewritten) {
+    $tocResult.Markdown | Set-Content -LiteralPath $Path -Encoding UTF8 -NoNewline
+    Write-Host 'File updated.'
+}
diff --git a/scripts/DownloadAndConvertAll.ps1 b/scripts/DownloadAndConvertAll.ps1
new file mode 100644
index 00000000..5f841e9f
--- /dev/null
+++ b/scripts/DownloadAndConvertAll.ps1
@@ -0,0 +1,15 @@
+# Downloads the DOCX of every catalog entry (reference specs included) and converts the results to Markdown under artifacts/.
+$ErrorActionPreference = 'Stop'
+$root = (Get-Item $PSScriptRoot).Parent.FullName
+Set-Location $root  # the relative module path below is resolved from the repository root
+
+Import-Module ./AwakeCoding.OpenSpecs/AwakeCoding.OpenSpecs.psd1 -Force
+
+$downloadDir = Join-Path -Path $root -ChildPath 'artifacts/downloads'
+$convertedDir = Join-Path -Path $root -ChildPath 'artifacts/converted-specs'
+$null = New-Item -Path $downloadDir, $convertedDir -ItemType Directory -Force
+
+Get-OpenSpecCatalog -IncludeReferenceSpecs |
+    Save-OpenSpecDocument -Format DOCX -OutputPath $downloadDir -Force |
+    Where-Object { 'Downloaded', 'Exists' -contains $_.Status } |
+    Convert-OpenSpecToMarkdown -OutputPath $convertedDir -Force -Parallel -ThrottleLimit 4
diff --git a/scripts/Get-BrokenLinksReport.ps1 b/scripts/Get-BrokenLinksReport.ps1
new file mode 100644
index 00000000..08cd3cca
--- /dev/null
+++ b/scripts/Get-BrokenLinksReport.ps1
@@ -0,0 +1,142 @@
+<#
+.SYNOPSIS
+ Reports markdown link targets that have no matching anchor in the document.
+.DESCRIPTION
+ Scans one or more .md files for [text](#fragment) links and anchors,
+ then lists link targets with no matching anchor, grouped by category (gt_, Section_guid, Section_N.N).
+.EXAMPLE
+ .\Get-BrokenLinksReport.ps1 -Path artifacts\converted-specs\MS-RDPBCGR\MS-RDPBCGR.md
+.EXAMPLE
+ .\Get-BrokenLinksReport.ps1 -Path artifacts\converted-specs -OutputReport
+#>
+[CmdletBinding()]
+param(
+ [Parameter(Mandatory)]
+ [string]$Path,
+ [switch]$OutputReport
+)
+
+$ErrorActionPreference = 'Stop'  # makes the Write-Error below terminate the script
+$files = if (Test-Path -LiteralPath $Path -PathType Container) {
+    Get-ChildItem -LiteralPath $Path -Recurse -Filter '*.md' -File | Select-Object -ExpandProperty FullName
+} elseif (Test-Path -LiteralPath $Path -PathType Leaf) {
+    Convert-Path -LiteralPath $Path  # resolves against the PowerShell location; [IO.Path]::GetFullPath uses the process working directory instead
+} else {
+    Write-Error "Path not found: $Path"
+}
+
+$linkRegex = [regex]::new('\[(?<text>[^\]]+)\]\(#(?<target>[^)]+)\)')  # [text](#target); group names match the Groups['text'] / Groups['target'] lookups in the loop
+$anchorRegex = [regex]::new('<a\s+id="([^"]+)"', 'IgnoreCase')  # group 1 = anchor id. NOTE(review): pattern reconstructed; confirm it matches the anchors the converter emits
+
+foreach ($mdPath in $files) {
+    $content = [string](Get-Content -LiteralPath $mdPath -Raw -Encoding UTF8)  # cast turns the $null returned for an empty file into ''
+    $anchors = [System.Collections.Generic.HashSet[string]]::new([StringComparer]::OrdinalIgnoreCase)  # anchor ids found in this file
+    foreach ($m in $anchorRegex.Matches($content)) {
+        [void]$anchors.Add($m.Groups[1].Value)
+    }
+    $linkTargets = [System.Collections.Generic.Dictionary[string, [System.Collections.Generic.List[string]]]]::new([StringComparer]::OrdinalIgnoreCase)
+    $linkTextsByTarget = [System.Collections.Generic.Dictionary[string, [System.Collections.Generic.List[string]]]]::new([StringComparer]::OrdinalIgnoreCase)
+    foreach ($m in $linkRegex.Matches($content)) {
+        $target = $m.Groups['target'].Value
+        $text = ($m.Groups['text'].Value -replace '\*+', '').Trim()  # drop bold/italic asterisks from the link text
+        if (-not $linkTargets.ContainsKey($target)) {
+            $linkTargets[$target] = [System.Collections.Generic.List[string]]::new()
+        }
+        $linkTargets[$target].Add($target) | Out-Null
+        if (-not $linkTextsByTarget.ContainsKey($target)) {
+            $linkTextsByTarget[$target] = [System.Collections.Generic.List[string]]::new()
+        }
+        if (-not [string]::IsNullOrWhiteSpace($text)) {
+            $linkTextsByTarget[$target].Add($text) | Out-Null
+        }
+    }
+    $broken = [System.Collections.Generic.List[string]]::new()  # unique link targets with no matching anchor
+    foreach ($t in $linkTargets.Keys) {
+        if (-not $anchors.Contains($t)) {
+            $broken.Add($t)
+        }
+    }
+    $gt = [System.Collections.Generic.List[string]]::new()
+    $sectionGuid = [System.Collections.Generic.List[string]]::new()
+    $sectionNum = [System.Collections.Generic.List[string]]::new()
+    foreach ($b in $broken) {
+        if ($b -match '^gt_[a-f0-9\-]{36}$') { $gt.Add($b) }
+        elseif ($b -match '^Section_[a-f0-9]{32}$') { $sectionGuid.Add($b) }
+        elseif ($b -match '^Section_\d+(?:\.\d+)*$') { $sectionNum.Add($b) }
+    }
+    $sectionGuidTextCounts = @{}  # link text -> number of broken Section GUID links using that text
+    foreach ($target in $sectionGuid) {
+        if (-not $linkTextsByTarget.ContainsKey($target)) { continue }
+        foreach ($text in $linkTextsByTarget[$target]) {
+            if ([string]::IsNullOrWhiteSpace($text)) { continue }
+            if (-not $sectionGuidTextCounts.ContainsKey($text)) { $sectionGuidTextCounts[$text] = 0 }
+            $sectionGuidTextCounts[$text]++
+        }
+    }
+    $protocolId = [System.IO.Path]::GetFileNameWithoutExtension($mdPath)
+    $report = @"
+# Broken Links Report: $protocolId
+
+Generated from link targets that do not have a matching ``<a id="...">`` in the document.
+
+## Summary
+
+| Category | Count | Description |
+|----------|-------|-------------|
+| **gt_ GUID** | $($gt.Count) | Glossary links still using Word bookmark IDs |
+| **Section_<32hex>** | $($sectionGuid.Count) | Section links using Word GUIDs |
+| **Section_X.Y.Z** (numeric) | $($sectionNum.Count) | Section number links with no anchor in doc |
+| **Other** | $($broken.Count - $gt.Count - $sectionGuid.Count - $sectionNum.Count) | Other unresolved fragments |
+| **Total broken** | **$($broken.Count)** | Unique link targets with no matching anchor |
+
+"@
+    if ($gt.Count -gt 0) {
+        $report += "`n## 1. Glossary (gt_) links - $($gt.Count) broken`n`n"
+        $report += ($gt | Sort-Object | ForEach-Object { "- ``$_``" }) -join "`n"
+        $report += "`n"
+    }
+    if ($sectionGuid.Count -gt 0) {
+        $report += "`n## 2. Section GUID links - $($sectionGuid.Count) broken`n`n"
+        $report += "Sample: " + (($sectionGuid | Sort-Object | Select-Object -First 5) -join ", ")
+        if ($sectionGuid.Count -gt 5) { $report += " ... and $($sectionGuid.Count - 5) more" }
+        $report += "`n"
+        if ($sectionGuidTextCounts.Count -gt 0) {
+            $topPatterns = $sectionGuidTextCounts.GetEnumerator() | Sort-Object -Property Value -Descending | Select-Object -First 10
+            $report += "`nTop unresolved Section GUID link texts:`n"
+            foreach ($p in $topPatterns) {
+                $report += "- ``$($p.Key)`` ($($p.Value))`n"
+            }
+        }
+    }
+    if ($sectionNum.Count -gt 0) {
+        $report += "`n## 3. Section number links - $($sectionNum.Count) broken`n`n"
+        $report += "Sample: " + (($sectionNum | Sort-Object | Select-Object -First 10) -join ", ")
+        if ($sectionNum.Count -gt 10) { $report += " ... and $($sectionNum.Count - 10) more" }
+        $report += "`n"
+    }
+    Write-Host "=== $protocolId ==="
+    Write-Host "Broken: $($broken.Count) (gt_: $($gt.Count), Section_guid: $($sectionGuid.Count), Section_N.N: $($sectionNum.Count))"
+    $conversionReportPath = Join-Path -Path ([System.IO.Path]::GetDirectoryName($mdPath)) -ChildPath 'artifacts\conversion-report.json'
+    if (Test-Path -LiteralPath $conversionReportPath) {
+        $conv = Get-Content -LiteralPath $conversionReportPath -Raw -Encoding UTF8 | ConvertFrom-Json
+        $deterministic = 0
+        $heuristic = 0
+        foreach ($issue in @($conv.Issues)) {
+            switch ([string]$issue.Type) {
+                'GuidAnchorResolved' { $deterministic += [int]$issue.Count; break }
+                'GlossaryAnchorsAndLinks' { $deterministic += [int]$issue.SourceMapLinksRepaired; $heuristic += ([int]$issue.LinksRepaired - [int]$issue.SourceMapLinksRepaired); break }
+                'SectionGuidLinksRepairedByHeading' { $heuristic += [int]$issue.Count; break }
+                'SectionNumberLinksRepaired' { $heuristic += [int]$issue.Count; break }
+            }
+        }
+        Write-Host "Repairs (conversion report): deterministic=$deterministic heuristic=$heuristic"
+        $report += "`n## Repair source diagnostics`n`n"
+        $report += "- Deterministic repairs (source-map driven): **$deterministic**`n"
+        $report += "- Heuristic repairs (text/title matching): **$heuristic**`n"
+    }
+    if ($OutputReport) {
+        $reportPath = [System.IO.Path]::Combine([System.IO.Path]::GetDirectoryName($mdPath), 'broken-links-report.md')  # written next to the scanned file
+        $report | Set-Content -LiteralPath $reportPath -Encoding UTF8 -NoNewline
+        Write-Host "Report written: $reportPath"
+    }
+}
diff --git a/scripts/Get-BrokenLinksSummary.ps1 b/scripts/Get-BrokenLinksSummary.ps1
new file mode 100644
index 00000000..cb63ee74
--- /dev/null
+++ b/scripts/Get-BrokenLinksSummary.ps1
@@ -0,0 +1,33 @@
+# Runs Get-BrokenLinksReport.ps1 over -Path and sums the "Broken: ..." line it prints for each file.
+param([string]$Path = (Join-Path (Get-Location) 'artifacts\converted-specs'))
+
+$reportScript = Join-Path -Path $PSScriptRoot -ChildPath 'Get-BrokenLinksReport.ps1'
+$captureFile = [System.IO.Path]::GetTempFileName()
+try {
+    & $reportScript -Path $Path -OutputReport *> $captureFile  # redirect every stream so the Write-Host lines land in the file
+    $lines = Get-Content -LiteralPath $captureFile -Encoding utf8
+} finally { Remove-Item -LiteralPath $captureFile -ErrorAction SilentlyContinue }
+$summaryPattern = 'Broken: (\d+).*gt_: (\d+).*Section_guid: (\d+).*Section_N\.N: (\d+)'
+$total = 0
+$gtTotal = 0
+$sectionGuidTotal = 0
+$sectionNumTotal = 0
+$specsWithBroken = 0
+
+# Only lines matching the summary pattern contribute; files reporting zero broken links are ignored.
+foreach ($line in $lines) {
+    if (-not ([string]$line -match $summaryPattern)) { continue }
+    $brokenInFile = [int]$Matches[1]
+    if ($brokenInFile -le 0) { continue }
+    $total += $brokenInFile
+    $gtTotal += [int]$Matches[2]
+    $sectionGuidTotal += [int]$Matches[3]
+    $sectionNumTotal += [int]$Matches[4]
+    $specsWithBroken++
+}
+
+Write-Host "Total broken link targets: $total"
+Write-Host " - gt_ (glossary): $gtTotal"
+Write-Host " - Section_<32hex>: $sectionGuidTotal"
+Write-Host " - Section_N.N: $sectionNumTotal"
+Write-Host "Files with broken links: $specsWithBroken"
diff --git a/scripts/Remove-FrontMatterBoilerplate.ps1 b/scripts/Remove-FrontMatterBoilerplate.ps1
new file mode 100644
index 00000000..9cc3531f
--- /dev/null
+++ b/scripts/Remove-FrontMatterBoilerplate.ps1
@@ -0,0 +1,12 @@
+# One-off helper: applies Remove-OpenSpecFrontMatterBoilerplate to one spec file and saves it when the function reports Removed.
+param([Parameter(Mandatory)][string]$Path)
+$ErrorActionPreference = 'Stop'
+$moduleParent = Split-Path -Parent $PSScriptRoot
+. (Join-Path -Path $moduleParent -ChildPath 'AwakeCoding.OpenSpecs\Private\Invoke-OpenSpecMarkdownCleanup.ps1')
+$original = Get-Content -LiteralPath $Path -Raw -Encoding UTF8
+$cleanupResult = Remove-OpenSpecFrontMatterBoilerplate -Markdown $original
+Write-Host 'Removed:' $cleanupResult.Removed
+if ($cleanupResult.Removed) {
+    $cleanupResult.Markdown | Set-Content -LiteralPath $Path -Encoding UTF8 -NoNewline
+    Write-Host 'File updated.'
+}
diff --git a/scripts/Repair-AllBrokenLinks.ps1 b/scripts/Repair-AllBrokenLinks.ps1
new file mode 100644
index 00000000..1b4aa72c
--- /dev/null
+++ b/scripts/Repair-AllBrokenLinks.ps1
@@ -0,0 +1,96 @@
+<#
+.SYNOPSIS
+ Runs Section GUID and Glossary link repairs on all converted specs.
+.DESCRIPTION
+ Iterates over main .md files in converted-specs, runs Repair-OpenSpecSectionGuidLinksByHeadingMatch
+ and Add-OpenSpecGlossaryAnchorsAndRepairLinks, and overwrites files when repairs are made.
+.EXAMPLE
+ .\Repair-AllBrokenLinks.ps1 -Path artifacts\converted-specs
+#>
+[CmdletBinding()]
+param(
+ [Parameter()]
+ [string]$Path = (Join-Path (Get-Location) 'artifacts\converted-specs'),
+ [switch]$WhatIf,
+ [switch]$Parallel,
+ [int]$ThrottleLimit = 8
+)
+
+$ErrorActionPreference = 'Stop'  # makes each Write-Error below terminate the script
+$repoRoot = (Get-Item $PSScriptRoot).Parent.FullName
+$cleanupPath = Join-Path $repoRoot 'AwakeCoding.OpenSpecs\Private\Invoke-OpenSpecMarkdownCleanup.ps1'
+if (-not (Test-Path -LiteralPath $cleanupPath -PathType Leaf)) {
+    Write-Error "Cleanup script not found: $cleanupPath"
+}
+. $cleanupPath
+
+$resolved = $ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath($Path)  # relative to the PowerShell location, unlike [IO.Path]::GetFullPath (process working directory)
+if (-not (Test-Path -LiteralPath $resolved -PathType Container)) {
+    Write-Error "Path not found: $resolved"
+}
+
+# Main spec files: <SpecName>/<SpecName>.md, exclude artifacts subdirs and reports
+$specFiles = @(Get-ChildItem -LiteralPath $resolved -Directory | ForEach-Object {
+    $dir = $_
+    $name = $dir.Name
+    $mdPath = Join-Path $dir.FullName "$name.md"
+    if (Test-Path -LiteralPath $mdPath -PathType Leaf) { $mdPath }  # directories without a same-named .md are skipped
+} | Where-Object { $_ })
+
+$useParallel = $Parallel -and $PSVersionTable.PSVersion.Major -ge 7 -and $specFiles.Count -gt 1  # otherwise fall back to the sequential loop
+$dryRun = $WhatIf
+
+if ($useParallel) {
+    $outcomes = $specFiles | ForEach-Object -Parallel {
+        . $using:cleanupPath
+        $specPath = $_
+        $markdown = Get-Content -LiteralPath $specPath -Raw -Encoding UTF8
+        $sectionPass = Repair-OpenSpecSectionGuidLinksByHeadingMatch -Markdown $markdown
+        $markdown = $sectionPass.Markdown
+        $glossaryPass = Add-OpenSpecGlossaryAnchorsAndRepairLinks -Markdown $markdown
+        $markdown = $glossaryPass.Markdown
+        $isDirty = ($sectionPass.LinksRepaired -gt 0) -or ($glossaryPass.AnchorsInjected -gt 0) -or ($glossaryPass.LinksRepaired -gt 0)
+        if ($isDirty -and -not $using:dryRun) {
+            Set-Content -LiteralPath $specPath -Value $markdown -Encoding UTF8 -NoNewline
+        }
+        [pscustomobject]@{
+            SectionRepaired = $sectionPass.LinksRepaired
+            GlossaryRepaired = $glossaryPass.LinksRepaired
+            AnchorsInjected = $glossaryPass.AnchorsInjected
+            Updated = $isDirty -and -not $using:dryRun
+            SpecName = [System.IO.Path]::GetFileName([System.IO.Path]::GetDirectoryName($specPath))
+        }
+    } -ThrottleLimit $ThrottleLimit
+
+    $totalSection = ($outcomes | Measure-Object -Property SectionRepaired -Sum).Sum
+    $totalGlossary = ($outcomes | Measure-Object -Property GlossaryRepaired -Sum).Sum
+    $updated = @($outcomes | Where-Object { $_.Updated }).Count
+    foreach ($outcome in ($outcomes | Where-Object { $_.Updated })) {
+        Write-Host "Updated: $($outcome.SpecName) (Section:$($outcome.SectionRepaired) Glossary:$($outcome.GlossaryRepaired)+$($outcome.AnchorsInjected))"
+    }
+}
+else {
+    $totalSection = 0
+    $totalGlossary = 0
+    $updated = 0
+    foreach ($specPath in $specFiles) {
+        $markdown = Get-Content -LiteralPath $specPath -Raw -Encoding UTF8
+        $sectionPass = Repair-OpenSpecSectionGuidLinksByHeadingMatch -Markdown $markdown
+        $markdown = $sectionPass.Markdown
+        $totalSection += $sectionPass.LinksRepaired
+
+        $glossaryPass = Add-OpenSpecGlossaryAnchorsAndRepairLinks -Markdown $markdown
+        $markdown = $glossaryPass.Markdown
+        $totalGlossary += $glossaryPass.LinksRepaired
+
+        $isDirty = ($sectionPass.LinksRepaired -gt 0) -or ($glossaryPass.AnchorsInjected -gt 0) -or ($glossaryPass.LinksRepaired -gt 0)
+        if ($isDirty -and -not $dryRun) {
+            Set-Content -LiteralPath $specPath -Value $markdown -Encoding UTF8 -NoNewline
+            $updated++
+            $specName = [System.IO.Path]::GetFileName([System.IO.Path]::GetDirectoryName($specPath))
+            Write-Host "Updated: $specName (Section:$($sectionPass.LinksRepaired) Glossary:$($glossaryPass.LinksRepaired)+$($glossaryPass.AnchorsInjected))"
+        }
+    }
+}
+
+Write-Host "`nTotal: Section GUID links repaired=$totalSection, Glossary links repaired=$totalGlossary, Files updated=$updated"
diff --git a/scripts/Repair-GlossaryLinks.ps1 b/scripts/Repair-GlossaryLinks.ps1
new file mode 100644
index 00000000..dada2c32
--- /dev/null
+++ b/scripts/Repair-GlossaryLinks.ps1
@@ -0,0 +1,12 @@
+# Runs Add-OpenSpecGlossaryAnchorsAndRepairLinks on one spec file to fix gt_ GUID links; saves when anchors were injected or links repaired.
+param([Parameter(Mandatory)][string]$Path)
+$ErrorActionPreference = 'Stop'
+$repoRoot = (Get-Item $PSScriptRoot).Parent.FullName
+. (Join-Path $repoRoot 'AwakeCoding.OpenSpecs\Private\Invoke-OpenSpecMarkdownCleanup.ps1')
+$md = Get-Content -LiteralPath $Path -Raw -Encoding UTF8
+$r = Add-OpenSpecGlossaryAnchorsAndRepairLinks -Markdown $md
+Write-Host 'AnchorsInjected:' $r.AnchorsInjected 'LinksRepaired:' $r.LinksRepaired
+if (($r.LinksRepaired -gt 0) -or ($r.AnchorsInjected -gt 0)) {  # injected anchors also change the markdown, so they must be saved too
+    Set-Content -LiteralPath $Path -Value $r.Markdown -Encoding UTF8 -NoNewline
+    Write-Host 'File updated.'
+}
diff --git a/scripts/Repair-MissingSectionAnchors.ps1 b/scripts/Repair-MissingSectionAnchors.ps1
new file mode 100644
index 00000000..cdc7fdb1
--- /dev/null
+++ b/scripts/Repair-MissingSectionAnchors.ps1
@@ -0,0 +1,36 @@
+<#
+.SYNOPSIS
+ Injects missing Section_N.N anchors into an already-converted spec using TOC titles.
+.DESCRIPTION
+ Runs Add-OpenSpecMissingSectionAnchorsFromToc on the given markdown file and overwrites it.
+ Use this to fix "Section_X.Y.Z (numeric)" broken links without reconverting from DOCX.
+.EXAMPLE
+ .\Repair-MissingSectionAnchors.ps1 -Path artifacts\converted-specs\MS-RDPBCGR\MS-RDPBCGR.md
+#>
+[CmdletBinding()]
+param(
+ [Parameter(Mandatory)]
+ [string]$Path
+)
+
+$ErrorActionPreference = 'Stop'  # makes each Write-Error below terminate the script
+$fullPath = $ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath($Path)  # relative to the PowerShell location, unlike [IO.Path]::GetFullPath (process working directory)
+if (-not (Test-Path -LiteralPath $fullPath -PathType Leaf)) {
+    Write-Error "File not found: $fullPath"
+}
+
+$repoRoot = (Get-Item $PSScriptRoot).Parent.FullName
+$privateScript = Join-Path $repoRoot 'AwakeCoding.OpenSpecs\Private\Invoke-OpenSpecMarkdownCleanup.ps1'
+if (-not (Test-Path -LiteralPath $privateScript -PathType Leaf)) {
+    Write-Error "Cleanup script not found: $privateScript"
+}
+
+. $privateScript
+$markdown = Get-Content -LiteralPath $fullPath -Raw -Encoding UTF8
+$result = Add-OpenSpecMissingSectionAnchorsFromToc -Markdown $markdown
+if ($result.InjectedCount -gt 0) {
+    $result.Markdown | Set-Content -LiteralPath $fullPath -Encoding UTF8 -NoNewline  # overwrite only when something was injected
+    Write-Host "Injected $($result.InjectedCount) missing section anchor(s). File updated: $fullPath"
+} else {
+    Write-Host "No missing section anchors to inject."
+}
diff --git a/scripts/Repair-SectionGuidLinks.ps1 b/scripts/Repair-SectionGuidLinks.ps1
new file mode 100644
index 00000000..50c7b98a
--- /dev/null
+++ b/scripts/Repair-SectionGuidLinks.ps1
@@ -0,0 +1,12 @@
+# Applies Repair-OpenSpecSectionGuidLinksByHeadingMatch to one spec file and saves it when at least one link was repaired.
+param([Parameter(Mandatory)][string]$Path)
+$ErrorActionPreference = 'Stop'
+$moduleParent = Split-Path -Parent $PSScriptRoot
+. (Join-Path -Path $moduleParent -ChildPath 'AwakeCoding.OpenSpecs\Private\Invoke-OpenSpecMarkdownCleanup.ps1')
+$original = Get-Content -LiteralPath $Path -Raw -Encoding UTF8
+$repairResult = Repair-OpenSpecSectionGuidLinksByHeadingMatch -Markdown $original
+Write-Host 'LinksRepaired:' $repairResult.LinksRepaired
+if ($repairResult.LinksRepaired -gt 0) {
+    $repairResult.Markdown | Set-Content -LiteralPath $Path -Encoding UTF8 -NoNewline
+    Write-Host 'File updated.'
+}
diff --git a/scripts/Test-DocxLinkMetadataCapture.ps1 b/scripts/Test-DocxLinkMetadataCapture.ps1
new file mode 100644
index 00000000..da269204
--- /dev/null
+++ b/scripts/Test-DocxLinkMetadataCapture.ps1
@@ -0,0 +1,49 @@
+[CmdletBinding()]
+param(
+ [Parameter(Mandatory)]
+ [string]$DocxPath,
+
+ [Parameter(Mandatory)]
+ [string]$OutputMarkdownPath
+)
+
+# Converts one DOCX with ConvertFrom-OpenSpecDocxWithOpenXml and prints the entry counts of the returned LinkMetadata.
+$ErrorActionPreference = 'Stop'
+$privateDir = Join-Path -Path (Split-Path -Parent $PSScriptRoot) -ChildPath 'AwakeCoding.OpenSpecs\Private'
+foreach ($privateFile in Get-ChildItem -LiteralPath $privateDir -Filter '*.ps1') {
+    . $privateFile.FullName  # dot-source each private file so its functions are callable here
+}
+
+$toolchain = [pscustomobject]@{ HasOpenXml = $false }
+
+try {
+    $conversion = ConvertFrom-OpenSpecDocxWithOpenXml -InputPath $DocxPath -OutputPath $OutputMarkdownPath -Toolchain $toolchain
+}
+catch {
+    $failure = $_
+    Write-Host "Exception type: $($failure.Exception.GetType().FullName)"
+    Write-Host "Message: $($failure.Exception.Message)"
+    if ($failure.InvocationInfo) {
+        Write-Host "Position: $($failure.InvocationInfo.PositionMessage)"
+    }
+    if ($failure.ScriptStackTrace) {
+        Write-Host "Stack:"
+        Write-Host $failure.ScriptStackTrace
+    }
+    $inner = $failure.Exception.InnerException
+    if ($inner) {
+        Write-Host "Inner: $($inner.GetType().FullName): $($inner.Message)"
+    }
+    throw
+}
+
+if (-not $conversion.PSObject.Properties['LinkMetadata']) {
+    throw 'LinkMetadata was not returned from DOCX conversion step.'
+}
+
+$linkMetadata = $conversion.LinkMetadata
+Write-Host "GuidToSection: $($linkMetadata.GuidToSection.Count)"
+Write-Host "SectionToTitle: $($linkMetadata.SectionToTitle.Count)"
+Write-Host "TocAlias: $($linkMetadata.TocAlias.Count)"
+Write-Host "GuidToGlossarySlug: $($linkMetadata.GuidToGlossarySlug.Count)"
+Write-Host "InternalHyperlinks: $(@($linkMetadata.InternalHyperlinks).Count)"