Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
artifacts/
downloads*/
converted*/
reports*/
reports*/
publish/
95 changes: 95 additions & 0 deletions AwakeCoding.OpenSpecs/Private/ConvertFrom-OpenSpecDocx.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ function ConvertFrom-OpenSpecDocxWithOpenXml {
}
$inGlossary = $false
$glossaryHeadingLevel = 0
$pendingSectionGuids = New-Object System.Collections.Generic.List[string]

# Resolve media output directory for image extraction.
$resolvedMediaDir = $null
Expand Down Expand Up @@ -173,6 +174,14 @@ function ConvertFrom-OpenSpecDocxWithOpenXml {

$sectionAnchor = $anchorInfo.SectionAnchor
if (-not [string]::IsNullOrWhiteSpace($sectionAnchor)) {
# Strategy B: Resolve cross-paragraph section GUIDs from previous paragraph
foreach ($g in $pendingSectionGuids) {
if (-not $linkMetadata.GuidToSection.ContainsKey($g)) {
$linkMetadata.GuidToSection[$g] = $sectionAnchor
}
}
$pendingSectionGuids.Clear()

if (-not $linkMetadata.SectionToTitle.ContainsKey($sectionAnchor)) {
$linkMetadata.SectionToTitle[$sectionAnchor] = $headingText
}
Expand Down Expand Up @@ -215,6 +224,18 @@ function ConvertFrom-OpenSpecDocxWithOpenXml {
}
}
}

# Strategy B: Paragraph has section_<guid> bookmarks but no SectionAnchor — defer to next paragraph
if ([string]::IsNullOrWhiteSpace($anchorInfo.SectionAnchor)) {
foreach ($bookmarkName in @($anchorInfo.BookmarkNames)) {
if ($bookmarkName -match '(?i)^section_(?<guid>[a-f0-9]{32})$') {
$g = $Matches['guid'].ToLowerInvariant()
if (-not $linkMetadata.GuidToSection.ContainsKey($g) -and -not $pendingSectionGuids.Contains($g)) {
[void]$pendingSectionGuids.Add($g)
}
}
}
}
}
elseif ($child.LocalName -eq 'tbl') {
$tableLines = ConvertFrom-OpenSpecOpenXmlTable -TableNode $child -NamespaceManager $nsmgr -RelationshipMap $relationshipMap -Archive $archive -MediaOutputDirectory $resolvedMediaDir
Expand All @@ -231,6 +252,80 @@ function ConvertFrom-OpenSpecDocxWithOpenXml {
throw 'OpenXml conversion produced empty markdown output.'
}

# Strategy A: Build GuidToSection from InternalHyperlinks by matching link text to SectionToTitle
if ($linkMetadata.SectionToTitle.Count -eq 0) {
$headingRegex = [regex]::new('^(?<level>#{1,6})\s+(?<num>\d+(?:\.\d+)*)\s+(?<title>.+)$', [System.Text.RegularExpressions.RegexOptions]::Multiline)
foreach ($m in $headingRegex.Matches($markdown)) {
$sectionAnchor = "Section_$($m.Groups['num'].Value)"
$fullTitle = "$($m.Groups['num'].Value) $($m.Groups['title'].Value.Trim())"
if (-not $linkMetadata.SectionToTitle.ContainsKey($sectionAnchor)) {
$linkMetadata.SectionToTitle[$sectionAnchor] = $fullTitle
}
}
}
$titleToSection = @{}
foreach ($entry in $linkMetadata.SectionToTitle.GetEnumerator()) {
$key = [string]$entry.Key
$val = ([string]$entry.Value -replace '\s+', ' ').Trim()
if (-not [string]::IsNullOrWhiteSpace($val)) {
$titleToSection[$val] = $key
$withoutNum = ($val -replace '^\d+(?:\.\d+)*\s+', '').Trim()
if ($withoutNum -and -not $titleToSection.ContainsKey($withoutNum)) {
$titleToSection[$withoutNum] = $key
}
}
}
$sectionGuidRegex = [regex]::new('^(?:[Ss]ection_)?([a-f0-9]{32})$')
$internalLinksArray = $linkMetadata.InternalHyperlinks.ToArray()
foreach ($link in $internalLinksArray) {
$anchor = [string]$link.Anchor
$text = ([string]$link.Text -replace '\s+', ' ').Trim()
$m = $sectionGuidRegex.Match($anchor)
if (-not $m.Success) { continue }
$guid = $m.Groups[1].Value.ToLowerInvariant()
if ($linkMetadata.GuidToSection.ContainsKey($guid)) { continue }
$matchedSection = $null
if ($titleToSection.ContainsKey($text)) {
$matchedSection = $titleToSection[$text]
}
else {
foreach ($tit in $titleToSection.Keys) {
if ($tit -eq $text) { $matchedSection = $titleToSection[$tit]; break }
$textEsc = [Management.Automation.WildcardPattern]::Escape($text)
$titEsc = [Management.Automation.WildcardPattern]::Escape($tit)
if ($tit -like "*$textEsc*" -and $text.Length -ge 8) { $matchedSection = $titleToSection[$tit]; break }
if ($text -like "*$titEsc*" -and $tit.Length -ge 8) { $matchedSection = $titleToSection[$tit]; break }
}
}
if ($matchedSection) {
$linkMetadata.GuidToSection[$guid] = $matchedSection
}
}

# Strategy D: Build GuidToGlossarySlug from InternalHyperlinks with gt_<guid> anchors
$termToSlug = @{}
$glossaryDefRegex = [regex]::new('^\s*\*\*(?<term>[^*]+)\*\*\s*:\s*', [System.Text.RegularExpressions.RegexOptions]::Multiline)
foreach ($gm in $glossaryDefRegex.Matches($markdown)) {
$term = $gm.Groups['term'].Value.Trim()
$slug = Get-OpenSpecGlossarySlugFromTerm -Term $term
$termToSlug[$term] = $slug
if ($term -match '^(.+?)\s+\(([^)]+)\)\s*$') {
$termToSlug[$Matches[2].Trim()] = $slug
}
}
$gtGuidRegex = [regex]::new('^gt_([a-f0-9\-]{36})$', [System.Text.RegularExpressions.RegexOptions]::IgnoreCase)
foreach ($link in $internalLinksArray) {
$anchor = [string]$link.Anchor
$text = ([string]$link.Text -replace '\s+', ' ').Trim()
$m = $gtGuidRegex.Match($anchor)
if (-not $m.Success) { continue }
$guid = $m.Groups[1].Value.ToLowerInvariant()
if ($linkMetadata.GuidToGlossarySlug.ContainsKey($guid)) { continue }
if ($termToSlug.ContainsKey($text)) {
$linkMetadata.GuidToGlossarySlug[$guid] = $termToSlug[$text]
}
}

$linkMetadata.Stats.GuidSectionMapCount = $linkMetadata.GuidToSection.Count
$linkMetadata.Stats.TocAliasCount = $linkMetadata.TocAlias.Count
$linkMetadata.Stats.GlossaryGuidMapCount = $linkMetadata.GuidToGlossarySlug.Count
Expand Down
136 changes: 124 additions & 12 deletions AwakeCoding.OpenSpecs/Private/Invoke-OpenSpecMarkdownCleanup.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ function Invoke-OpenSpecMarkdownCleanup {
$result = $tocResult.Markdown
foreach ($issue in $tocResult.Issues) { [void]$issues.Add($issue) }

$sourceGuidToSection = if ($SourceLinkMetadata -and $SourceLinkMetadata.PSObject.Properties['GuidToSection']) { $SourceLinkMetadata.GuidToSection } else { $null }
$sourceGuidToSection = if ($SourceLinkMetadata -and $SourceLinkMetadata.GuidToSection) { $SourceLinkMetadata.GuidToSection } else { $null }
$guidResult = Resolve-OpenSpecGuidSectionAnchors -Markdown $result -GuidToSectionMap $sourceGuidToSection
$result = $guidResult.Markdown
foreach ($issue in $guidResult.Issues) { [void]$issues.Add($issue) }
Expand Down Expand Up @@ -110,7 +110,7 @@ function Invoke-OpenSpecMarkdownCleanup {
})
}

$sourceSectionToTitle = if ($SourceLinkMetadata -and $SourceLinkMetadata.PSObject.Properties['SectionToTitle']) { $SourceLinkMetadata.SectionToTitle } else { $null }
$sourceSectionToTitle = if ($SourceLinkMetadata -and $null -ne $SourceLinkMetadata.SectionToTitle) { $SourceLinkMetadata.SectionToTitle } else { $null }
$guidByHeadingResult = Repair-OpenSpecSectionGuidLinksByHeadingMatch -Markdown $result -SectionToTitleMap $sourceSectionToTitle
$result = $guidByHeadingResult.Markdown
if ($guidByHeadingResult.LinksRepaired -gt 0) {
Expand All @@ -122,7 +122,7 @@ function Invoke-OpenSpecMarkdownCleanup {
})
}

$sourceGuidToGlossarySlug = if ($SourceLinkMetadata -and $SourceLinkMetadata.PSObject.Properties['GuidToGlossarySlug']) { $SourceLinkMetadata.GuidToGlossarySlug } else { $null }
$sourceGuidToGlossarySlug = if ($SourceLinkMetadata -and $null -ne $SourceLinkMetadata.GuidToGlossarySlug) { $SourceLinkMetadata.GuidToGlossarySlug } else { $null }
$glossaryResult = Add-OpenSpecGlossaryAnchorsAndRepairLinks -Markdown $result -GuidToGlossarySlugMap $sourceGuidToGlossarySlug
$result = $glossaryResult.Markdown
if ($glossaryResult.AnchorsInjected -gt 0 -or $glossaryResult.LinksRepaired -gt 0) {
Expand All @@ -146,14 +146,26 @@ function Invoke-OpenSpecMarkdownCleanup {
})
}

$extractedRev = $frontMatterResult.ExtractedRevisionHistory
if ($extractedRev) {
$result = Add-OpenSpecRevisionHistorySectionAtEnd -Markdown $result -RevisionHistory $extractedRev
}

$legalResult = Add-LegalNoticeLinkAfterToc -Markdown $result -LastUpdated $frontMatterResult.LastUpdated -HasRevisionHistory:($null -ne $extractedRev)
$result = $legalResult.Markdown

$newLine = [Environment]::NewLine
$result = [regex]::Replace($result, "(`r?`n){3,}", "$newLine$newLine")

[pscustomobject]@{
$out = [pscustomobject]@{
PSTypeName = 'AwakeCoding.OpenSpecs.MarkdownCleanupResult'
Markdown = $result
Issues = $issues.ToArray()
}
if ($frontMatterResult.Removed -and $frontMatterResult.PSObject.Properties['ExtractedBoilerplate']) {
Add-Member -InputObject $out -NotePropertyName 'ExtractedBoilerplate' -NotePropertyValue $frontMatterResult.ExtractedBoilerplate
}
$out
}

function ConvertFrom-OpenSpecHtmlTables {
Expand Down Expand Up @@ -935,6 +947,75 @@ function ConvertTo-OpenSpecGitHubFriendlyToc {
}
}

function Add-OpenSpecRevisionHistorySectionAtEnd {
    <#
    .SYNOPSIS
    Appends a "Revision History" section, preceded by an HTML anchor, to the end of a Markdown document.

    .DESCRIPTION
    Trailing whitespace is stripped from the document and surrounding whitespace from the
    revision text. The document, the anchor, the level-2 heading and the revision text are then
    joined with one blank line ([Environment]::NewLine twice) between each piece.

    .PARAMETER Markdown
    The Markdown document to extend.

    .PARAMETER RevisionHistory
    The revision history content placed under the new heading.

    .OUTPUTS
    System.String. The extended Markdown.
    #>
    [CmdletBinding()]
    param(
        [Parameter(Mandatory)]
        [string]$Markdown,
        [Parameter(Mandatory)]
        [string]$RevisionHistory
    )

    $eol = [Environment]::NewLine
    $blankLine = $eol + $eol

    # Each element becomes its own paragraph; the anchor id is the target of "#revision-history" links.
    $pieces = @(
        $Markdown.TrimEnd()
        '<a id="revision-history"></a>'
        '## Revision History'
        $RevisionHistory.Trim()
    )

    return ($pieces -join $blankLine)
}

function Add-LegalNoticeLinkAfterToc {
    <#
    .SYNOPSIS
    Inserts a short legal-notice pointer after the last </details> block that precedes the first numbered section.

    .DESCRIPTION
    Finds the first '<a id="Section_<digit>' anchor, then the last '</details>' closing tag that
    appears before it (presumably the collapsible table of contents, judging by the function name).
    The notice is inserted directly after that tag and is always separated from the content on
    both sides by exactly one blank line. If either marker is missing, the Markdown is returned
    unchanged.

    .PARAMETER Markdown
    The Markdown document to update.

    .PARAMETER LastUpdated
    Optional date text. When non-empty, a "Last updated: <value>." line is added to the notice.

    .PARAMETER HasRevisionHistory
    When set, a line linking to the '#revision-history' anchor is added to the notice.

    .OUTPUTS
    PSCustomObject with a single 'Markdown' property holding the resulting text.
    #>
    [CmdletBinding()]
    param(
        [Parameter(Mandatory)]
        [string]$Markdown,
        [Parameter()]
        [string]$LastUpdated,
        [Parameter()]
        [switch]$HasRevisionHistory
    )

    $newLine = [Environment]::NewLine
    $blankLine = $newLine + $newLine
    $ignoreCase = [System.Text.RegularExpressions.RegexOptions]::IgnoreCase

    # Assemble the notice; the lines are joined with single newlines so they form one paragraph.
    $lines = New-Object System.Collections.Generic.List[string]
    $lines.Add("For the legal notice and IP terms, see [LEGAL.md](../LEGAL.md).")
    if ($LastUpdated) {
        $lines.Add("Last updated: $LastUpdated.")
    }
    if ($HasRevisionHistory) {
        $lines.Add("See [Revision History](#revision-history) for full version history.")
    }
    $legalBlock = $lines -join $newLine

    # Only content ahead of the first numbered section anchor is searched for the insertion point.
    $firstSectionMatch = [regex]::Match($Markdown, '<a\s+id="Section_\d', $ignoreCase)
    if (-not $firstSectionMatch.Success) {
        return [pscustomobject]@{ Markdown = $Markdown }
    }
    $beforeContent = $Markdown.Substring(0, $firstSectionMatch.Index)

    $detailsMatches = [regex]::Matches($beforeContent, '</details>', $ignoreCase)
    if ($detailsMatches.Count -eq 0) {
        return [pscustomobject]@{ Markdown = $Markdown }
    }
    $lastDetailsMatch = $detailsMatches[$detailsMatches.Count - 1]

    # Consume the WHOLE run of newlines after </details>. Reading capture group 1 of '(\r?\n)+'
    # would only yield the last newline of the run, leaving the insertion point inside the run
    # and the amount of spacing dependent on the input.
    $insertAt = $lastDetailsMatch.Index + $lastDetailsMatch.Length
    $newlineRun = [regex]::Match($beforeContent.Substring($insertAt), '\A(?:\r?\n)+')
    $before = $Markdown.Substring(0, $insertAt)
    $after = $Markdown.Substring($insertAt + $newlineRun.Length)

    # Always emit a blank line before the notice, even when </details> had no newline after it;
    # otherwise the notice would sit on the same line as the closing tag.
    $result = $before + $blankLine + $legalBlock + $blankLine + $after

    [pscustomobject]@{
        Markdown = $result
    }
}

function ConvertTo-OpenSpecNormalizedEncodedBracketUrls {
[CmdletBinding()]
param(
Expand Down Expand Up @@ -1387,6 +1468,9 @@ function Remove-OpenSpecFrontMatterBoilerplate {
$result = $Markdown
$removed = $false
$newLine = [Environment]::NewLine
$blockContent = $null
$extractedRevisionHistory = $null
$lastUpdated = $null

# Block from "Intellectual Property Rights Notice" (or similar) through the revision table, ending before "Table of Contents".
$blockRegex = [regex]::new(
Expand All @@ -1396,28 +1480,56 @@ function Remove-OpenSpecFrontMatterBoilerplate {
$match = $blockRegex.Match($result)
if ($match.Success) {
$blockContent = $match.Groups[2].Value
$lastUpdated = $null
$dateRowRegex = [regex]::new('\|\s*(\d{1,2}/\d{1,2}/\d{4})\s*\|')
$dateMatches = $dateRowRegex.Matches($blockContent)
if ($dateMatches.Count -gt 0) {
$lastMatch = $dateMatches[$dateMatches.Count - 1]
$lastUpdated = $lastMatch.Groups[1].Value
}
$replacement = $match.Groups[1].Value
if ($lastUpdated) {
$replacement += "Last updated: $lastUpdated" + $newLine + $newLine
} else {
$replacement += $match.Groups[3].Value

# Split into legal part (for LEGAL.md) and revision history (for end of document).
$revisionStartRegex = [regex]::new('\*\*Revision Summary\*\*', [System.Text.RegularExpressions.RegexOptions]::IgnoreCase)
$revMatch = $revisionStartRegex.Match($blockContent)
if ($revMatch.Success) {
$legalPart = $blockContent.Substring(0, $revMatch.Index).TrimEnd()
$revisionPart = $blockContent.Substring($revMatch.Index).Trim()
$extractedRevisionHistory = ConvertTo-OpenSpecGfmRevisionTable -RevisionMarkdown $revisionPart
$blockContent = $legalPart
}
$replacement += $match.Groups[4].Value

$replacement = $match.Groups[1].Value + $match.Groups[4].Value
$result = $result.Substring(0, $match.Index) + $replacement + $result.Substring($match.Index + $match.Length)
$removed = $true
}

[pscustomobject]@{
$out = [pscustomobject]@{
Markdown = $result
Removed = $removed
}
if ($removed -and $blockContent) {
Add-Member -InputObject $out -NotePropertyName 'ExtractedBoilerplate' -NotePropertyValue $blockContent
}
if ($extractedRevisionHistory) {
Add-Member -InputObject $out -NotePropertyName 'ExtractedRevisionHistory' -NotePropertyValue $extractedRevisionHistory
}
if ($lastUpdated) {
Add-Member -InputObject $out -NotePropertyName 'LastUpdated' -NotePropertyValue $lastUpdated
}
$out
}

function ConvertTo-OpenSpecGfmRevisionTable {
    <#
    .SYNOPSIS
    Tidies the extracted revision block so it can be placed under a separate "Revision History" heading.

    .DESCRIPTION
    Trims the input, removes any line consisting only of a bold "Revision Summary" or
    "Revision History" label, renames the "Revision History" column that sits between the
    "Date" and "Revision Class" header cells to "Version", and trims the result again.

    .PARAMETER RevisionMarkdown
    The Markdown text of the revision block.

    .OUTPUTS
    System.String. The cleaned revision Markdown.
    #>
    [CmdletBinding()]
    param(
        [Parameter(Mandatory)]
        [string]$RevisionMarkdown
    )

    # A bold label on its own line; the section heading is supplied by Add-OpenSpecRevisionHistorySectionAtEnd.
    $standaloneLabelPattern = '(?im)^\s*\*\*Revision (?:Summary|History)\*\*\s*\r?\n'
    # Header row whose middle column is named "Revision History" (it holds the version numbers).
    $headerRowPattern = '(?m)^(\|\s*Date\s*\|)\s*Revision History\s*(\|\s*Revision Class\s*\|)'

    $table = $RevisionMarkdown.Trim() -replace $standaloneLabelPattern, ''
    $table = $table -replace $headerRowPattern, '$1 Version $2'

    return $table.Trim()
}

function Add-OpenSpecSectionAnchors {
Expand Down
13 changes: 13 additions & 0 deletions AwakeCoding.OpenSpecs/Public/Convert-OpenSpecToMarkdown.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,19 @@ function Convert-OpenSpecToMarkdown {

$cleaned.Markdown | Set-Content -LiteralPath $markdownPath -Encoding UTF8

if ($protocolId -eq 'MS-DTYP' -and $cleaned.PSObject.Properties['ExtractedBoilerplate']) {
$legalDir = Join-Path -Path $OutputPath -ChildPath '_legal'
if (-not (Test-Path -LiteralPath $legalDir)) {
[void](New-Item -Path $legalDir -ItemType Directory -Force)
}
$legalContent = $cleaned.ExtractedBoilerplate.Trim()
if ($legalContent -and -not ($legalContent -match '^(#|\*\*[^*]+\*\*)')) {
$legalContent = "# Intellectual Property Rights Notice for Open Specifications Documentation`n`n" + $legalContent
}
$legalPath = Join-Path -Path $legalDir -ChildPath 'LEGAL.md'
$legalContent | Set-Content -LiteralPath $legalPath -Encoding UTF8
}

$layoutModelPath = Join-Path -Path $artifactDirectory -ChildPath 'layout-model.json'
$allIssues | ConvertTo-Json -Depth 8 | Set-Content -LiteralPath $layoutModelPath -Encoding UTF8

Expand Down
Loading