Skip to content
Merged
  •  
  •  
  •  
37 changes: 33 additions & 4 deletions .github/workflows/convert-and-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,26 +17,55 @@ jobs:
- name: Checkout repository
uses: actions/checkout@v4

- name: Install oxipng
shell: pwsh
run: |
$release = Invoke-RestMethod -Uri 'https://api.github.com/repos/oxipng/oxipng/releases/latest'
$asset = $release.assets | Where-Object { $_.name -match 'x86_64-pc-windows-msvc\.zip$' } | Select-Object -First 1
if (-not $asset) {
throw 'Could not find Windows x86_64 zip asset in latest oxipng release.'
}
$zipPath = Join-Path $env:RUNNER_TEMP $asset.name
Invoke-WebRequest -Uri $asset.browser_download_url -OutFile $zipPath
$extractPath = Join-Path $env:RUNNER_TEMP 'oxipng'
if (Test-Path -LiteralPath $extractPath) {
Remove-Item -LiteralPath $extractPath -Recurse -Force
}
Expand-Archive -LiteralPath $zipPath -DestinationPath $extractPath -Force
$binPath = Get-ChildItem -LiteralPath $extractPath -Recurse -File -Filter 'oxipng.exe' | Select-Object -First 1
if (-not $binPath) {
throw 'oxipng.exe was not found after extracting release archive.'
}
$binDir = Split-Path -Path $binPath.FullName -Parent
$env:PATH = "$binDir;$env:PATH"
Add-Content -Path $env:GITHUB_PATH -Value $binDir
& $binPath.FullName --version

- name: Install OpenXML module
shell: pwsh
run: |
Set-PSRepository -Name PSGallery -InstallationPolicy Trusted
Install-Module -Name OpenXML -Force -Scope CurrentUser

- name: Build publish tree and Windows_Protocols.zip
- name: Build publish tree and windows-protocols.zip
shell: pwsh
working-directory: ${{ github.workspace }}
run: .\scripts\Build-Publish.ps1
run: .\scripts\Build-Publish.ps1 -ThrottleLimit 4 -AllowPartial

- name: Upload publish artifact
uses: actions/upload-artifact@v4
with:
name: publish
path: Windows_Protocols.zip
path: windows-protocols.zip

- name: Stage downloadable bundle in publish tree
shell: pwsh
working-directory: ${{ github.workspace }}
run: Copy-Item -LiteralPath .\windows-protocols.zip -Destination .\skills\windows-protocols\windows-protocols.zip -Force

- name: Push to orphaned publish branch
shell: pwsh
working-directory: publish
working-directory: skills/windows-protocols
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
Expand Down
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@ artifacts/
downloads*/
converted*/
reports*/
publish/
publish/
windows-protocols.zip
78 changes: 78 additions & 0 deletions AwakeCoding.OpenSpecs/Private/Get-OpenSpecDocxFallbackUrls.ps1
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
<#
.SYNOPSIS
Returns fallback DOCX download URLs for a protocol by reading its RSS feed.

.DESCRIPTION
Used when the primary Learn-page DOCX link fails (e.g. stale dates on MS-THCH, MS-MQOD).
The feed is fetched from the Azure Front Door host. For the first feed item that yields
a DOCX link, up to two URLs are returned, in the order they should be tried:
  1. The Azure Front Door URL rebuilt from the YYMMDD date suffix of the link
     (Front Door works when blob storage returns 404, so it goes first).
  2. The original link, when it points at winprotocoldoc.blob.core.windows.net.
Fetch and parse failures are reported through Write-Verbose and produce no output;
this function never throws for an unreachable or malformed feed.

.PARAMETER ProtocolId
Protocol identifier without brackets, e.g. 'MS-THCH'.

.OUTPUTS
System.String. Zero, one or two URLs.
#>
function Get-OpenSpecDocxFallbackUrls {
    [CmdletBinding()]
    param(
        [Parameter(Mandatory)]
        [string]$ProtocolId
    )

    $rssBase = 'https://winprotocoldocs-bhdugrdyduf5h2e4.b02.azurefd.net'
    # File names on the server look like "[MS-XYZ].rss"; the brackets must be percent-encoded.
    $encodedBrackets = [System.Uri]::EscapeDataString("[$ProtocolId]")
    $rssUrl = "$rssBase/$ProtocolId/$encodedBrackets.rss"

    try {
        # -UseBasicParsing avoids the Internet Explorer DOM dependency on Windows PowerShell 5.1;
        # it is accepted (and a no-op) on PowerShell 6+.
        $response = Invoke-WebRequest -Uri $rssUrl -UseBasicParsing -MaximumRedirection 8 -TimeoutSec 30 -ErrorAction Stop
    }
    catch {
        Write-Verbose "RSS fetch failed for $ProtocolId : $($_.Exception.Message)"
        return @()
    }

    # Content is a byte[] when the server's content type is not recognised as text.
    $content = $response.Content
    if ($content -is [byte[]]) {
        $content = [System.Text.Encoding]::UTF8.GetString($content)
    }
    if ([string]::IsNullOrWhiteSpace($content)) {
        Write-Verbose "RSS feed for $ProtocolId was empty."
        return @()
    }
    # Strip BOM if present (can break XML parsing)
    if ($content[0] -eq [char]0xFEFF) {
        $content = $content.Substring(1)
    }

    try {
        $xml = [xml]$content
    }
    catch {
        Write-Verbose "RSS parse failed for $ProtocolId : $($_.Exception.Message)"
        return @()
    }

    $regexOptions = [System.Text.RegularExpressions.RegexOptions]::IgnoreCase
    $fallbacks = [System.Collections.Generic.List[string]]::new()

    foreach ($item in $xml.SelectNodes('//item')) {
        # Read InnerText from the node itself: PowerShell's XML property adapter returns an
        # XmlElement (not a string) when <description> is wrapped in CDATA, which would make
        # the href match below silently fail.
        $descNode = $item.SelectSingleNode('description')
        if ($null -eq $descNode) { continue }
        $desc = $descNode.InnerText
        if ([string]::IsNullOrWhiteSpace($desc)) { continue }

        # Match DOCX href (blob or azurefd), typically .../[MS-XYZ]-YYMMDD.docx
        $docxMatch = [regex]::Match($desc, 'href="([^"]+\.docx)(?:\?[^"]*)?"', $regexOptions)
        if (-not $docxMatch.Success) { continue }

        $url = [System.Net.WebUtility]::HtmlDecode($docxMatch.Groups[1].Value)
        if (-not $url) { continue }

        # Extract date suffix (YYMMDD) for Azure FD URL construction.
        # Case-insensitive so it agrees with the href match above.
        $dateMatch = [regex]::Match($url, '-(\d{6})\.docx', $regexOptions)

        # Azure Front Door works when blob returns 404; add first
        if ($dateMatch.Success) {
            $dateSuffix = $dateMatch.Groups[1].Value
            $azureFdUrl = "$rssBase/$ProtocolId/$encodedBrackets-$dateSuffix.docx"
            if ($azureFdUrl -notin $fallbacks) {
                [void]$fallbacks.Add($azureFdUrl)
            }
        }

        # Include original blob URL as fallback
        if ($url -match 'winprotocoldoc\.blob\.core\.windows\.net' -and $url -notin $fallbacks) {
            [void]$fallbacks.Add($url)
        }

        # First item is latest version; we have enough fallbacks
        if ($fallbacks.Count -gt 0) { break }
    }

    return @($fallbacks)
}
125 changes: 73 additions & 52 deletions AwakeCoding.OpenSpecs/Public/Get-OpenSpecCatalog.ps1
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
$script:OpenSpecReferenceDocsUri = 'https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-winprotlp/1593dc07-6116-4e9e-8aeb-85c7438fab0a'
$script:OpenSpecOverviewDocsUri = 'https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-winprotlp/4a1806f9-2979-491d-af3c-f82ed0a4c1ba'

# Reference specs (MS-DTYP, MS-ERREF, MS-LCID, MS-UCODEREF) from https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-winprotlp/1593dc07-6116-4e9e-8aeb-85c7438fab0a
$script:OpenSpecReferenceSpecs = @(
Expand All @@ -12,12 +13,10 @@ function Get-OpenSpecCatalog {
[CmdletBinding()]
param(
[string]$Uri = 'https://learn.microsoft.com/en-us/openspecs/windows_protocols/MS-WINPROTLP/e36c976a-6263-42a8-b119-7a3cc41ddd2a',
[switch]$IncludeReferenceSpecs
[switch]$IncludeReferenceSpecs,
[switch]$IncludeOverviewDocs
)

$response = Invoke-OpenSpecRequest -Uri $Uri
$html = $response.Content

$rowRegex = [regex]::new('(?is)<tr[^>]*>(?<row>.*?)</tr>')
$specLinkRegex = [regex]::new(
'(?is)<a\b[^>]*href\s*=\s*["''](?<href>\.\./(?<slug>(?:ms|mc)-[a-z0-9-]+)/(?<guid>[0-9a-f-]{36}))(?:["''][^>]*)?>(?<text>.*?)</a>'
Expand All @@ -28,66 +27,88 @@ function Get-OpenSpecCatalog {
$seen = [System.Collections.Generic.HashSet[string]]::new([System.StringComparer]::OrdinalIgnoreCase)
$entries = New-Object System.Collections.Generic.List[object]

foreach ($rowMatch in $rowRegex.Matches($html)) {
$rowHtml = $rowMatch.Groups['row'].Value
$linkMatch = $specLinkRegex.Match($rowHtml)
if (-not $linkMatch.Success) {
continue
}
$addFromPage = {
param(
[string]$SourceUri,
[switch]$AllowFallback
)

$labelText = ConvertFrom-OpenSpecHtml -Html $linkMatch.Groups['text'].Value
$idMatch = $idRegex.Match($labelText)
if (-not $idMatch.Success) {
continue
}
$response = Invoke-OpenSpecRequest -Uri $SourceUri
$html = $response.Content
$addedCount = 0

$protocolId = $idMatch.Groups['id'].Value.ToUpperInvariant()
if (-not $seen.Add($protocolId)) {
continue
}
foreach ($rowMatch in $rowRegex.Matches($html)) {
$rowHtml = $rowMatch.Groups['row'].Value
$linkMatch = $specLinkRegex.Match($rowHtml)
if (-not $linkMatch.Success) {
continue
}

$slug = $linkMatch.Groups['slug'].Value.ToLowerInvariant()
$specPageUrl = Resolve-OpenSpecAbsoluteUrl -BaseUrl $Uri -RelativeOrAbsoluteUrl ([System.Net.WebUtility]::HtmlDecode($linkMatch.Groups['href'].Value))
$title = ($labelText -replace '^\s*\[(?:MS|MC)-[A-Z0-9-]+\]\s*:\s*', '').Trim()
if ([string]::IsNullOrWhiteSpace($title)) {
$title = $protocolId
}
$labelText = ConvertFrom-OpenSpecHtml -Html $linkMatch.Groups['text'].Value
$idMatch = $idRegex.Match($labelText)
if (-not $idMatch.Success) {
continue
}

$description = ''
$cells = [regex]::Matches($rowHtml, $cellRegex)
if ($cells.Count -ge 2) {
$description = (ConvertFrom-OpenSpecHtml -Html $cells[1].Groups['content'].Value).Trim()
}
$protocolId = $idMatch.Groups['id'].Value.ToUpperInvariant()
if (-not $seen.Add($protocolId)) {
continue
}

$entries.Add([pscustomobject]@{
PSTypeName = 'AwakeCoding.OpenSpecs.Entry'
ProtocolId = $protocolId
Title = $title
Description = $description
SpecPageUrl = $specPageUrl
Slug = $slug
SourcePage = $Uri
})
}
$slug = $linkMatch.Groups['slug'].Value.ToLowerInvariant()
$specPageUrl = Resolve-OpenSpecAbsoluteUrl -BaseUrl $SourceUri -RelativeOrAbsoluteUrl ([System.Net.WebUtility]::HtmlDecode($linkMatch.Groups['href'].Value))
$title = ($labelText -replace '^\s*\[(?:MS|MC)-[A-Z0-9-]+\]\s*:\s*', '').Trim()
if ([string]::IsNullOrWhiteSpace($title)) {
$title = $protocolId
}

if ($entries.Count -eq 0) {
$protocolPattern = '\[(?<id>(?:MS|MC)-[A-Z0-9-]+)\]'
$idMatches = [regex]::Matches($html, $protocolPattern, 'IgnoreCase')
$protocolIds = $idMatches |
ForEach-Object { $_.Groups['id'].Value.ToUpperInvariant() } |
Sort-Object -Unique
$description = ''
$cells = [regex]::Matches($rowHtml, $cellRegex)
if ($cells.Count -ge 2) {
$description = (ConvertFrom-OpenSpecHtml -Html $cells[1].Groups['content'].Value).Trim()
}

foreach ($protocolId in $protocolIds) {
$entries.Add([pscustomobject]@{
PSTypeName = 'AwakeCoding.OpenSpecs.Entry'
ProtocolId = $protocolId
Title = $protocolId
Description = ''
SpecPageUrl = "https://learn.microsoft.com/en-us/openspecs/windows_protocols/$($protocolId.ToLowerInvariant())"
Slug = $protocolId.ToLowerInvariant()
SourcePage = $Uri
Title = $title
Description = $description
SpecPageUrl = $specPageUrl
Slug = $slug
SourcePage = $SourceUri
})
$addedCount++
}

if ($AllowFallback -and $addedCount -eq 0) {
$protocolPattern = '\[(?<id>(?:MS|MC)-[A-Z0-9-]+)\]'
$idMatches = [regex]::Matches($html, $protocolPattern, 'IgnoreCase')
$protocolIds = $idMatches |
ForEach-Object { $_.Groups['id'].Value.ToUpperInvariant() } |
Sort-Object -Unique

foreach ($protocolId in $protocolIds) {
if (-not $seen.Add($protocolId)) {
continue
}

$entries.Add([pscustomobject]@{
PSTypeName = 'AwakeCoding.OpenSpecs.Entry'
ProtocolId = $protocolId
Title = $protocolId
Description = ''
SpecPageUrl = "https://learn.microsoft.com/en-us/openspecs/windows_protocols/$($protocolId.ToLowerInvariant())"
Slug = $protocolId.ToLowerInvariant()
SourcePage = $SourceUri
})
}
}
}

& $addFromPage -SourceUri $Uri -AllowFallback

if ($IncludeOverviewDocs) {
& $addFromPage -SourceUri $script:OpenSpecOverviewDocsUri
}

if ($IncludeReferenceSpecs) {
Expand Down
43 changes: 40 additions & 3 deletions AwakeCoding.OpenSpecs/Public/Save-OpenSpecDocument.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ function Save-OpenSpecDocument {
}

$toDownload = [System.Collections.Generic.List[object]]::new()
$existsResults = [System.Collections.Generic.List[object]]::new()
foreach ($link in $links) {
$fileName = $link.FileName
if ([string]::IsNullOrWhiteSpace($fileName)) {
Expand All @@ -93,15 +94,15 @@ function Save-OpenSpecDocument {
$destination = Join-Path -Path $OutputPath -ChildPath $fileName

if ((Test-Path -LiteralPath $destination) -and -not $Force) {
[pscustomobject]@{
[void]$existsResults.Add([pscustomobject]@{
PSTypeName = 'AwakeCoding.OpenSpecs.DownloadResult'
ProtocolId = $link.ProtocolId
Format = $link.Format
Url = $link.Url
Path = $destination
Status = 'Exists'
Size = (Get-Item -LiteralPath $destination).Length
}
})
continue
}

Expand Down Expand Up @@ -157,8 +158,31 @@ function Save-OpenSpecDocument {
}
}

$tryDocxFallback = {
param($result, $destination)
if ($result.Status -ne 'Failed' -or $result.Format -ne 'DOCX' -or -not $result.ProtocolId) { return $result }
$fallbacks = Get-OpenSpecDocxFallbackUrls -ProtocolId $result.ProtocolId
foreach ($url in $fallbacks) {
if ($url -eq $result.Url) { continue }
try {
Invoke-WebRequest -Uri $url -OutFile $destination -MaximumRedirection 8 -ErrorAction Stop
return [pscustomobject]@{
PSTypeName = 'AwakeCoding.OpenSpecs.DownloadResult'
ProtocolId = $result.ProtocolId
Format = $result.Format
Url = $url
Path = $destination
Status = 'Downloaded'
Size = (Get-Item -LiteralPath $destination).Length
}
}
catch { continue }
}
return $result
}

$useParallel = $Parallel -and $PSVersionTable.PSVersion.Major -ge 7 -and $toDownload.Count -gt 1
if ($useParallel) {
$results = if ($useParallel) {
$toDownload | ForEach-Object -Parallel {
$link = $_.Link
$destination = $_.Destination
Expand Down Expand Up @@ -212,5 +236,18 @@ function Save-OpenSpecDocument {
& $downloadOne -link $item.Link -destination $item.Destination
}
}

# Retry failed DOCX via RSS fallback URLs (e.g. MS-THCH, MS-MQOD with stale Learn-page links)
$downloadResults = New-Object System.Collections.Generic.List[object]
$i = 0
foreach ($r in @($results)) {
$dest = $toDownload[$i].Destination
$r = & $tryDocxFallback -result $r -destination $dest
[void]$downloadResults.Add($r)
$i++
}

$existsResults
$downloadResults
}
}
Loading
Loading