Skip to content

Commit 418d00a

Browse files
trackd and JustinGrote authored
Update HtmlAgilityPack, Pester Tests, refactor (#11)
* update HtmlAgilityPack to 1.11.60 + refactor + update tests to support pester v5
* refactor + update tests to support pester v5
* pester fixes + minor changes
* update
* cleanup
* set instead of add
* Remove Export-ModuleMember, unnecessary since it is done in manifest
* Fix buildenvironment for Pester 5
* Remove Versioning from Build
* Pester v4 -> v5
* Disable decrepit versioning stuff

---------

Co-authored-by: trackd <[email protected]>
Co-authored-by: Justin Grote <[email protected]>
1 parent 0153f95 commit 418d00a

12 files changed

Lines changed: 218 additions & 263 deletions

PSModule.build.ps1

Lines changed: 28 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -105,33 +105,33 @@ Enter-Build {
105105
Get-Variable | select-object name, value, visibility | format-table -autosize | out-string | write-verbose
106106
}
107107

108-
#Register Nuget
109-
if (!(get-packageprovider "Nuget" -ForceBootstrap -ErrorAction silentlycontinue)) {
110-
write-verbose "Nuget Provider Not found. Fetching..."
111-
Install-PackageProvider Nuget -forcebootstrap -scope currentuser @PassThruParams | out-string | write-verbose
112-
write-verboseheader "Installed Nuget Provider Info"
113-
Get-PackageProvider Nuget @PassThruParams | format-list | out-string | write-verbose
114-
}
115-
116-
#Fix a bug with the Appveyor 2017 image having a broken nuget (points to v3 URL but installed packagemanagement doesn't query v3 correctly)
117-
#Next command will add this back
118-
if ($ENV:APPVEYOR -and ($ENV:APPVEYOR_BUILD_WORKER_IMAGE -eq 'Visual Studio 2017')) {
119-
write-verbose "Detected Appveyor VS2017 Image, using v2 Nuget API"
120-
UnRegister-PackageSource -Name nuget.org
121-
}
122-
123-
#Add the nuget repository so we can download things like GitVersion
124-
if (!(Get-PackageSource "nuget.org" -erroraction silentlycontinue)) {
125-
write-verbose "Registering nuget.org as package source"
126-
Register-PackageSource -provider NuGet -name nuget.org -location http://www.nuget.org/api/v2 -Trusted @PassThruParams | out-string | write-verbose
127-
}
128-
else {
129-
$nugetOrgPackageSource = Set-PackageSource -name 'nuget.org' -Trusted @PassThruParams
130-
if ($PassThruParams.Verbose) {
131-
write-verboseheader "Nuget.Org Package Source Info "
132-
$nugetOrgPackageSource | format-table | out-string | write-verbose
133-
}
134-
}
108+
# #Register Nuget
109+
# if (!(get-packageprovider "Nuget" -ForceBootstrap -ErrorAction silentlycontinue)) {
110+
# write-verbose "Nuget Provider Not found. Fetching..."
111+
# Install-PackageProvider Nuget -forcebootstrap -scope currentuser @PassThruParams | out-string | write-verbose
112+
# write-verboseheader "Installed Nuget Provider Info"
113+
# Get-PackageProvider Nuget @PassThruParams | format-list | out-string | write-verbose
114+
# }
115+
116+
# #Fix a bug with the Appveyor 2017 image having a broken nuget (points to v3 URL but installed packagemanagement doesn't query v3 correctly)
117+
# #Next command will add this back
118+
# if ($ENV:APPVEYOR -and ($ENV:APPVEYOR_BUILD_WORKER_IMAGE -eq 'Visual Studio 2017')) {
119+
# write-verbose "Detected Appveyor VS2017 Image, using v2 Nuget API"
120+
# UnRegister-PackageSource -Name nuget.org
121+
# }
122+
123+
# #Add the nuget repository so we can download things like GitVersion
124+
# if (!(Get-PackageSource "nuget.org" -erroraction silentlycontinue)) {
125+
# write-verbose "Registering nuget.org as package source"
126+
# Register-PackageSource -provider NuGet -name nuget.org -location http://www.nuget.org/api/v2 -Trusted @PassThruParams | out-string | write-verbose
127+
# }
128+
# else {
129+
# $nugetOrgPackageSource = Set-PackageSource -name 'nuget.org' -Trusted @PassThruParams
130+
# if ($PassThruParams.Verbose) {
131+
# write-verboseheader "Nuget.Org Package Source Info "
132+
# $nugetOrgPackageSource | format-table | out-string | write-verbose
133+
# }
134+
# }
135135

136136
#Move to the Project Directory if we aren't there already
137137
Set-Location $buildRoot
@@ -476,7 +476,7 @@ task PublishPSGallery -if (-not $SkipPublish) {
476476
### SuperTasks
477477
# These are the only supported items to run directly from Invoke-Build
478478
task Deploy PreDeploymentChecks,Package,PublishGitHubRelease,PublishPSGallery
479-
task Build Clean,CopyFilesToBuildDir,UpdateMetadata
479+
task Build Clean, CopyFilesToBuildDir
480480
task Test Pester
481481

482482
#Default Task - Build, Test with Pester, Deploy

PowerHTML.psd1

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212
RootModule = 'PowerHTML.psm1'
1313

1414
# Version number of this module.
15-
ModuleVersion = '0.0.1'
15+
ModuleVersion = '0.1.8'
1616

1717
# Supported PSEditions
1818
# CompatiblePSEditions = @()
@@ -63,7 +63,7 @@ Description = 'Provides a wrapper for HTML Agility Pack for use where the IE HTM
6363
# TypesToProcess = @()
6464

6565
# Format files (.ps1xml) to be loaded when importing this module
66-
FormatsToProcess = @('.\Types\*.ps1xml')
66+
FormatsToProcess = @('.\Types\HtmlAgilityPack.HtmlTextNode.ps1xml')
6767

6868
# Modules to import as nested modules of the module specified in RootModule/ModuleToProcess
6969
# NestedModules = @()
@@ -120,5 +120,3 @@ PrivateData = @{
120120
# DefaultCommandPrefix = ''
121121

122122
}
123-
124-

PowerHTML.psm1

Lines changed: 8 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,9 @@
11
#Get public and private function definition files.
2-
$PublicFunctions = @( Get-ChildItem -Path $PSScriptRoot\Public\*.ps1 -ErrorAction SilentlyContinue )
3-
$PrivateFunctions = @( Get-ChildItem -Path $PSScriptRoot\Private\*.ps1 -ErrorAction SilentlyContinue )
2+
$PublicFunctions = @( Get-ChildItem -Path $PSScriptRoot\Public\*.ps1 -ErrorAction Ignore )
3+
$PrivateFunctions = @( Get-ChildItem -Path $PSScriptRoot\Private\*.ps1 -ErrorAction Ignore )
44

55
#Get JSON settings files
6-
$ModuleSettings = @( Get-ChildItem -Path $PSScriptRoot\Settings\*.json -ErrorAction SilentlyContinue )
6+
$ModuleSettings = @( Get-ChildItem -Path $PSScriptRoot\Settings\*.json -ErrorAction Ignore )
77

88
#Determine which assembly versions to load
99
#See if .Net Standard 2.0 is available on the system and if not, load the legacy Net 4.0 library
@@ -40,23 +40,15 @@ if ($AssembliesToLoad) {
4040
}
4141

4242
#Dot source the files
43-
Foreach($FunctionToImport in @($PublicFunctions + $PrivateFunctions))
44-
{
45-
Try
46-
{
43+
foreach ($FunctionToImport in @($PublicFunctions + $PrivateFunctions)) {
44+
try {
4745
. $FunctionToImport.fullname
48-
}
49-
Catch
50-
{
46+
} catch {
5147
Write-Error -Message "Failed to import function $($import.fullname): $_"
5248
}
5349
}
5450

5551
#Import Settings files as global objects based on their filename
56-
foreach ($ModuleSettingsItem in $ModuleSettings)
57-
{
52+
foreach ($ModuleSettingsItem in $ModuleSettings) {
5853
New-Variable -Name "$($ModuleSettingsItem.basename)" -Scope Global -Value (convertfrom-json (Get-Content -raw $ModuleSettingsItem.fullname)) -Force
59-
}
60-
61-
#Export the public functions. This requires them to match the standard Noun-Verb powershell cmdlet format as a safety mechanism
62-
Export-ModuleMember -Function ($PublicFunctions.Basename | where {$PSitem -match '^\w+-\w+$'})
54+
}

Public/ConvertFrom-HTML.ps1

Lines changed: 90 additions & 103 deletions
Original file line number | Diff line number | Diff line change
@@ -1,122 +1,109 @@
1-
<#
2-
.SYNOPSIS
3-
Takes an HTML input and converts it to an HTMLAgilityPack htmlNode object that can be navigated using Linq
4-
.DESCRIPTION
5-
Long description
6-
.EXAMPLE
7-
PS C:\> $HTMLString = @"
8-
<!DOCTYPE html>
9-
<html>
10-
<body>
11-
<h1>My First Heading</h1>
12-
<p>My first paragraph.</p>d
13-
</body>
14-
</html>
15-
"@
16-
PS C:\> $HTMLString | ConvertFrom-HTML -OutVariable result
17-
18-
NodeType Name AttributeCount ChildNodeCount ContentLength InnerText
19-
-------- ---- -------------- -------------- ------------- ---------
20-
Document #document 0 4 103 …
21-
22-
PS C:\> $result.SelectSingleNode("//body/h1")
23-
24-
NodeType Name AttributeCount ChildNodeCount ContentLength InnerText
25-
-------- ---- -------------- -------------- ------------- ---------
26-
Element h1 0 1 16 My First Heading
27-
28-
Convert HTML string to a HtmlNode via the pipeline.
29-
30-
.EXAMPLE
31-
PS C:\> $uri = "https://www.powershellgallery.com/"
32-
PS C:\> $result = ConvertFrom-HTML -uri $uri
33-
PS C:\> $result
34-
35-
NodeType Name AttributeCount ChildNodeCount ContentLength InnerText
36-
-------- ---- -------------- -------------- ------------- ---------
37-
Document #document 0 4 17550 …
38-
39-
Fetch and parse $uri directly via the URI pipeline.
40-
.EXAMPLE
41-
PS C:\> Get-Item $testFilePath | ConvertFrom-Html
42-
43-
NodeType Name AttributeCount ChildNodeCount ContentLength InnerText
44-
-------- ---- -------------- -------------- ------------- ---------
45-
Document #document 0 5 105 …
46-
47-
Parse an HTML file piped from Get-Item.
48-
.INPUTS
49-
[String[]]
50-
[System.IO.FileInfo[]]
51-
.OUTPUTS
52-
[HtmlAgilityPack.HtmlDocument]
53-
[HtmlAgilityPack.HtmlNode]
54-
.NOTES
55-
General notes
56-
#>
1+
572
function ConvertFrom-Html {
58-
[CmdletBinding(DefaultParameterSetName="String")]
59-
param (
3+
<#
4+
.SYNOPSIS
5+
Takes an HTML input and converts it to an HTMLAgilityPack htmlNode object that can be navigated using Linq
6+
.DESCRIPTION
7+
Long description
8+
.EXAMPLE
9+
$HTMLString = @'
10+
<!DOCTYPE html>
11+
<html>
12+
<body>
13+
<h1>My First Heading</h1>
14+
<p>My first paragraph.</p>d
15+
</body>
16+
</html>
17+
'@ | ConvertFrom-HTML
18+
19+
$HTMLString
20+
21+
NodeType Name AttributeCount ChildNodeCount ContentLength InnerText
22+
-------- ---- -------------- -------------- ------------- ---------
23+
Document #document 0 4 103 …
24+
25+
$HTMLString.SelectSingleNode('//body/h1')
26+
27+
NodeType Name AttributeCount ChildNodeCount ContentLength InnerText
28+
-------- ---- -------------- -------------- ------------- ---------
29+
Element h1 0 1 16 My First Heading
30+
31+
Convert HTML string to a HtmlNode via the pipeline.
32+
33+
.EXAMPLE
34+
$uri = [Uri]'https://www.powershellgallery.com/' | ConvertFrom-HTML
35+
$uri
36+
37+
NodeType Name AttributeCount ChildNodeCount ContentLength InnerText
38+
-------- ---- -------------- -------------- ------------- ---------
39+
Document #document 0 4 17550 …
40+
41+
Fetch and parse a url.
42+
.EXAMPLE
43+
Get-Item $testFilePath | ConvertFrom-Html
44+
45+
NodeType Name AttributeCount ChildNodeCount ContentLength InnerText
46+
-------- ---- -------------- -------------- ------------- ---------
47+
Document #document 0 5 105 …
48+
49+
Parse an HTML file piped from Get-Item.
50+
.INPUTS
51+
[String[]]
52+
[System.IO.FileInfo[]]
53+
[System.URI[]]
54+
.OUTPUTS
55+
[HtmlAgilityPack.HtmlDocument]
56+
[HtmlAgilityPack.HtmlNode]
57+
.NOTES
58+
General notes
59+
#>
60+
[OutputType([HtmlAgilityPack.HtmlNode])]
61+
[OutputType([HtmlAgilityPack.HtmlDocument])]
62+
[CmdletBinding(DefaultParameterSetName = 'String')]
63+
param(
6064
#The HTML text to parse. Accepts multiple separate documents as an array. This also accepts pipeline from Invoke-WebRequest
61-
[Parameter(ParameterSetName="String",Mandatory,ValueFromPipeline,ValueFromPipelineByPropertyName,Position=0)]
62-
[String[]]$Content,
65+
[Parameter(ParameterSetName = 'String', Mandatory, ValueFromPipeline, ValueFromPipelineByPropertyName, Position = 0)]
66+
[String[]] $Content,
6367

6468
#The URI or URIs from which to retrieve content. This may be faster than using Invoke-WebRequest but is less flexible in the method of retrieval (for instance, no POST)
65-
[Parameter(ParameterSetName="URI",Mandatory,ValueFromPipeline,ValueFromPipelineByPropertyName)]
66-
[System.URI[]]$URI,
69+
[Parameter(ParameterSetName = 'URI', Mandatory, ValueFromPipeline, ValueFromPipelineByPropertyName, Position = 0)]
70+
[System.URI[]] $URI,
6771

6872
#Path to file or files containing HTML content to convert. This accepts pipeline from Get-Childitem or Get-Item
69-
[Parameter(ParameterSetName="Path",Mandatory,ValueFromPipeline,ValueFromPipelineByPropertyName)]
70-
[System.IO.FileInfo[]]$Path,
73+
[Parameter(ParameterSetName = 'Path', Mandatory, ValueFromPipeline, ValueFromPipelineByPropertyName, Position = 0)]
74+
[System.IO.FileInfo[]] $Path,
7175

7276
#Do not return the Linq documentnode, instead return the HTMLDocument object. This is useful if you want to do XPath queries instead of Linq queries
73-
[switch]$Raw
74-
77+
[switch] $Raw
7578
)
76-
7779
begin {
80+
$html = [HtmlAgilityPack.HtmlDocument]::new()
81+
$web = [HtmlAgilityPack.HtmlWeb]::new()
7882
}
79-
8083
process {
81-
#Find the type of input and bind it to inputObject
82-
$inputObject = $null
83-
foreach ($contentType in "Content","URI","Path") {
84-
if ((Get-Variable -erroraction SilentlyContinue $contentType).value) {
85-
$inputObject = (Get-Variable $contentType).value
86-
break
87-
}
88-
}
89-
if (-not $inputObject) {write-error "Input Object Type Not Identified. If you see this then ConvertFrom-HTML needs better input validation"}
90-
91-
#Unwrap any arrays. This allows us to accept both pipeline and parameter input
92-
$inputObject | ForEach-Object {
93-
$inputItem = $PSItem
94-
$htmlDoc = new-object HtmlAgilityPack.HtmlDocument
95-
96-
#Process all object types into a common HTML document format
97-
switch ($inputItem.GetType().FullName) {
98-
"System.String" {
99-
$htmlDoc.LoadHtml($inputItem)
84+
switch ($PSCmdlet.ParameterSetName) {
85+
'String' {
86+
$Content | ForEach-Object {
87+
Write-Verbose "Loading HTML"
88+
$html.LoadHtml($_)
89+
if ($Raw) { $html } else { $html.DocumentNode }
10090
}
101-
"System.Uri" {
102-
$htmlDoc = (new-object HtmlAgilityPack.HtmlWeb).Load($inputItem)
103-
}
104-
"System.IO.FileInfo" {
105-
$htmlDoc.Load($inputItem)
106-
}
107-
Default {
108-
write-error "Object Type not supported or implemented. If you see this error then ConvertFrom-HTML has improper input validation"
109-
continue
91+
}
92+
'URI' {
93+
$URI | ForEach-Object {
94+
Write-Verbose "Loading URI $_"
95+
$site = $web.Load($_)
96+
if ($Raw) { $site } else { $site.DocumentNode }
11097
}
11198
}
112-
if ($inputItem) {
113-
if ($Raw) {
114-
$htmlDoc
115-
} else {
116-
$htmlDoc.DocumentNode
99+
'Path' {
100+
$Path | ForEach-Object {
101+
Write-Verbose "Loading File $_"
102+
$html.Load($_.FullName)
103+
if ($Raw) { $html } else { $html.DocumentNode }
117104
}
118105
}
119106
}
120-
121107
}
122-
}
108+
109+
}

0 commit comments

Comments
 (0)