commands.ps1
function ConvertFrom-MdBlock {
<#
    .SYNOPSIS
        Converts special blocks defined in markdown into html.

    .DESCRIPTION
        Converts special blocks defined in markdown into html.
        The resultant html is appended to the stringbuilder specified.
        The conversion logic is provided by Register-EBMarkdownBlock.

        Returns whether the next line should be a first paragraph or a regular paragraph.

    .PARAMETER Type
        What kind of block is this?

    .PARAMETER Lines
        The lines of text contained in the block.

    .PARAMETER Attributes
        Any attributes provided to the block.

    .PARAMETER StringBuilder
        The stringbuilder containing the overall html string being built.

    .EXAMPLE
        PS C:\> ConvertFrom-MdBlock -Type $type -Lines $lines -Attributes @{ } -StringBuilder $builder

        Converts the provided block data to html and appends it to the stringbuilder.
        Returns whether the next line should be a first paragraph or a regular paragraph.
#>
    [CmdletBinding()]
    param (
        [parameter(Mandatory = $true)]
        [string]
        $Type,

        [parameter(Mandatory = $true)]
        [AllowEmptyCollection()]
        [AllowEmptyString()]
        [string[]]
        $Lines,

        [parameter(Mandatory = $true)]
        [System.Collections.Hashtable]
        $Attributes,

        [parameter(Mandatory = $true)]
        [System.Text.StringBuilder]
        $StringBuilder
    )

    process {
        # Look up the conversion logic for this block type in the script-scoped table.
        $blockConverter = $script:mdBlockTypes[$Type]
        if (-not $blockConverter) {
            $stopParam = @{
                Message         = "Converter for block $Type not found! Make sure it is properly registered using Register-EBMarkdownBlock"
                EnableException = $true
                Cmdlet          = $PSCmdlet
                Category        = 'InvalidArgument'
            }
            Stop-PSFFunction @stopParam
        }

        # The converter receives everything that was bound to this call as a single object.
        $blockInfo = [pscustomobject]($PSBoundParameters | ConvertTo-PSFHashtable)

        # Whatever the converter emits is reported back as a boolean.
        $blockConverter.Invoke($blockInfo) -as [bool]
    }
}

function ConvertTo-MarkdownLine {
<#
    .SYNOPSIS
        Converts an input html paragraph to a markdown line of text.

    .DESCRIPTION
        Converts an input html paragraph to a markdown line of text.

    .PARAMETER Line
        The line of text to convert.

.EXAMPLE
        PS C:\> ConvertTo-MarkdownLine -Line $Line

        Converts the HTML $Line to markdown
#>
    [CmdletBinding()]
    param (
        [Parameter(ValueFromPipeline = $true, Mandatory = $true)]
        [AllowEmptyString()]
        [string[]]
        $Line
    )

    begin {
        # Regex pattern -> replacement text.
        # None of the patterns can match text produced by another one,
        # so the order in which they are applied does not matter.
        $tagReplacements = @{
            '</{0,1}em>'                                  = '_'
            '</{0,1}i>'                                   = '_'
            '</{0,1}strong>'                              = '**'
            '</{0,1}b>'                                   = '**'
            '<br>'                                        = '<br />'
            '<span style="font-weight: 400">(.+?)</span>' = '$1'
        }
    }
    process {
        foreach ($htmlLine in $Line) {
            $converted = $htmlLine
            foreach ($pattern in $tagReplacements.Keys) {
                $converted = $converted -replace $pattern, $tagReplacements[$pattern]
            }

            # Last step: drop the paragraph tags themselves and any surrounding whitespace.
            $withoutParagraphTags = $converted -replace '</{0,1}p.{0,}?>'
            $withoutParagraphTags.Trim()
        }
    }
}

function Read-RRChapter {
<#
    .SYNOPSIS
        Reads a Royal Road chapter and breaks it down into its components.

    .DESCRIPTION
        Reads a Royal Road chapter and breaks it down into its components.
        Part of the parsing process to convert Royal Road books into eBooks.

    .PARAMETER Url
        Url to the specific RR page to process.

    .PARAMETER Index
        The chapter index to include in the return object

    .PARAMETER NoHeader
        The book does not include a header in the text portion.
        Will take the chapter-name as header instead.

    .PARAMETER Replacements
        A hashtable with replacements.
        At the root level, either use the "Global" index for replacements that apply to all chapters
        or the number of the chapter it applies to.
        Each value of those key/value pairs contains yet another hashtable, using a label as key
        (the label is ignored, use this for human documentation in the file) and yet another hashtable as value.
        That hashtable may contain three keys:
        - Pattern (mandatory)
        - Text (mandatory)
        - Weight (optional)
        The Pattern is a piece of text used to find matching text within the current chapter. Uses Regex.
        The Text is what we replace matched content with.
        The Weight - if specified - is the processing order in case of multiple replacements -
        the lower the number, the earlier is it processed.

.EXAMPLE
        PS C:\> Read-RRChapter -Url https://www.royalroad.com/fiction/12345/evil-incarnate/chapter/666666/1-end-of-all-days

        Reads and converts the first chapter of evil incarnate (hint: does not exist)
#>
    [CmdletBinding()]
    param (
        [Parameter(Mandatory = $true)]
        [string]
        $Url,

        [int]
        $Index,

        [switch]
        $NoHeader,

        [hashtable]
        $Replacements
    )

    begin {
        #region functions
        # Emits the absolute url of the "Next Chapter" button, if the line contains it.
        function Get-NextLink {
            [OutputType([string])]
            [CmdletBinding()]
            param (
                [parameter(ValueFromPipeline = $true)]
                [string]
                $Line
            )

            process {
                if ($Line -notlike '*<a class="btn btn-primary*>Next <br class="visible-xs" />Chapter</a>*') { return }
                $Line -replace '^.+href="(.+?)".+$', 'https://www.royalroad.com$1'
            }
        }

        # Emits the content of any <h1> element found on the line.
        function Get-Title {
            [OutputType([string])]
            [CmdletBinding()]
            param (
                [parameter(ValueFromPipeline = $true)]
                [string]
                $Line
            )

            process {
                if ($Line -notmatch '<h1 .+?>(.+?)</h1>') { return }
                $matches[1]
            }
        }

        # Converts the html lines of the chapter body into markdown lines.
        # Every emitted line is followed by an empty line, so each becomes its own paragraph.
        function ConvertTo-Markdown {
            [OutputType([string])]
            [CmdletBinding()]
            param (
                [Parameter(ValueFromPipeline = $true)]
                [string]
                $Line,

                [switch]
                $NoHeader,

                [string]
                $Title
            )

            begin {
                $firstLineCompleted = $false

                # Typographic double quotes, normalized to a plain "
                $badQuotes = @(
                    [char]8220
                    [char]8221
                    [char]8222
                    [char]8223
                )
                $badQuotesPattern = $badQuotes -join "|"

                # Typographic single quotes, normalized to a plain '
                $badSingleQuotes = @(
                    [char]8216
                    [char]8217
                    [char]8218
                    [char]8219
                )
                $badSingleQuotesPattern = $badSingleQuotes -join "|"

                # Without a header in the text, the chapter title takes its place.
                if ($NoHeader) {
                    '# {0}' -f $Title
                    ''
                }
            }
            process {
                $lineNormalized = ($Line -replace $badQuotesPattern, '"' -replace $badSingleQuotesPattern, "'").Trim()

                # The very first line of the text is the header, unless -NoHeader was specified.
                if (-not $firstLineCompleted -and -not $NoHeader) {
                    '# {0}' -f ($lineNormalized -replace '</{0,1}p.{0,}?>' -replace '</{0,1}b>' -replace '</{0,1}strong>' -replace '<br>', '<br />')
                    ''
                    $firstLineCompleted = $true
                    return
                }

                # Scene dividers are turned into a "divide" block.
                if ($lineNormalized -eq '<p style="text-align: center">* * *</p>') {
                    @'
## <divide>
* * *
## </divide>
'@
                    return
                }

                $lineNormalized | ConvertTo-MarkdownLine
                ''
            }
        }

        # Applies the configured regex replacements to the finished markdown text.
        function ConvertTo-MarkdownFinal {
            [OutputType([string])]
            [CmdletBinding()]
            param (
                [Parameter(ValueFromPipeline = $true)]
                [string]
                $Text,

                [Hashtable]
                $Replacements,

                [int]
                $ChapterIndex
            )

            begin {
                # -Replacements is optional: indexing into $null would throw,
                # so only collect entries from tables that actually exist.
                $mapping = @(
                    if ($Replacements) {
                        if ($Replacements.Global) { $Replacements.Global.Values }
                        if ($Replacements[$ChapterIndex]) { $Replacements[$ChapterIndex].Values }
                    }
                ) | Sort-Object -Property { $_.Weight } # script block, so Weight is also resolved on hashtable entries
            }
            process {
                foreach ($item in $mapping) {
                    # An empty pattern would match between every character and insert Text everywhere.
                    if (-not $item.Pattern) { continue }
                    $Text = $Text -replace $item.Pattern, $item.Text
                }
                $Text
            }
        }
        #endregion functions
    }
    process {
        $found = $false

        # Download the page as individual lines; retried once after a pause when the server asks to slow down.
        $download = { (Invoke-WebRequest -Uri $Url -UseBasicParsing -ErrorAction Stop).Content -split "`n" }
        try { $allLines = & $download }
        catch {
            if ($_.ErrorDetails.Message -ne 'Slow down!') { throw }
            Start-Sleep -Seconds 1
            $allLines = & $download
        }

        # Keep only the lines between the start of the chapter content and the advertisement below it.
        $lines = $allLines | Where-Object {
            if ($_ -like '*<div class="chapter-inner chapter-content">*') { $found = $true }
            if ($_ -like '*<h6 class="bold uppercase text-center">Advertisement</h6>*') { $found = $false }

            # Remove all pictures, they don't close the tags correctly
            if (
                $_ -like '*<img*' -or
                $_ -like '*<input*'
            ) { return }
            $found
        }
        $title = $allLines | Get-Title

        # First and last content line are dropped for the markdown conversion.
        # Wrapped in an array so a single line is not indexed as a string,
        # and guarded so that fewer than three lines do not produce a reversed range.
        $contentLines = @($lines)
        $bodyLines = @()
        if ($contentLines.Count -gt 2) {
            $bodyLines = $contentLines[1 .. ($contentLines.Count - 2)]
        }

        [pscustomobject]@{
            Index    = $Index
            Title    = $title
            RawText  = $allLines -join "`n"
            Text     = $lines -join "`n" -replace '<br>', '<br />' -replace '<div class="chapter-inner chapter-content">', '<div>'
            TextMD   = $bodyLines | ConvertTo-Markdown -NoHeader:$NoHeader -Title $title | Join-String "`n" | ConvertTo-MarkdownFinal -Replacements $Replacements -ChapterIndex $Index
            NextLink = $allLines | Get-NextLink
        }
    }
}

function ConvertFrom-EBMarkdown {
<#
    .SYNOPSIS
        A limited conversion from markdown to html.

    .DESCRIPTION
        A limited conversion from markdown to html.
        This command will process multiple lines into useful html.
        It is however limited in scope:

        + Paragraphs
        + Italic/emphasized text
        + Bold text
        + Bullet Points

        Other elements, such as comments (">") or headers ("#") are being ignored.
        This is due to this command being scoped not to converting whole pages, but instead for fairly small passages of markdown.
        Especially as a tool used within Blocks.

.PARAMETER Line
        The lines of markdown string to convert.

    .PARAMETER EmphasisClass
        Which class to use for emphasized pieces of text.
        This is particularly intended for emphasis in text that is in italics by default.
        By default, emphasized text is wrapped into "<i>" and "</i>".
        However, when offering a class instead a span tag is used:
        '<span class="EmphasisClass">' and '</span>'.

    .PARAMETER ClassFirstParagraph
        Which class to use for the first paragraph found.
        This affects the very first paragraph as well as any first paragraph after bulletpoints.
        Defaults to the same class as used for the ClassParagraph parameter.

    .PARAMETER ClassParagraph
        Which class to use for all paragraph but the first one.
        Defaults to: No class at all.

    .PARAMETER Classes
        A hashtable for mapping html tags to class names.
        Ignored for paragraphs, italic and bold, but can be used for example to add a class to "<li>" items.

    .PARAMETER AlwaysBreak
        By default, common markdown practice is to build a paragraph from multiple lines of text.
        Only on an empty line would a new paragraph be created.
        This can be disabled with this switch, causing every end of line to be treated as the end of a paragraph.

    .EXAMPLE
        PS C:\> ConvertFrom-EBMarkdown -Line $Data.Lines

        Converts all the lines of text in $Data.Lines without assigning special classes to any text.

    .EXAMPLE
        PS C:\> ConvertFrom-EBMarkdown -Line $Data.Lines -ClassParagraph blockOther -ClassFirstParagraph blockFirst -EmphasisClass blockEmphasis

        Converts all the lines of text in $Data.Lines, assigning the specified classes as applicable.
#>
    [OutputType([string])]
    [CmdletBinding()]
    param (
        [Parameter(ValueFromPipeline = $true, Mandatory = $true)]
        [AllowEmptyString()]
        [string[]]
        $Line,

        [string]
        $EmphasisClass,

        [string]
        $ClassFirstParagraph,

        [string]
        $ClassParagraph,

        [hashtable]
        $Classes = @{ },

        [switch]
        $AlwaysBreak
    )

    begin {
        #region Utility Functions
        # Returns the ' class="..."' attribute text for a tag, or nothing if no class is mapped to it.
        function Get-ClassString {
            [OutputType([string])]
            [CmdletBinding()]
            param (
                [string]
                $Name,

                [hashtable]
                $Classes
            )

            $className = $Classes.$Name
            if ($className) { return ' class="{0}"' -f $className }
            ''
        }

        # Emits one <p> element and resets the paragraph state of the calling command.
        # The parameter defaults are read from the caller's variables at the time of the call.
        function Write-Paragraph {
            [OutputType([string])]
            [CmdletBinding()]
            param (
                [string[]]
                $Text,

                [string]
                $FirstParagraph = $ClassFirstParagraph,

                [string]
                $Paragraph = $ClassParagraph,

                [bool]
                $First = $isFirstParagraph
            )

            $class = if ($First -and $FirstParagraph) { $FirstParagraph } else { $Paragraph }
            $classString = Get-ClassString -Name p -Classes @{ p = $class }
            $html = '<p{0}>{1}</p>' -f $classString, ($Text -join " ")

            # Scope 1 is the calling command: the pending text is consumed and the next paragraph is no longer the first.
            Set-Variable -Name isFirstParagraph -Scope 1 -Value $false
            Set-Variable -Name currentParagraph -Scope 1 -Value @()

            $html
        }
        #endregion Utility Functions

        $currentParagraph = @()
        $inBullet = $false
        $isFirstParagraph = $true

        # Only the emphasis class is passed through to the per-line conversion.
        $convertParam = $PSBoundParameters | ConvertTo-PSFHashtable -Include EmphasisClass
    }
    process {
        foreach ($markdownLine in $Line) {
            #region Empty Line
            if (-not $markdownLine) {
                # An empty line ends whatever is currently open.
                if ($currentParagraph) { Write-Paragraph -Text $currentParagraph }
                if ($AlwaysBreak -and -not $inBullet) { Write-Paragraph -Text ' ' }
                if ($inBullet) {
                    '</ul>'
                    $inBullet = $false
                }
                continue
            }
            #endregion Empty Line

            #region Bullet Lists
            if ($markdownLine -match '^- |^\+ ') {
                # Whatever paragraph follows a list counts as a first paragraph again.
                $isFirstParagraph = $true

                if (-not $inBullet) {
                    if ($currentParagraph) { Write-Paragraph -Text $currentParagraph }
                    '<ul{0}>' -f (Get-ClassString -Name ul -Classes $Classes)
                    $inBullet = $true
                }

                "<li$(Get-ClassString -Name li -Classes $Classes)>$($markdownLine | Set-String -OldValue '^- |^\+ ' | ConvertFrom-EBMarkdownLine @convertParam)</li>"
                continue
            }
            #endregion Bullet Lists

            #region Default: paragraph
            $currentParagraph += $markdownLine | ConvertFrom-EBMarkdownLine @convertParam
            if ($AlwaysBreak) { Write-Paragraph -Text $currentParagraph }
            #endregion Default: paragraph
        }
    }
    end {
        # Close whatever is still open once all input has been processed.
        if ($inBullet) { '</ul>' }
        if ($currentParagraph) { Write-Paragraph -Text $currentParagraph }
    }
}

function ConvertFrom-EBMarkdownLine {
<#
    .SYNOPSIS
        Converts markdown notation of bold and cursive to html.

    .DESCRIPTION
        Converts markdown notation of bold and cursive to html.

    .PARAMETER Line
        The line of text to convert.

    .PARAMETER EmphasisClass
        The tag to wrap text in that was marked in markdown with "_" symbols
        By default it encloses with italic tags ("<i>Test</i>"), specifying a class will change it to a span instead.

    .EXAMPLE
        PS C:\> ConvertFrom-EBMarkdownLine -Line '_value1_'

        Will convert "_value1_" to "<i>value1</i>"
#>
    [CmdletBinding()]
    param (
        [Parameter(ValueFromPipeline = $true, Mandatory = $true)]
        [string[]]
        $Line,

        [string]
        $EmphasisClass
    )

    begin {
        # Replacement text for "_text_": italics by default, a classed span if a class was given.
        $emphasis = if ($EmphasisClass) { '<span class="{0}">$1</span>' -f $EmphasisClass } else { '<i>$1</i>' }
    }
    process {
        foreach ($markdownLine in $Line) {
            $withBold = $markdownLine -replace '\*\*(.+?)\*\*', '<b>$1</b>'
            $withBold -replace '_(.+?)_', $emphasis
        }
    }
}

function Export-EBBook {
<#
    .SYNOPSIS
        Exports pages and images into a epub ebook.

    .DESCRIPTION
        Exports pages and images into a epub ebook.

    .PARAMETER Path
        The path to export to.
        Will ignore the name if an explicit filename was specified.

    .PARAMETER Name
        The name of the ebook.
        Will also be used for the filename if a path to a folder was specified.
        Defaults to: New Book

    .PARAMETER FileName
        Explicitly specify the name of the exported file.
        The "Name" parameter will be used to calculate it if not specified.

    .PARAMETER Author
        The author to set for the ebook.

    .PARAMETER Publisher
        The publisher of the ebook.

    .PARAMETER CssData
        Custom CSS to use to style the ebook.
        Allows you to tune how the ebook is styled.

    .PARAMETER Page
        The pages to compile into an ebook.

    .PARAMETER Series
        The name of the series this book is part of.
Added as metadata to the build ebook.

    .PARAMETER Volume
        The volume number of the series this book is part of.
        Only effective if used together with the Series parameter.

    .PARAMETER Tags
        Any tags to add to the book's metadata.

    .PARAMETER Description
        A description to include in the book's metadata.

    .EXAMPLE
        PS C:\> Read-EBMicrosoftDocsIndexPage -Url https://docs.microsoft.com/en-us/windows-server/identity/ad-ds/plan/security-best-practices/best-practices-for-securing-active-directory | Export-EBBook -Path . -Name ads-best-practices.epub -Author "Friedrich Weinmann" -Publisher "Infernal Press"

        Compiles an ebook out of the Active Directory Best Practices.
#>
    [CmdletBinding()]
    param (
        [PsfValidateScript({ Resolve-PSFPath -Path $args[0] -Provider FileSystem -SingleItem -NewChild }, ErrorMessage = "Folder to place the file in must exist!")]
        [string]
        $Path = ".",

        [string]
        $Name = "New Book",

        [string]
        $FileName,

        [string]
        $Author = $env:USERNAME,

        [string]
        $Publisher = $env:USERNAME,

        [string]
        $CssData,

        [Parameter(Mandatory = $true, ValueFromPipeline = $true)]
        [EbookBuilder.Item[]]
        $Page,

        [string]
        $Series,

        [int]
        $Volume,

        [string[]]
        $Tags,

        [string]
        $Description
    )

    begin {
        #region Functions
        # Writes a text file (UTF8 without BOM) below the specified folder.
        function Write-File {
            [CmdletBinding()]
            param (
                [System.IO.DirectoryInfo]
                $Root,

                [string]
                $Path,

                [string]
                $Text
            )

            $tempPath = Resolve-PSFPath -Path (Join-Path $Root.FullName $Path) -NewChild
            Write-PSFMessage -Level SomewhatVerbose -Message "Writing file: $($Path)"
            $utf8NoBom = New-Object System.Text.UTF8Encoding($false)
            [System.IO.File]::WriteAllText($tempPath, $Text, $utf8NoBom)
        }

        # Escapes text for use as XML element content or attribute value (& < > " ').
        function ConvertTo-XmlText {
            [OutputType([string])]
            [CmdletBinding()]
            param (
                [string]
                $Text
            )

            [System.Security.SecurityElement]::Escape($Text)
        }

        # Maps the file extension of an image to its media type. Anything unknown is declared as jpeg.
        function Get-ImageMediaType {
            [OutputType([string])]
            [CmdletBinding()]
            param (
                [string]
                $FileName
            )

            switch ([System.IO.Path]::GetExtension($FileName).TrimStart('.').ToLowerInvariant()) {
                'png' { 'image/png' }
                'gif' { 'image/gif' }
                'bmp' { 'image/bmp' }
                'svg' { 'image/svg+xml' }
                default { 'image/jpeg' }
            }
        }

        # Builds the manifest entries for all pages.
        function ConvertTo-ManifestPageData {
            [CmdletBinding()]
            param (
                $Pages
            )

            $lines = $Pages | ForEach-Object {
                '        <item id="{0}" href="Text/{0}" media-type="application/xhtml+xml"/>' -f $_.EbookFileName
            }
            $lines -join "`n"
        }

        # Builds the manifest entries for all images.
        # The media type is derived from each file's extension rather than always claiming jpeg.
        function ConvertTo-ManifestImageData {
            [CmdletBinding()]
            param (
                $Images
            )

            $lines = $Images | ForEach-Object {
                '        <item id="{0}" href="Images/{1}" media-type="{2}"/>' -f ($_.ImageID -replace "\s", "_"), $_.FileName, (Get-ImageMediaType -FileName $_.FileName)
            }
            $lines -join "`n"
        }
        #endregion Functions

        #region Prepare Resources
        if (-not $FileName) { $FileName = $Name }

        # A folder path gets the filename appended, anything else is taken as the target file.
        $resolvedPath = Resolve-PSFPath -Path $Path -Provider FileSystem -SingleItem -NewChild
        if (Test-Path $resolvedPath) {
            if ((Get-Item $resolvedPath).PSIsContainer) { $resolvedPath = Join-Path $resolvedPath $FileName }
        }
        if ($resolvedPath -notlike "*.epub") { $resolvedPath += ".epub" }

        # Compress-Archive is given a .zip path, the result is renamed to .epub at the end.
        $zipPath = $resolvedPath -replace 'epub$', 'zip'

        $cssContent = $CssData
        if (-not $cssContent) {
            $cssContent = [System.IO.File]::ReadAllText((Resolve-Path "$($script:ModuleRoot)\data\Common.css"), [System.Text.Encoding]::UTF8)
        }

        $pages = @()
        $images = @()
        #endregion Prepare Resources
    }
    process {
        #region Process Input items
        foreach ($item in $Page) {
            switch ($item.Type) {
                "Page" { $pages += $item }
                "Image" { $images += $item }
            }
        }
        #endregion Process Input items
    }
    end {
        # NOTE(review): "$id++" as the only statement of a script block may not emit a value;
        # the TOC below falls back to Index whenever TocIndex is empty. Left unchanged - confirm intent.
        $id = 1
        $pages = $pages | Sort-Object Index | Select-PSFObject -KeepInputObject -Property @{
            Name       = "EbookFileName"
            # Expression = { "{0}.xhtml" -f (New-Guid) }
            Expression = { "Chapter {0:D3}.xhtml" -f $_.Index }
        }, @{
            Name       = "TocIndex"
            Expression = { $id++ }
        }

        # One identifier shared by content.opf and toc.ncx
        $bookId = New-Guid

        # Metadata ends up inside XML documents, so it must be escaped.
        $xmlName = ConvertTo-XmlText -Text $Name
        $xmlAuthor = ConvertTo-XmlText -Text $Author
        $xmlPublisher = ConvertTo-XmlText -Text $Publisher
        $xmlSeries = ConvertTo-XmlText -Text $Series
        $xmlDescription = ConvertTo-XmlText -Text $Description

        # GetTempPath() also works where $env:TEMP is not defined (Linux / MacOS).
        $tempPath = New-Item -Path ([System.IO.Path]::GetTempPath()) -Name "Ebook-$(Get-Random -Maximum 99999 -Minimum 10000)" -ItemType Directory -Force

        try {
            Write-File -Root $tempPath -Path 'mimetype' -Text 'application/epub+zip'

            $metaPath = New-Item -Path $tempPath.FullName -Name "META-INF" -ItemType Directory
            Write-File -Root $metaPath -Path 'container.xml' -Text @'
<?xml version="1.0" encoding="UTF-8"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
    <rootfiles>
        <rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>
    </rootfiles>
</container>
'@

            $oebpsPath = New-Item -Path $tempPath.FullName -Name "OEBPS" -ItemType Directory

            #region content.opf
            # The dc:identifier element is what the package's unique-identifier attribute refers to.
            $contentOpfText = @"
<?xml version="1.0" encoding="utf-8"?>
<package version="2.0" unique-identifier="uuid_id" xmlns="http://www.idpf.org/2007/opf">
    <metadata xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:opf="http://www.idpf.org/2007/opf" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:calibre="http://calibre.kovidgoyal.net/2009/metadata">
        <dc:identifier id="uuid_id" opf:scheme="uuid">$bookId</dc:identifier>
        <dc:publisher>$xmlPublisher</dc:publisher>
        <dc:language>en</dc:language>
        <dc:creator opf:role="aut" opf:file-as="$xmlAuthor">$xmlAuthor</dc:creator>
        <dc:title opf:file-as="$xmlName">$xmlName</dc:title>
"@
            if ($Description) {
                $contentOpfText += "`n        <dc:description>$xmlDescription</dc:description>"
            }
            if ($Series) {
                $contentOpfText += @"

        <opf:meta content="$xmlSeries" name="calibre:series" />
        <opf:meta content="$Volume.0" name="calibre:series_index" />
"@
            }
            foreach ($tag in $Tags) {
                $contentOpfText += "`n        <dc:subject>$(ConvertTo-XmlText -Text $tag)</dc:subject>"
            }
            $contentOpfText += @"

    </metadata>
    <manifest>
$(ConvertTo-ManifestPageData -Pages $pages)
$(ConvertTo-ManifestImageData -Images $images)
        <item id="ncx" href="toc.ncx" media-type="application/x-dtbncx+xml"/>
        <item id="style.css" href="Styles/Style.css" media-type="text/css"/>
    </manifest>
    <spine toc="ncx">
$($pages | Format-String -Format '        <itemref idref="{0}"/>' -Property EbookFileName | Join-String "`n")
    </spine>
    <guide/>
</package>
"@
            Write-File -Root $oebpsPath -Path 'content.opf' -Text $contentOpfText
            #endregion content.opf

            #region TOC.ncx
            $bookMarkText = ($pages | ForEach-Object {
                    # Prefer the sequential TOC index, fall back to the page index.
                    $tocIndex = $_.Index
                    if ($_.TocIndex) { $tocIndex = $_.TocIndex }
                    @'
        <navPoint id="navPoint-{0}" playOrder="{0}">
            <navLabel>
                <text>Chapter {0}</text>
            </navLabel>
            <content src="Text/{1}"/>
        </navPoint>
'@ -f $tocIndex, $_.EbookFileName
                }) -join "`n"

            $contentTocNcxText = @'
<?xml version="1.0" encoding="utf-8" ?>
<!DOCTYPE ncx PUBLIC "-//NISO//DTD ncx 2005-1//EN" "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd"><ncx version="2005-1" xmlns="http://www.daisy.org/z3986/2005/ncx/">
    <head>
        <meta content="{0}" name="dtb:uid"/>
        <meta content="1" name="dtb:depth"/>
        <meta content="0" name="dtb:totalPageCount"/>
        <meta content="0" name="dtb:maxPageNumber"/>
    </head>
    <docTitle>
        <text>{1}</text>
    </docTitle>
    <navMap>
{2}
    </navMap>
</ncx>
'@ -f $bookId, $xmlName, $bookMarkText
            Write-File -Root $oebpsPath -Path 'toc.ncx' -Text $contentTocNcxText
            #endregion TOC.ncx

            #region Files
            $stylesPath = New-Item -Path $oebpsPath.FullName -Name "Styles" -ItemType Directory
            Write-File -Root $stylesPath -Path 'Style.css' -Text $cssContent

            $textPath = New-Item -Path $oebpsPath.FullName -Name 'Text' -ItemType Directory
            foreach ($pageItem in $pages) {
                # The page content is html already and inserted as-is; only the title is escaped.
                $pageText = @'
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
    <title>{0}</title>
    <meta content="http://www.w3.org/1999/xhtml; charset=utf-8" http-equiv="Content-Type"/>
    <link href="../Styles/Style.css" type="text/css" rel="stylesheet"/>
</head>
<body>
{1}
</body>
</html>
'@ -f $xmlName, $pageItem.Content
                Write-File -Root $textPath -Path $pageItem.EbookFileName -Text $pageText
            }
            #endregion Files

            #region Images
            if ($images) {
                $imagesPath = New-Item -Path $oebpsPath.FullName -Name 'Images' -ItemType Directory
                foreach ($image in $images) {
                    $targetPath = Join-Path $imagesPath.FullName $image.FileName
                    [System.IO.File]::WriteAllBytes($targetPath, $image.Data)
                }
            }
            #endregion Images

            # Zip the staging folder, then replace any previous export with the new archive.
            Get-ChildItem -LiteralPath $tempPath.FullName | Compress-Archive -DestinationPath $zipPath -Force
            if (Test-Path -Path $resolvedPath) { Remove-Item -Path $resolvedPath -Force -ErrorAction Ignore }
            Rename-Item -Path $zipPath -NewName (Split-Path $resolvedPath -Leaf)
        }
        finally {
            # Always remove the staging folder, even when the export failed half-way.
            Remove-Item -LiteralPath $tempPath.FullName -Recurse -Force
        }
    }
}

function Export-EBMdBook {
<#
    .SYNOPSIS
        Converts a markdown-based book project into epub ebooks.

    .DESCRIPTION
        Converts a markdown-based book project into epub ebooks.
        This is the top-level execution command for processing the book pipeline.
        For details, see the description on New-EBBookProject.

.PARAMETER ConfigFile
        The path to the configuration file, defining the properties of the book project.

    .EXAMPLE
        PS C:\> Export-EBMdBook -ConfigFile .\config.psd1

        Builds the book project in the current folder.
#>
    [CmdletBinding()]
    param (
        [Parameter(Mandatory = $true)]
        [PsfValidateScript('PSFramework.Validate.FSPath.File', ErrorString = 'PSFramework.Validate.FSPath.File')]
        [string]
        $ConfigFile
    )

    # All paths in the configuration file are relative to the folder the file is in.
    $baseFolder = Split-Path -Path (Resolve-PSFPath -Path $ConfigFile)
    $config = Import-PSFPowerShellDataFile -Path $ConfigFile
    $bookRoot = Join-Path -Path $baseFolder -ChildPath $config.OutPath
    $blockRoot = Join-Path -Path $baseFolder -ChildPath $config.Blocks
    $exportPath = Join-Path -Path $baseFolder -ChildPath $config.ExportPath

    $author = "Unknown"
    if ($config.Author) { $author = $config.Author }
    $publisher = "Unknown"
    if ($config.Publisher) { $publisher = $config.Publisher }
    $cssPath = $null
    if ($config.Style) { $cssPath = Join-Path -Path $baseFolder -ChildPath $config.Style }

    # Run every block script of the project, each in its own child scope.
    foreach ($file in Get-ChildItem -Path $blockRoot -File -Filter *.ps1) {
        & {
            . $file.FullName
        }
    }

    # Each subfolder "<volume>-<name>" becomes one book.
    foreach ($folder in Get-ChildItem -Path $bookRoot -Directory) {
        $nameParts = $folder.Name -split "-", 2
        if ($nameParts.Count -lt 2) {
            # Without a "-" there is no book name to read; skip the folder rather than failing on a null value.
            Write-PSFMessage -Level Warning -Message "Skipping folder '$($folder.Name)': name does not follow the '<volume>-<name>' pattern"
            continue
        }
        $volume = ($folder.Name -split "-")[0] -as [int]
        $bookName = $nameParts[1].Trim()

        $exportParam = @{
            Name      = $bookName
            FileName  = '{0:D3}-{1}' -f $volume, $bookName
            Path      = $exportPath
            Author    = $author
            Publisher = $publisher
            Series    = $config.Name
            Volume    = $volume
        }
        if ($cssPath) {
            $exportParam.CssData = Get-ChildItem -Path $cssPath -Filter *.css | ForEach-Object {
                Get-Content -Path $_.FullName
            } | Join-String -Separator "`n"
        }
        if ($config.Tags) { $exportParam.Tags = $config.Tags }

        # Export-EBBook is driven step by step, so pages and pictures can be fed in from separate sources.
        $exportPipe = { Export-EBBook @exportParam }.GetSteppablePipeline()
        $exportPipe.Begin($true)

        Get-ChildItem -Path $folder.FullName -File -Filter *.md | Read-EBMarkdown | ForEach-Object {
            $exportPipe.Process($_)
        }

        $picturePath = Join-Path -Path $folder.FullName -ChildPath pictures
        if (Test-Path -Path $picturePath) {
            foreach ($file in Get-ChildItem -Path $picturePath -File | Where-Object Extension -in '.jpeg', '.png', '.jpg', '.bmp') {
                $pictureObject = [EbookBuilder.Picture]::GetPicture($file)
                $exportPipe.Process($pictureObject)
            }
        }

        $exportPipe.End()
    }
}

function New-EBBookProject {
<#
    .SYNOPSIS
        Create a new ebook project.

    .DESCRIPTION
        Create a new ebook project.
        This project will be designed for authoring in markdown.

        Recommended editor is VSCode, automation requires PowerShell and this module even after creation.
        All three can be installed on any common client Operating System, such as Windows, Linux or MacOS.

        It is recommended, but not required, to use a source control service such as GitHub to host your project
        (for free and not necessarily public).

    .PARAMETER Path
        The path where the project should be created.
        Defaults to the current path.

    .PARAMETER Name
        The name of the series / book.
        (This project template is designed with a series in mind, but can be used for a single book just as well)

    .PARAMETER Author
        The Author of the book.

    .PARAMETER Publisher
        The Publisher for this book.

.EXAMPLE
        PS C:\> New-EBBookProject -Name 'Genesis'

        Creates a new book project named "Genesis" in the current path.
#>
    [Diagnostics.CodeAnalysis.SuppressMessageAttribute("PSUseShouldProcessForStateChangingFunctions", "")]
    [CmdletBinding()]
    param (
        [Parameter(Mandatory = $true)]
        [string]
        $Name,

        [PsfValidateScript('PSFramework.Validate.FSPath.Folder', ErrorString = 'PSFramework.Validate.FSPath.Folder')]
        [string]
        $Path = '.',

        [string]
        $Author,

        [string]
        $Publisher
    )

    process {
        # Only the values the caller actually specified are handed to the template.
        $templateValues = $PSBoundParameters | ConvertTo-PSFHashtable -Include Name, Author, Publisher

        $templateParam = @{
            TemplateName = 'BookProject'
            NoFolder     = $true
            OutPath      = $Path
            Parameters   = $templateValues
        }
        Invoke-PSMDTemplate @templateParam

        Write-PSFMessage -Level Host -Message "Book Project $Name created under $(Resolve-PSFPath $Path)"
    }
}

function Read-EBMarkdown {
<#
    .SYNOPSIS
        Reads a markdown file and converts it to a page to be built into an ebook

    .DESCRIPTION
        Reads a markdown file and converts it to a page to be built into an ebook

    .PARAMETER Path
        Path to the file to read.

.EXAMPLE
        PS C:\> Get-ChildItem *.md | Read-EBMarkdown

        Reads and converts all markdown files in the current folder
#>
    [CmdletBinding()]
    param (
        [parameter(ValueFromPipeline = $true, ValueFromPipelineByPropertyName = $true)]
        [Alias('FullName')]
        [string[]]
        $Path
    )

    begin {
        # Converts a single markdown file into a page object.
        function ConvertFrom-Markdown {
            [CmdletBinding()]
            param (
                [string]
                $Path,

                [int]
                $Index
            )

            $lines = Get-Content -Path $Path -Encoding UTF8
            $stringBuilder = New-SBStringBuilder -Name ebook

            $inBlock = $false
            $blockData = [pscustomobject]@{
                Attributes = @{ }
                Type       = $null
                Lines      = @()
                File       = $Path
            }

            $paragraph = @()
            $firstPar = $true

            # Both helpers are dot-sourced (". $helper") so they read and update the variables of this function.

            # Writes the pending paragraph - if any - and resets it.
            $flushParagraph = {
                if ($paragraph) {
                    $class = 'text'
                    if ($firstPar) {
                        $class = 'firstpar'
                        $firstPar = $false
                    }
                    $html = ($paragraph -join " ") -replace '\*\*(.+?)\*\*', '<b>$1</b>' -replace '_(.+?)_', '<i>$1</i>'
                    $null = $stringBuilder.AppendLine("<p class=`"$class`">$html</p>")
                    $paragraph = @()
                }
            }

            # Converts the current block; the result decides whether the next paragraph is a first paragraph.
            $closeBlock = {
                try { $firstPar = ConvertFrom-MdBlock -Type $blockData.Type -Lines $blockData.Lines -Attributes $blockData.Attributes -StringBuilder $stringBuilder }
                catch { Stop-PSFFunction -Message 'Failed to convert block' -ErrorRecord $_ -Target $blockData -EnableException $true -Cmdlet $PSCmdlet }
                $inBlock = $false
            }

            foreach ($line in $lines) {
                #region Process Block Content
                if ($inBlock) {
                    # Inside a block, the next "## <" line closes it; everything else is block content.
                    if ($line -like '## <*') { . $closeBlock }
                    else { $blockData.Lines += $line }
                    continue
                }
                #endregion Process Block Content

                # Handle Chapter Title
                if ($line -like '# *') {
                    # Text still pending belongs in front of the title, not behind it.
                    . $flushParagraph
                    $null = $stringBuilder.AppendLine("<h2>$($line -replace '^# ')</h2>")
                    continue
                }

                # Handle begin of a Block
                if ($line -like '## <*') {
                    # Same as for titles: keep the output in the order of the source.
                    . $flushParagraph
                    $inBlock = $true
                    $blockData = New-Block -Line $line -Path $Path
                    continue
                }

                #region Process paragraph
                # An empty line ends the current paragraph.
                if ($line.Trim() -eq "") {
                    . $flushParagraph
                    continue
                }

                $paragraph += $line
                #endregion Process paragraph
            }

            #region Ensure open block and final paragraph are taken care of
            if ($inBlock) {
                # The file ended before the block was closed. Convert what was read rather than dropping it silently.
                Write-PSFMessage -Level Warning -Message "Block '$($blockData.Type)' in '$Path' is never closed, converting it with the lines read so far"
                . $closeBlock
            }
            . $flushParagraph
            #endregion Ensure open block and final paragraph are taken care of

            New-Object EbookBuilder.Page -Property @{
                Index       = $Index
                Name        = (Get-Item -Path $Path).BaseName
                Content     = Close-SBStringBuilder -Name ebook
                SourceName  = $Path
                TimeCreated = Get-Date
                MetaData    = @{ }
            }
        }

        # Parses the opening line of a block ('## <type key="value" ...>') into a block data object.
        function New-Block {
            [Diagnostics.CodeAnalysis.SuppressMessageAttribute('PSUseShouldProcessForStateChangingFunctions', '')]
            [CmdletBinding()]
            param (
                [string]
                $Line,

                [string]
                $Path
            )

            $type = $Line -replace '## <(\w+).+$', '$1'

            # Every key="value" pair on the line becomes an attribute.
            $attributes = @{ }
            $entries = $Line | Select-String '(\w+)="(.+?)"' -AllMatches
            foreach ($match in $entries.Matches) {
                $attributes[$match.Groups[1].Value] = $match.Groups[2].Value
            }

            [pscustomobject]@{
                Attributes = $attributes
                Type       = $type
                Lines      = @()
                File       = $Path
            }
        }

        # Pages are numbered in the order in which the files arrive.
        $Index = 1
    }
    process {
        foreach ($pathItem in $Path) {
            Write-PSFMessage -Message "Processing: $pathItem"
            ConvertFrom-Markdown -Path $pathItem -Index $Index
            $Index++
        }
    }
}

function Read-EBMdBlockData {
<#
    .SYNOPSIS
        Parses lines of a markdown block into a structured content set.

    .DESCRIPTION
        Parses lines of a markdown block into a structured content set.
        This assumes the lines of strings provided are shaped in a structured manner.

        Example Input:

        > Classes

        + Hunter Level 10
        + Warrior Level 12

        > Skills

        Bash
        Slash
        Shoot

        This would then become a hashtable with two keys: Classes & Skills.
        Each line within each section would become the values of these keys.

    .PARAMETER Lines
        The lines of string to parse.

    .PARAMETER Header
        What constitutes a section header.
        This expects each header line to start with this sequence, followed by a whitespace.

    .PARAMETER IncludeEmpty
        Whether empty lines are included or not.

    .EXAMPLE
        PS C:\> $components = $Data.Lines | Read-EBMdBlockData

        Read all lines of string available in $Data, returns them as a components hashtable.
#>
    [OutputType([hashtable])]
    [CmdletBinding()]
    param (
        [Parameter(ValueFromPipeline = $true)]
        [AllowEmptyCollection()]
        [string[]]
        $Lines,

        [string]
        $Header = '>',

        [switch]
        $IncludeEmpty
    )

    begin {
        # Lines in front of the first header are collected under "_default".
        $components = @{
            '_default' = @()
        }
        $currentComponent = '_default'

        # The header is a literal sequence: escape it, so characters such as "+", "*" or "["
        # are not interpreted as wildcard / regex syntax.
        $headerWildcard = '{0} *' -f [System.Management.Automation.WildcardPattern]::Escape($Header)
        $headerPattern = '^{0} ' -f [regex]::Escape($Header)
    }
    process {
        foreach ($line in $Lines) {
            if (-not $IncludeEmpty -and $line.Trim() -eq "") { continue }

            # Regular line: add it to the section currently being read.
            if ($line -notlike $headerWildcard) {
                $components.$currentComponent += $line
                continue
            }

            # Header line: start a new, empty section named after the rest of the line.
            $currentComponent = $line -replace $headerPattern
            $components[$currentComponent] = @()
        }
    }
    end {
        # Do not return the default section if nothing was collected in it.
        if (-not $components['_default']) { $components.Remove('_default') }
        $components
    }
}

function Read-EBMdDataSection {
<#
    .SYNOPSIS
        A simple string-data parser.

    .DESCRIPTION
        A simple string-data parser.
        Ignores empty lines.
        Skips lines that do not contain a ":" symbol.
        Will process each other line into key/value pairs, reading them as:
        <key>:<value>
        Each value will be trimmed and processed as string.
        Each key will be trimmed and have any leading "- " or "+ " elements removed.

    .PARAMETER Lines
        The lines of text to process.

    .PARAMETER Data
        An extra hashtable to merge with the parsing results.

    .EXAMPLE
        PS C:\> $Data.Lines | Read-EBMdDataSection -Data $Data.Attributes

        Parses all lines, merges them with the hashtable in $Data.Attributes and returns the resultant hashtable.
#>
    [OutputType([hashtable])]
    [CmdletBinding()]
    param (
        [Parameter(ValueFromPipeline = $true)]
        [AllowEmptyString()]
        [string[]]
        $Lines,

        [hashtable]
        $Data = @{ }
    )

    begin {
        # Start from a copy of the extra data; values parsed from the lines overwrite entries of the same name.
        $result = @{ }
        $result += $Data
    }
    process {
        foreach ($line in $Lines | Get-SubString) {
            if (-not $line) { continue }
            if ($line -notlike "*:*") { continue }

            # Split at the first ":" only, the value may contain further ones.
            $name, $value = $line -split ":", 2
            $result[$name.Trim('-+ ')] = $value.Trim()
        }
    }
    end {
        $result
    }
}

function Read-EBMicrosoftDocsIndexPage {
<#
    .SYNOPSIS
        Converts an index page of a Microsoft Docs into a book.

    .DESCRIPTION
        Converts an index page of a Microsoft Docs into a book.
        Resolves all links in the index.

.PARAMETER Url
        The Url to the index page.

    .PARAMETER StartIndex
        Start Index the pages will begin with.
        Index is what Export-EBBook will use to determine page order.

    .EXAMPLE
        PS C:\> Read-EBMicrosoftDocsIndexPage -Url https://docs.microsoft.com/en-us/windows-server/identity/ad-ds/plan/security-best-practices/best-practices-for-securing-active-directory

        Parses the Active Directory Security Best Practices into page and image objects.
#>
    [CmdletBinding()]
    Param (
        [string]
        $Url,

        [int]
        $StartIndex = 0
    )

    begin {
        $index = $StartIndex
    }
    process {
        # The index page itself is the first page returned.
        $indexPage = Read-EBMicrosoftDocsPage -Url $Url -StartIndex $index
        $indexPage
        $index++

        # Collect the target of every link found in the index page.
        $linkMatches = $indexPage.Content | Select-String '<a href="(.*?)"' -AllMatches | Select-Object -ExpandProperty Matches
        $linkTargets = $linkMatches | ForEach-Object { $_.Groups[1].Value }

        # Links are resolved relative to the location of the index page.
        $basePath = (Split-Path $indexPage.SourceName) -replace "\\", "/"

        foreach ($linkTarget in $linkTargets) {
            $parentPath = $basePath

            # Each leading "../" moves one level up from the base location.
            while ($linkTarget -like "../*") {
                $parentPath = (Split-Path $parentPath) -replace "\\", "/"
                $linkTarget = $linkTarget -replace "^../", ""
            }

            $pageUrl = "{0}/{1}" -f $parentPath, $linkTarget
            Read-EBMicrosoftDocsPage -Url $pageUrl -StartIndex $index
            $index++
        }
    }
}

function Read-EBMicrosoftDocsPage {
<#
    .SYNOPSIS
        Parses a web document from the Microsoft documents.

    .DESCRIPTION
        Parses a web document from the Microsoft documents.

    .PARAMETER Url
        The url of the website to parse.

    .PARAMETER StartIndex
        The index of the page.
        Used for sorting the pages when building the ebook.

    .EXAMPLE
        PS C:\> Read-EBMicrosoftDocsPage -Url https://docs.microsoft.com/en-us/windows-server/identity/ad-ds/plan/security-best-practices/best-practices-for-securing-active-directory

        Parses the file of the specified link and converts it into a page.
#> [CmdletBinding()] param ( [Parameter(Mandatory = $true, ValueFromPipeline = $true)] [string[]] $Url, [int] $StartIndex = 1 ) begin { $index = $StartIndex } process { foreach ($weblink in $Url) { $data = Invoke-WebRequest -UseBasicParsing -Uri $weblink $main = ($data.RawContent | Select-String "(?ms)<main.*?>(.*?)</main>").Matches.Groups[1].Value $source, $title = ($main | Select-String '<h1.*?sourceFile="(.*?)".*?>(.*?)</h1>').Matches.Groups[1 .. 2].Value $text = ($main | Select-String '(?ms)<!-- <content> -->(.*?)<!-- </content> -->').Matches.Groups[1].Value.Trim() $content = "<h1>{0}</h1> {1}" -f $title, $text $webClient = New-Object System.Net.WebClient foreach ($imageMatch in ($content | Select-String '(<img.*?src="(.*?)".*?alt="(.*?)".*?>)' -AllMatches).Matches) { $relativeImagePath = $imageMatch.Groups[2].Value $imageName = $imageMatch.Groups[3].Value $imagePath = "{0}/{1}" -f ($weblink -replace '/[^/]*?$', '/'), $relativeImagePath $image = New-Object EbookBuilder.Image -Property @{ Data = $webClient.DownloadData($imagePath) Name = $imageName TimeCreated = Get-Date Extension = $imagePath.Split(".")[-1] MetaData = @{ WebLink = $imagePath } } $image $content = $content -replace ([regex]::Escape($relativeImagePath)), "../Images/$($image.FileName)" } New-Object EbookBuilder.Page -Property @{ Index = $index++ Name = $title Content = $content SourceName = $weblink TimeCreated = Get-Date MetaData = @{ GithubPath = $source } } } } } function Read-EBRoyalRoad { <# .SYNOPSIS Reads an entire series from Royal Road. .DESCRIPTION Reads an entire series from Royal Road. Converts it into the markdown format expected by Read-EBMarkdown. .PARAMETER Url The Url to the first chapter of a given Royal Road series .PARAMETER Name Name of the series .PARAMETER ConfigFile Path to a book project configuration file, replacing all the other parameters with values from it. For more details on configuration files, see New-EBBookProject. 
.PARAMETER Books A hashtable mapping page numbers as the start of a book to the name of that book. If left empty, there will only be one book, named for the series. Each page number key must an integer type. .PARAMETER OutPath The folder in which to create one subfolder per book, in which the chapter files will be created. .PARAMETER NoHeader The book does not include a header in the text portion. Will take the chapter-name as header instead. .PARAMETER ChapterOverride Chapters to skip. Intended for chapters where manual edits were performed and you do not want to overwrite them on the next sync. .EXAMPLE PS C:\> Read-EBRoyalRoad -Url https://www.royalroad.com/fiction/12345/evil-incarnate/chapter/666666/1-end-of-all-days -Name 'Evil Incarnate' -OutPath . Downloads the specified series, creates a folder in the current path and writes each chapter as its own .md file into that folder. #> [Diagnostics.CodeAnalysis.SuppressMessageAttribute("PSAvoidUsingInvokeExpression", "")] [CmdletBinding(DefaultParameterSetName = 'Explicit')] param ( [Parameter(Mandatory = $true, ParameterSetName = 'Explicit')] [string] $Url, [Parameter(Mandatory = $true, ParameterSetName = 'Explicit')] [string] $Name, [Parameter(Mandatory = $true, ParameterSetName = 'Config')] [PsfValidateScript('PSFramework.Validate.FSPath.File', ErrorString = 'PSFramework.Validate.FSPath.File')] [string] $ConfigFile, [Parameter(ParameterSetName = 'Explicit')] [hashtable] $Books = @{ }, [Parameter(ParameterSetName = 'Explicit')] [string] $OutPath, [Parameter(ParameterSetName = 'Explicit')] [switch] $NoHeader, [Parameter(ParameterSetName = 'Explicit')] [int[]] $ChapterOverride = @() ) begin { $index = 1 $bookCount = 1 $replacements = @{ } $chaptersToSkip = $ChapterOverride #region Process Config File if ($ConfigFile) { $baseFolder = Split-Path -Path (Resolve-PSFPath -Path $ConfigFile) $config = Import-PSFPowerShellDataFile -Path $ConfigFile $Name = $config.Name $Url = $config.Url if ($config.StartIndex) { $index = 
$config.StartIndex } if ($config.BookIndex) { $bookCount = $config.BookIndex } if ($config.ContainsKey('HasTitle')) { $NoHeader = -not $config.HasTitle } if ($config.Books) { $Books = $config.Books } if ($config.ChapterOverride) { $chaptersToSkip = $config.ChapterOverride | Invoke-Expression | Write-Output } $OutPath = Join-Path -Path $baseFolder -ChildPath $config.OutPath if ($config.Replacements) { $replacementRoot = Join-Path -Path $baseFolder -ChildPath $config.Replacements foreach ($file in Get-ChildItem -Path $replacementRoot -Filter *.psd1) { $entrySet = Import-PSFPowerShellDataFile -Path $file.FullName foreach ($pair in $entrySet.GetEnumerator()) { if (-not $replacements[$pair.Name]) { $replacements[$pair.Name] = @{ } } foreach ($childPair in $pair.Value.GetEnumerator()) { $replacements[$pair.Name][$childPair.Name] = [PSCustomObject]$childPair.Value } } } } } #endregion Process Config File if (-not $Books[1]) { $Books[1] = $Name } $currentBook = '{0} - {1}' -f $bookCount, $Books[$index] $currentBookPath = Join-Path -Path $OutPath -ChildPath $currentBook if (-not (Test-Path -Path $currentBookPath)) { $null = New-Item -Path $currentBookPath -Force -ItemType Directory -ErrorAction Stop } } process { $nextLink = $Url while ($nextLink) { Write-PSFMessage -Message 'Processing {0} Chapter {1} : {2}' -StringValues $Name, $index, $nextLink try { $page = Read-RRChapter -Url $nextLink -Index $index -NoHeader:$NoHeader -Replacements $replacements } catch { throw } $nextLink = $page.NextLink if ($index -notin $chaptersToSkip) { $page.TextMD | Set-Content -Path ("{0}\{1}-{2:D4}-{3:D4}.md" -f $currentBookPath, $Name, $bookCount, $index) -Encoding UTF8 } $index++ if ($Books[$index]) { $bookCount++ $currentBook = '{0} - {1}' -f $bookCount, $Books[$index] $currentBookPath = Join-Path -Path $OutPath -ChildPath $currentBook if (-not (Test-Path -Path $currentBookPath)) { $null = New-Item -Path $currentBookPath -Force -ItemType Directory -ErrorAction Stop } } } } } function 
Register-EBMarkdownBlock { <# .SYNOPSIS Register a converter scriptblock for parsing block data with Read-EBMarkdown .DESCRIPTION Register a converter scriptblock for parsing block data with Read-EBMarkdown These allow you to custom-tailor and extend how special blocks are converted from markdown to html. The converter script receives one input object, which will contain three properties: - Type : What kind of block is being provided - Lines : The lines of text within the block - Attributes : Any attributes provided to the block - StringBuilder : The StringBuilder that you should append any lines of html to Your scriptblock should return a boolean value - whether the next paragraph should have the default indentation or be treated as a first line. .PARAMETER Name Name of the block. Equal to the html tag name used within markdown. .PARAMETER Converter Script logic performing the conversion. .EXAMPLE PS C:\> Register-EBMarkdownBlock -Name Warning -Converter $warningScript Registers a converter that will convert warning blocks to useful html. #> [CmdletBinding()] param ( [parameter(Mandatory = $true)] [string] $Name, [parameter(Mandatory = $true)] [System.Management.Automation.ScriptBlock] $Converter ) process { $script:mdBlockTypes[$Name] = $Converter } } |