Scripts/Update-MongoFiles.ps1
<#
.Synopsis
    Updates the file system snapshot database.

.Description
    Server: local, database: test, collections: files, files_log
    Module: Mdbc <https://github.com/nightroman/Mdbc>

    The script scans the specified directory tree, updates file and directory
    documents, and then removes orphan documents which have not been updated.
    Changes are optionally logged in another collection.

    Collection "files"
    * _id : full item path
    * Attributes : file system flags
    * Length : file length
    * LastWriteTime : last write time
    * CreationTime : creation time
    * Name : item name
    * Extension : file extension
    * Updated : last update time

    Collection "files_log"
    * _id : full item path
    * Updated : last update time
    * Log : array of item snapshots
    * Op : 0: created, 1: changed, 2: removed

.Parameter Path
    Specifies one or more literal directory paths to be processed.

.Parameter CollectionName
    Specifies the collection name. Default: files (implies files_log).

.Parameter Log
    Tells the script to log created, changed, and removed items to files_log.

.Parameter Split
    Tells the script to process data in parallel using Split-Pipeline.
    Module: SplitPipeline <https://github.com/nightroman/SplitPipeline>

.Inputs
    None. Use the parameters to specify input.

.Outputs
    The result object with statistics
    * Path : the input path
    * Created : count of created
    * Changed : count of changed
    * Removed : count of removed
    * Elapsed : elapsed time span

.Link
    Get-MongoFile.ps1
#>

param(
    [Parameter(Position = 0)]
    [string[]]$Path = '.',

    [string]$CollectionName = 'files',

    [switch]$Log,

    [switch]$Split
)

# Any error terminates the script; strict mode rejects undefined variables
# and references to missing properties.
$ErrorActionPreference = 'Stop'
Set-StrictMode -Version 2

# One timestamp for the whole run. Documents are stamped with it when they
# are confirmed or rewritten; the clean-up pass compares Updated against it.
$Now = [DateTime]::Now

# Resolves exact case paths.
function Resolve($Path) {
    # Rebuilds the path segment by segment: each name is looked up in its
    # parent directory and the name reported by the file system is used.
    # The root (no parent) is simply upper-cased.
    $dir = [IO.DirectoryInfo]$Path
    $parent = $dir.Parent
    if (!$parent) {
        return $dir.Name.ToUpper()
    }
    $resolvedParent = Resolve $parent.FullName
    $exactName = $parent.GetFileSystemInfos($dir.Name)[0].Name
    Join-Path $resolvedParent $exactName
}

# Replace the input paths by their full, exact-case provider paths.
$Path = foreach($item in $Path) {
    Resolve ($PSCmdlet.GetUnresolvedProviderPathFromPSPath($item))
}
Write-Host "Updating data for $Path ..."

# Connects collections and initializes data.
# Must be dot-sourced: the variables created here ($CollectionLog, $info,
# $Update) are read by Update and by the main flow below.
function Connect {
    Import-Module Mdbc
    Connect-Mdbc . test $CollectionName

    # $Database is not defined in this script; presumably Connect-Mdbc
    # provides it - TODO confirm against the Mdbc documentation.
    $CollectionLog = $Database.GetCollection("${CollectionName}_log")

    # Statistics object with zeroed counters.
    $info = 1 | Select-Object Path, Created, Changed, Removed, Elapsed
    $info.Removed = 0
    $info.Changed = 0
    $info.Created = 0

    # Reusable update expression which stamps a document with the run time.
    $Update = New-MdbcUpdate -Set @{Updated = $Now}
}

# Gets input items from the path.
function Input {
    # 'Ignore' is used on PowerShell 3+; older versions get 0
    # (presumably the numeric form of SilentlyContinue - TODO confirm).
    $errorMode = 0
    if ($PSVersionTable.PSVersion.Major -ge 3) {
        $errorMode = 'Ignore'
    }
    Get-ChildItem -LiteralPath $Path -Recurse -Force -ErrorAction $errorMode
}

# Updates documents from input items.
# Pipeline function: each input item is a file or directory object.
function Update {
    process {
        $item = $_
        $isFile = !$item.PSIsContainer

        # Identifying data: the fields used to detect an unchanged item.
        $doc = New-MdbcData
        $doc._id = $item.FullName
        $doc.Attributes = [int]$item.Attributes
        if ($isFile) {
            $doc.Length = $item.Length
            $doc.LastWriteTime = $item.LastWriteTime
        }

        # Use the identifying data as the query and just refresh Updated.
        $result = Update-MdbcData $Update $doc -Result

        # A matched document means the item has not changed; nothing else to do.
        if ($result.DocumentsAffected) {
            return
        }

        # New or changed item: complete the document.
        if (!$isFile) {
            $doc.LastWriteTime = $item.LastWriteTime
        }
        $doc.CreationTime = $item.CreationTime
        $doc.Name = $item.Name
        if ($isFile) {
            $doc.Extension = $item.Extension
        }
        $doc.Updated = $Now

        # Add or replace; the operation code is 1 (changed) when an existing
        # document was updated and 0 (created) otherwise.
        $result = Add-MdbcData $doc -Update -Result
        $opCode = [int]$result.UpdatedExisting
        if ($opCode) {
            $info.Changed += 1
        }
        else {
            $info.Created += 1
        }

        if (!$Log) {
            return
        }

        # Log created or changed: push a snapshot without the key and name fields.
        $doc.Remove('_id')
        $doc.Remove('Name')
        $doc.Remove('Extension')
        $doc.Op = $opCode
        $logUpdate = New-MdbcUpdate -Set @{Updated = $Now; Op = $opCode} -Push @{Log = $doc}
        Update-MdbcData -Collection $CollectionLog -Add -Query $item.FullName -Update $logUpdate
    }
}

### Update existing

. Connect
$info.Path = $Path
$time = [Diagnostics.Stopwatch]::StartNew()

if (!$Split) {
    Input | Update
}
else {
    Import-Module SplitPipeline

    # Each parallel pipeline connects on its own (Begin), updates its share of
    # the input (Script) and returns its own statistics (End).
    $splitArgs = @{
        Verbose  = $true
        Count    = 2, 4
        Load     = 500, 5000
        Function = 'Connect', 'Update'
        Variable = 'CollectionName', 'Log', 'Now'
        Begin    = { . Connect }
        Script   = { $input | Update }
        End      = { $info }
    }

    # Sum the partial counters into the main statistics object.
    Input | Split-Pipeline @splitArgs | ForEach-Object {
        $info.Created += $_.Created
        $info.Changed += $_.Changed
    }
}

### Remove missing

# One anchored regex per processed root, matching ids of items below that root.
$in = foreach($root in $Path) {
    if (!$root.EndsWith('\')) {
        $root += '\'
    }
    [regex]('^' + [regex]::Escape($root))
}

# Documents whose Updated does not match BSON type 9
# (presumably the date type, i.e. what this script stores - TODO confirm).
$isDate = New-MdbcQuery Updated -Type 9
$queryUnknown = New-MdbcQuery -Not $isDate

# Documents below the processed roots which were not stamped by this run.
$underRoots = New-MdbcQuery _id -In $in
$notStamped = New-MdbcQuery Updated -LT $Now
$queryMissing = New-MdbcQuery -And $underRoots, $notStamped

$queryStale = New-MdbcQuery -Or $queryUnknown, $queryMissing
foreach($doc in Get-MdbcData $queryStale) {
    $info.Removed += 1

    # Remove data.
    $id = $doc._id
    Remove-MdbcData $id

    if (!$Log) {
        continue
    }

    # Log removed: push the last known snapshot with Op = 2.
    $doc.Remove('_id')
    $doc.Remove('Name')
    $doc.Remove('Extension')
    $doc.Op = 2
    $logUpdate = New-MdbcUpdate -Set @{Updated = $Now; Op = 2} -Push @{Log = $doc}
    Update-MdbcData -Collection $CollectionLog -Add -Query $id -Update $logUpdate
}

# Output info.
$info.Elapsed = $time.Elapsed
# NOTE(review): the trailing pipe is reproduced from the source; whatever
# follows it lies outside the visible part of the file.
$info |