hlalumiere
12 years agoRegular Contributor
Code donation: TestComplete log disk size trimmer.
I wrote this to solve an irritating issue I had with the huge log folders TestComplete was producing.
This tool finds all identical GIF, SWF and JS files scattered across a log folder structure, identifies them by their MD5 hash, copies the distinct files to a specified common media folder, updates the references in XML and HTML files, and deletes the now-redundant copies from the log folder structure. It rewrites the references as ABSOLUTE references, so it is not suitable if you move your logs around a lot. However, if, like me, you use a centralized network share for your logs, it will save you a substantial amount of hard disk space, and it has the potential to speed up log browsing a bit because of the way media is cached in most browsers. I plan to adapt the tool to use path-relative URIs eventually; however, for now my problem is fixed, so this has gone on the back burner.
I will not include binaries because, well, you wouldn't want to run them anyway. Here is the source code of the VB console app. It requires .NET 4.0 and Visual Studio (Express will work fine...) because of my use of deferred processing and parallelism to speed things up.
There is still one bug I have not been able to put my finger on: occasionally a single "info" icon goes missing from the log detail. My guess is that there is another small patch to apply to one of the JavaScript files.
To build:
1- Paste the code in a new console application named TCLogTrim.
2- Build.
This tool finds all identical GIF, SWF and JS files scattered across a log folder structure, identifies them by their MD5 hash, copies the distinct files to a specified common media folder, updates the references in XML and HTML files, and deletes the now-redundant copies from the log folder structure. It rewrites the references as ABSOLUTE references, so it is not suitable if you move your logs around a lot. However, if, like me, you use a centralized network share for your logs, it will save you a substantial amount of hard disk space, and it has the potential to speed up log browsing a bit because of the way media is cached in most browsers. I plan to adapt the tool to use path-relative URIs eventually; however, for now my problem is fixed, so this has gone on the back burner.
I will not include binaries because, well, you wouldn't want to run them anyway. Here is the source code of the VB console app. It requires .NET 4.0 and Visual Studio (Express will work fine...) because of my use of deferred processing and parallelism to speed things up.
There is still one bug I have not been able to put my finger on: occasionally a single "info" icon goes missing from the log detail. My guess is that there is another small patch to apply to one of the JavaScript files.
To build:
1- Paste the code in a new console application named TCLogTrim.
2- Build.
USAGE: TCLogTrim.exe TARGETPATH COMMONPATH
TARGETPATH is the TestComplete log path you want to clean up.
COMMONPATH is the path you want to use to hold the shared media.
Imports System.Runtime.Remoting.Metadata.W3cXsd2001
Imports System.IO
Imports System.Threading
Imports System.Threading.Tasks
Module TCLogTrim
' Raw command line captured once at module initialisation. ParseArguments expects exactly
' three entries and reads the target path from index 1 and the common path from index 2.
Private Args() As String = Environment.GetCommandLineArgs
' Describes one file (or one group of byte-identical files) together with its MD5 hash.
' Main fills Fullname/Filename/MD5 for every scanned file; EnumerateFiles additionally
' fills AltFilenames and Count when it groups files by hash.
Public Class FileMD5
' Path of the file as returned by Directory.EnumerateFiles.
Public Property Fullname As String
' File name portion of Fullname (Path.GetFileName).
Public Property Filename As String
' Distinct file names of the target files that share this MD5; only set by EnumerateFiles.
Public Property AltFilenames As IEnumerable(Of String)
' Number of files in the MD5 group; only set by EnumerateFiles.
Public Property Count As Integer
' Hash string produced by MD5CalcFile for the file's content.
Public Property MD5 As String
End Class
' Module-level MD5 hash provider instance.
Private cspMD5 As New System.Security.Cryptography.MD5CryptoServiceProvider
' Folder that receives the shared media files; assigned by ParseArguments from the second argument.
Private strCommonPath As String = ""
' Root of the log tree to clean up; assigned by ParseArguments from the first argument.
Private strTargetPath As String = ""
' Every file found under strTargetPath together with its MD5; built once in Main.
Private lstTargetFiles As List(Of FileMD5)
' Results of EnumerateFiles for ".swf", ".gif" and ".js" respectively; assigned in Main.
Dim DistinctFlashFiles, DistinctImageFiles, DistinctJavaScripts As IEnumerable(Of FileMD5)
' Program entry point. Runs the whole clean-up in order:
'   1. validate the command line (ParseArguments),
'   2. hash every file under the target tree,
'   3. group the SWF / GIF / JS files by hash,
'   4. copy one instance of each into the common folder (UpdateCommonPath),
'   5. rewrite references inside the HTM/XML files,
'   6. delete the target files that now exist in the common folder, plus every JS file.
Public Sub Main()
    Console.WriteLine()
    Console.WriteLine("TCLogTrim v1.0")
    Console.WriteLine("--------------")
    ParseArguments()

    ' Hash the complete target tree in parallel and keep the result in memory.
    Console.Write("* Enumerating files in the target directory... ")
    lstTargetFiles = Directory.EnumerateFiles(strTargetPath, "*.*", SearchOption.AllDirectories).
        AsParallel().
        Select(Function(scannedPath) New FileMD5 With {.Fullname = scannedPath,
                                                       .Filename = Path.GetFileName(scannedPath),
                                                       .MD5 = MD5CalcFile(scannedPath)}).
        ToList()
    Console.Write("Done." & vbCrLf)

    Console.Write("* Searching for common files... ")
    DistinctFlashFiles = EnumerateFiles(".swf")
    DistinctImageFiles = EnumerateFiles(".gif")
    DistinctJavaScripts = EnumerateFiles(".js")
    Console.Write("Done." & vbCrLf)

    UpdateCommonPath()

    ' This query is deferred: the common folder is only scanned and hashed when the
    ' delete join further down enumerates it.
    Console.Write("* Enumerating found common files... ")
    Dim lstCommonFiles = Directory.EnumerateFiles(strCommonPath, "*.*", SearchOption.AllDirectories).
        AsParallel().
        Select(Function(sharedPath) New FileMD5 With {.Fullname = sharedPath,
                                                      .Filename = Path.GetFileName(sharedPath),
                                                      .MD5 = MD5CalcFile(sharedPath)})
    Console.Write("Done." & vbCrLf)

    ' Any target file whose name contains ".HTM" or ".XML" gets its references rewritten.
    Console.Write("* Searching for hypertext files to update... ")
    Dim lstHyperTextFiles = lstTargetFiles.Where(Function(candidate)
                                                     Dim upperName As String = candidate.Filename.ToUpper
                                                     Return upperName.Contains(".HTM") OrElse upperName.Contains(".XML")
                                                 End Function)
    Parallel.ForEach(lstHyperTextFiles, Sub(HTFile As FileMD5)
                                            ' Same order as always: flash, then images, then scripts.
                                            For Each refSet As IEnumerable(Of FileMD5) In {DistinctFlashFiles, DistinctImageFiles, DistinctJavaScripts}
                                                EditReferences(HTFile.Fullname, refSet)
                                            Next
                                        End Sub)
    Console.Write("Done." & vbCrLf)

    ' Remove every target file whose hash matches a file in the common folder ...
    Console.Write("* Searching for files to delete... ")
    Dim lstFilesToDelete = lstTargetFiles.Join(lstCommonFiles,
                                               Function(t) t.MD5,
                                               Function(c) c.MD5,
                                               Function(t, c) t)
    Parallel.ForEach(lstFilesToDelete, Sub(Doomed As FileMD5) File.Delete(Doomed.Fullname))

    ' ... and every target file whose name contains ".JS", whether or not its hash matched.
    Dim lstJSToDelete = lstTargetFiles.Where(Function(t) t.Filename.ToUpper.Contains(".JS"))
    Parallel.ForEach(lstJSToDelete, Sub(Doomed As FileMD5) File.Delete(Doomed.Fullname))
    Console.Write("Done." & vbCrLf)

    Console.WriteLine()
    Console.WriteLine("All done!" & vbCrLf)
End Sub
' Validates the command line and stores the two folder arguments in strTargetPath and
' strCommonPath. Prints the usage text when the argument count is wrong, or an error
' line when either folder does not exist, and terminates the program in both cases.
Private Sub ParseArguments()
    ' Program name + two folders = three entries; anything else gets the usage text.
    If Args.Length <> 3 Then
        Console.WriteLine("USAGE: TCLogTrim.exe [TARGETPATH] [COMMONPATH]")
        Console.WriteLine()
        Console.WriteLine("[TARGETPATH] is the TestComplete log path you want to clean up.")
        Console.WriteLine("[COMMONPATH] is the path you want to use to hold the shared media.")
        Console.WriteLine()
        End
    End If

    ' Both folders are probed up front, exactly as before.
    Dim targetExists As Boolean = Directory.Exists(Args(1))
    Dim commonExists As Boolean = Directory.Exists(Args(2))
    If Not (targetExists And commonExists) Then
        Console.WriteLine("Exception! One of the arguments is invalid.")
        Console.WriteLine()
        End
    End If

    strTargetPath = Args(1)
    strCommonPath = Args(2)
End Sub
' Populates the common folder and fixes up the scripts that were copied into it:
'   1. copies one instance of every distinct SWF / GIF / JS file (GIFs are renamed to
'      their MD5, JS files always overwrite an existing copy),
'   2. rewrites the media references inside the copied JS files to absolute URIs,
'   3. patches table.js so that it no longer relocates nested image paths.
Private Sub UpdateCommonPath()
    Dim Count As Integer = 0
    Console.Write("* Copying common files to common directory... ")
    Count += CopyDistinctToCommon(DistinctFlashFiles, False, False)
    Count += CopyDistinctToCommon(DistinctImageFiles, True, False)
    Count += CopyDistinctToCommon(DistinctJavaScripts, False, True)
    Console.Write("Done." & vbCrLf)
    Console.WriteLine("* Copied " & Count & " unique shared files to """ & strCommonPath & """")

    ' Now we need to change the references in the common .js files to absolute URIs instead of the existing relative ones.
    Console.Write("* Updating internal JScript references... ")
    ' DistinctJavaScripts is distinct by content, not by name: two versions of the same
    ' script map to ONE file in the common folder. Process each common file exactly once,
    ' otherwise two workers edit the same file concurrently and the references in it are
    ' rewritten twice.
    Dim CommonScriptPaths As List(Of String) =
        DistinctJavaScripts.
            Select(Function(ThisJS) strCommonPath & "\" & ThisJS.Filename).
            Distinct(StringComparer.OrdinalIgnoreCase).
            ToList()
    Parallel.ForEach(CommonScriptPaths, Sub(ScriptPath As String)
                                            EditReferences(ScriptPath, DistinctFlashFiles)
                                            EditReferences(ScriptPath, DistinctImageFiles)
                                            EditReferences(ScriptPath, DistinctJavaScripts)
                                        End Sub)
    Console.Write("Done." & vbCrLf)

    ' We also need to patch table.js so that the scripts find the nested images.
    ' "<img src=\"" + correctLocation(element.location, nodeValue) => "<img src=\"" + nodeValue
    Console.Write("* Patching ""table.js""... ")
    Dim TableScriptPath As String = strCommonPath & "\table.js"
    If File.Exists(TableScriptPath) Then
        Dim FileText As String = My.Computer.FileSystem.ReadAllText(TableScriptPath)
        FileText = FileText.Replace("""<img src=\"""" + correctLocation(element.location, nodeValue)", """<img src=\"""" + nodeValue")
        My.Computer.FileSystem.WriteAllText(TableScriptPath, FileText, False)
        Console.Write("Done." & vbCrLf)
    Else
        ' A log tree without table.js is not an error; there is simply nothing to patch.
        Console.Write("Skipped (file not found)." & vbCrLf)
    End If
End Sub
' Rewrites, inside one text file, every occurrence of the file names listed in
' RefsToUpdate so that they point at the shared copy in the common folder.
'   FileToEdit   - path of the HTML / XML / JS file to modify in place.
'   RefsToUpdate - hash groups whose AltFilenames should be redirected.
' A name containing ".GIF" is redirected to "<MD5>.gif"; any other name is redirected to
' the group's Filename. The target is an absolute "file:///" URI with forward slashes.
Private Sub EditReferences(ByVal FileToEdit As String, ByVal RefsToUpdate As IEnumerable(Of FileMD5))
    Dim OriginalText As String = My.Computer.FileSystem.ReadAllText(FileToEdit)
    Dim FileText As String = OriginalText
    Dim CommonPrefix As String = "file:///" & strCommonPath & "\"

    ' Each replacement depends on the result of the previous one, so this has to run
    ' sequentially: doing the read-modify-write of FileText from several threads at once
    ' silently drops replacements.
    Dim RewrittenNames As New System.Collections.Generic.HashSet(Of String)
    For Each ThisRef As FileMD5 In RefsToUpdate
        For Each FileName As String In ThisRef.AltFilenames
            ' Two hash groups can share a file name. Rewriting the same name a second time
            ' would match inside the URI produced by the first pass, so do it only once.
            If Not RewrittenNames.Add(FileName) Then Continue For

            Dim SharedName As String = If(FileName.ToUpper.Contains(".GIF"), ThisRef.MD5 & ".gif", ThisRef.Filename)
            FileText = FileText.Replace(FileName, (CommonPrefix & SharedName).Replace("\", "/"))
        Next
    Next

    ' Leave files without any matching reference untouched instead of rewriting them.
    If Not String.Equals(FileText, OriginalText, StringComparison.Ordinal) Then
        My.Computer.FileSystem.WriteAllText(FileToEdit, FileText, False)
    End If
End Sub
' Takes a filespec (".gif") and returns one FileMD5 per distinct (MD5-wise) file among the
' target files whose name contains that filespec (case-insensitive).
' For every hash group the result carries:
'   Fullname / Filename - taken from the first file of the group,
'   AltFilenames        - distinct names of ALL target files with that MD5 (not only those
'                         matching the filespec),
'   Count               - number of matching files in the group.
' The result is materialised before it is returned: callers enumerate it many times
' (once per edited file), and a deferred query would regroup the whole file list and
' rescan it for every group on each of those enumerations.
Private Function EnumerateFiles(ByVal strFileSpec As String) As IEnumerable(Of FileMD5)
    Dim UpperSpec As String = strFileSpec.ToUpper

    ' One pass over the target files gives constant-time access to the names per hash.
    Dim NamesByHash = lstTargetFiles.ToLookup(Function(f) f.MD5, Function(f) f.Filename)

    'Get a list of all the files matching the filespec in the target tree.
    Return (From df In lstTargetFiles
            Where df.Filename.ToUpper.Contains(UpperSpec)
            Group df By df.MD5 Into Group
            Select New FileMD5 With {.MD5 = MD5,
                                     .Fullname = Group.First.Fullname,
                                     .Filename = Group.First.Filename,
                                     .AltFilenames = NamesByHash(MD5).Distinct().ToList(),
                                     .Count = Group.Count}).ToList()
End Function
' Takes an IEnumerable(Of FileMD5) and copies each entry into the common folder.
'   EnumeratedFiles - the distinct (MD5-wise) files to copy.
'   bRenameToMD5    - True: the copy is named "<MD5><original extension>";
'                     False: the copy keeps the original file name.
'   bOverwrite      - True: copy even when the destination already exists.
' Returns the number of files that were actually copied.
Private Function CopyDistinctToCommon(ByVal EnumeratedFiles As IEnumerable(Of FileMD5), ByVal bRenameToMD5 As Boolean, ByVal bOverwrite As Boolean) As Integer
    ' The copies run in parallel, so the counter must be updated atomically; a plain
    ' "+= 1" from several threads loses increments.
    Dim CopiedCount As Integer = 0

    Parallel.ForEach(EnumeratedFiles, Sub(ThisFile As FileMD5)
                                          Dim CommonFilename = strCommonPath & "\" & If(bRenameToMD5, ThisFile.MD5 & Path.GetExtension(ThisFile.Fullname), ThisFile.Filename)
                                          ' NOTE(review): when bRenameToMD5 is False, two entries with the same
                                          ' file name but different content target the same destination.
                                          If bOverwrite OrElse Not File.Exists(CommonFilename) Then
                                              File.Copy(ThisFile.Fullname, CommonFilename, True)
                                              System.Threading.Interlocked.Increment(CopiedCount)
                                          End If
                                      End Sub)

    Return CopiedCount
End Function
' Calculate the MD5 hash for a file.
'   FilePath - path of the file to hash; it is opened read-only and other processes may
'              keep reading or writing it meanwhile.
' Returns the hash as 32 upper-case hexadecimal characters without separators.
' Throws whatever FileStream throws when the file cannot be opened.
Public Function MD5CalcFile(ByVal FilePath As String) As String
    ' The stream is closed as soon as the hash is computed; a handle left open here
    ' would block the later deletion of the very same file.
    Using Reader As New System.IO.FileStream(FilePath, System.IO.FileMode.Open, System.IO.FileAccess.Read, System.IO.FileShare.ReadWrite)
        ' This function is called from parallel queries, and a hash algorithm instance
        ' must not be shared between threads, so every call uses its own.
        Using Hasher As System.Security.Cryptography.MD5 = System.Security.Cryptography.MD5.Create()
            Return System.BitConverter.ToString(Hasher.ComputeHash(Reader)).Replace("-", "")
        End Using
    End Using
End Function
End Module