Back to the main data prep VBA page
I’ve been experimenting recently with Excel VBA dictionaries and arrays to avoid writing to disk while parsing log files. I have been using a dictionary to cache transactional data, and when the transaction parsing is completed, I write the complete transaction metrics to a time series array. Once the parsing of log files is complete, I’ll write the time series profile out to disk.
Sub parsePERFMSG() 'set variables '''''''''''''' Dim rawLOG As String, rawLOGpath As String, splitArray() As String, splitArrayDate() As String, splitTime() As String Dim threadID As String, tempThread As String, tempThreadID As String, tsDAY As Date Dim lineCt As Double, tsXLDAY As Double, tempVAL As Double Dim IXNvalues() As Variant pathFull = Sheets("input").Range("C13") Set objrawLOG = CreateObject("Scripting.FileSystemObject") rawLOGpath = objrawLOG.GetFile(pathFull).ParentFolder.path & "\" Dim ixnDIC As New Dictionary ixnDIC.Add "date", "" ixnDIC.Add "time", "" ixnDIC.Add "thread", "" ixnDIC.Add "buckets", "" ixnDIC.Add "candidates", "" ixnDIC.Add "matched", "" ixnDIC.Add "MSselect", "" ixnDIC.Add "MSmatch", "" ixnDIC.Add "MStot", "" ixnDIC.Add "type", "" ixnDIC.Add "user", "" ixnDIC.Add "rows", "" Dim ixnTS As New Dictionary ixnTS.Add "bkts", "1" ixnTS.Add "cands", "2" ixnTS.Add "matched", "3" ixnCOLct = 6 headerLine = "date time,rows in,rows read,bkts,cands,matched,threads" colHead = 6 colTrd = 1 'setup clean files '''''''''''''''''' srcCSV = Dir(objrawLOG.GetFile(pathFull).ParentFolder.path & "\") logCT = 0: logSO = 0 Dim logNameArr(1000, 1) As Variant Do While srcCSV <> "" tempFILE = rawLOGpath & "\" & srcCSV & "" If srcCSV Like "csv*" Or srcCSV Like "THREADgrep*" Or srcCSV = "csvLOGclean.txt" Then Set delFILE = CreateObject("Scripting.FileSystemObject") delFILE.Deletefile tempFILE, True End If If srcCSV Like "*.dat*" Then logCT = logCT + 1 logNameArr(logCT, 1) = srcCSV End If srcCSV = Dir Loop ReDim Preserve IXNvalues(logCT * 1440 + 1, 3 + 1, 5) fnameLOGclean = "csvLOGclean.txt" Set objFSO = CreateObject("Scripting.FileSystemObject") Set objCLEAN = objFSO.CreateTextFile(rawLOGpath & "\" & fnameLOGclean, 1) objCLEAN.WriteLine "date time,host,thread,ctxfree,ixn type,user,buckets,candidates,matched,select ms,match ms,total ms,rows in,rows read" 'loop through mlg files ''''''''''''''''''''''' tsChk = 0 For logLoop = 1 To logCT rawLOG = logNameArr(logLoop, 1) srcFullPath = rawLOGpath & rawLOG Set objFSO = CreateObject("Scripting.FileSystemObject") Set objrawLOG = objFSO.OpenTextFile(rawLOGpath & rawLOG, 1) Do Until objrawLOG.AtEndOfStream On Error Resume Next txnstring = objrawLOG.ReadLine If Not txnstring Like "*/*" Then GoTo skipIXN If txnstring Like "*UDP Collector application!" Then GoTo skipIXN splitArray() = Split(txnstring, "|") splitArrayDate() = Split(splitArray(1), " ") ixnDIC.Item("date") = splitArrayDate(0) ixnDIC.Item("time") = splitArrayDate(1) 'Right(splitArray(1), Len(splitArray(1)) - 11) ixnDIC.Item("host") = splitArray(3) ixnDIC.Item("thread") = splitArray(2) ixnDIC.Item("type") = splitArray(6) ixnDIC.Item("user") = splitArray(7) ixnDIC.Item("buckets") = splitArray(28) ixnDIC.Item("candidates") = splitArray(29) ixnDIC.Item("matched") = splitArray(30) ixnDIC.Item("MSselect") = splitArray(31) ixnDIC.Item("MSmatch") = splitArray(32) ixnDIC.Item("MStot") = splitArray(10) ixnDIC.Item("rowsIN") = splitArray(22) ixnDIC.Item("rowsOUT") = splitArray(24) ixnDIC.Item("ctxfree") = splitArray(5) outputline = ixnDIC.Item("date") & " " & ixnDIC.Item("time") 'compile minute stats 'determine row tsDAY = ixnDIC.Item("date"): tsXLDAY = tsDAY splitTime() = Split(ixnDIC.Item("time"), ":") tsHour = splitTime(Val(0)) tsMinute = splitTime(Val(1)) tsTIME = tsXLDAY + tsHour / 24 + tsMinute / 1440 If tsChk = 0 Then tsStart = tsTIME tsChk = 1 End If rowID = Int(tsTIME - tsStart) * 1440 + Hour(tsTIME - tsStart) * 60 + Minute(tsTIME - tsStart) + 2 'determine if this is a new IXN type, find column tgtCol = "" If ixnTS.Exists(ixnDIC.Item("type")) Then tgtCol = ixnTS.Item(ixnDIC.Item("type")) pause = 1 Else ixnCOLct = ixnCOLct + 1 ixnTS.Add ixnDIC.Item("type"), ixnCOLct tgtCol = ixnCOLct headerLine = headerLine & ixnDIC.Item("type") & " ct," & ixnDIC.Item("type") & " ms," ReDim Preserve IXNvalues(20000, 3 + (ixnCOLct - 3) * 2 + 1) End If 'update ixn array with values tempVAL = 1: IXNvalues(rowID, colHead + (tgtCol - colHead) * 2 - 1) = _ IXNvalues(rowID, colHead + (tgtCol - colHead) * 2 - 1) + tempVAL tempVAL = ixnDIC.Item("rowsIN"): IXNvalues(rowID, 1) = IXNvalues(rowID, 1) + tempVAL tempVAL = ixnDIC.Item("rowsOUT"): IXNvalues(rowID, 2) = IXNvalues(rowID, 2) + tempVAL tempVAL = ixnDIC.Item("buckets"): IXNvalues(rowID, 3) = IXNvalues(rowID, 3) + tempVAL tempVAL = ixnDIC.Item("candidates"): IXNvalues(rowID, 4) = IXNvalues(rowID, 4) + tempVAL tempVAL = ixnDIC.Item("matched"): IXNvalues(rowID, 5) = IXNvalues(rowID, 5) + tempVAL tempVAL = ixnDIC.Item("MStot"): _ IXNvalues(rowID, colHead + (tgtCol - colHead) * 2 - 1 + 1) = _ IXNvalues(rowID, colHead + (tgtCol - colHead) * 2 - 1 + 1) + tempVAL 'trap threadID tempThread = IXNvalues(rowID, 6) tempThreadID = ixnDIC.Item("thread") & ";" If tempThread = "" Then IXNvalues(rowID, 6) = ixnDIC.Item("thread") & ";": GoTo skipThread If InStr(1, tempThread, tempThreadID, 1) > 0 Then GoTo skipThread tempThread = tempThread & ixnDIC.Item("thread") & ";" IXNvalues(rowID, 6) = tempThread skipThread: tempThread = "" 'construct clean line ''''''''''''''''''''' outputline = outputline & "," & ixnDIC.Item("host"): ixnDIC.Item("host") = "" outputline = outputline & "," & ixnDIC.Item("thread"): ixnDIC.Item("thread") = "" outputline = outputline & "," & ixnDIC.Item("ctxfree"): ixnDIC.Item("ctxfree") = "" outputline = outputline & "," & ixnDIC.Item("type"): ixnDIC.Item("type") = "" outputline = outputline & "," & ixnDIC.Item("user"): ixnDIC.Item("user") = "" outputline = outputline & "," & ixnDIC.Item("buckets"): ixnDIC.Item("buckets") = "" outputline = outputline & "," & ixnDIC.Item("candidates"): ixnDIC.Item("candidates") = "" outputline = outputline & "," & ixnDIC.Item("matched"): ixnDIC.Item("matched") = "" outputline = outputline & "," & ixnDIC.Item("MSselect"): ixnDIC.Item("MSselect") = "" outputline = outputline & "," & ixnDIC.Item("MSmatch"): ixnDIC.Item("MSmatch") = "" outputline = outputline & "," & ixnDIC.Item("MStot"): ixnDIC.Item("MStot") = "" outputline = outputline & "," & ixnDIC.Item("rowsIN"): ixnDIC.Item("rowsIN") = "" outputline = outputline & "," & ixnDIC.Item("rowsOUT"): ixnDIC.Item("rowsOUT") = "" objCLEAN.WriteLine outputline outputline = "" skipIXN: Loop Next logLoop objrawLOG.Close 'print time series data Set objFSO = CreateObject("Scripting.FileSystemObject") Set objTS = objFSO.CreateTextFile(rawLOGpath & "\csvTS.txt", 1) objTS.WriteLine headerLine For i = 2 To logCT * 1440 + 1 tsDAY = tsStart + (i - 2) / 1440 tsLine = tsDAY & "," For j = 1 To 3 + (ixnCOLct - 3) * 2 tsLine = tsLine & IXNvalues(i, j) & "," Next j objTS.WriteLine tsLine tsLine = "" Next i End Sub
This is an evolution of my basic text parsing script. In this example I look for a multi-line transaction pattern typically from a single thread extract from a log file. This pattern forms a long running transaction that I need to track. This script looks for the pattern, and reconstructs to total transaction into a single line that can be used to generate complete transaction profiles and subsequent fenced latency analyses.
Sub patternMLG() 'set variables '''''''''''''' Dim rawLOG As String, rawLOGpath As String, splitArray() As String, splitTime() As String 'Dim threadID As String, tsDAY As Date 'Dim lineCt As Double, tsXLDAY As Double, tempVAL As Double 'Dim IXNvalues() As Variant 'ReDim Preserve IXNvalues(10000, 3) pathFull = Sheets("input").Range("C13") Set objrawLOG = CreateObject("Scripting.FileSystemObject") rawLOGpath = objrawLOG.GetFile(pathFull).ParentFolder.path & "\" Dim ixnDIC As New Dictionary ixnDIC.Add "MPI_CtxDbxGetRecnoList: stmt =", "" ixnDIC.Add "MAD_PkgSelAcbByStmt: mpi_reclist", "" ixnDIC.Add "MAD_PkgSelAcbByStmt: mpi_memcmpd", "" ixnDIC.Add "MPI_MxmRunSearch:", "" ixnDIC.Add "MAD_PkgSelAcbByStmt: mpi_memhead", "" ixnDIC.Add "MAD_PkgSelAcbByStmt: mpi_memexta", "" ixnDIC.Add "MAD_PkgSelAcbByStmt: mpi_audhead", "" ixnDIC.Add "MAD_DbxCommit: stmt='commit'", "" ixnDIC.Add "IXN=MEMSEARCH", "" 'setup clean files '''''''''''''''''' srcCSV = Dir(objrawLOG.GetFile(pathFull).ParentFolder.path & "\") Do While srcCSV <> "" tempFILE = rawLOGpath & "\" & srcCSV & "" If srcCSV Like "csvTHREAD*" Then Set delFILE = CreateObject("Scripting.FileSystemObject") delFILE.Deletefile tempFILE, True End If srcCSV = Dir Loop fnameLOGclean = "csvTHREAD.txt" Set objFSO = CreateObject("Scripting.FileSystemObject") Set objCLEAN = objFSO.CreateTextFile(rawLOGpath & "\" & fnameLOGclean, 1) objCLEAN.WriteLine "MPI_CtxDbxGetRecnoList: stmt,MAD_PkgSelAcbByStmt: mpi_reclist,MAD_PkgSelAcbByStmt: mpi_memcmpd,MPI_MxmRunSearch:,MAD_PkgSelAcbByStmt: mpi_memhead,MAD_PkgSelAcbByStmt: mpi_memexta,MAD_PkgSelAcbByStmt: mpi_audhead,MAD_DbxCommit: stmt='commit',IXN=MEMSEARCH,ms tot" 'loop through mlg files ''''''''''''''''''''''' procRec = 0 rawLOG = "sampTHREAD.txt" srcFullPath = rawLOGpath & rawLOG Set objFSO = CreateObject("Scripting.FileSystemObject") Set objrawLOG = objFSO.OpenTextFile(rawLOGpath & rawLOG, 1) Do Until objrawLOG.AtEndOfStream txnstring = objrawLOG.ReadLine If txnstring Like "*MPI_CtxDbxGetRecnoList: stmt*" Then ixnDIC.Item("MPI_CtxDbxGetRecnoList: stmt") = "" ixnDIC.Item("MAD_PkgSelAcbByStmt: mpi_reclist") = "" ixnDIC.Item("MAD_PkgSelAcbByStmt: mpi_memcmpd") = "" ixnDIC.Item("MPI_MxmRunSearch:") = "" ixnDIC.Item("MAD_PkgSelAcbByStmt: mpi_memhead") = "" ixnDIC.Item("MAD_PkgSelAcbByStmt: mpi_memexta") = "" ixnDIC.Item("MAD_PkgSelAcbByStmt: mpi_audhead") = "" ixnDIC.Item("MAD_DbxCommit: stmt='commit'") = "" ixnDIC.Item("IXN=MEMSEARCH") = "" ixnDIC.Item("ms tot") = "" ixnDIC.Item("MPI_CtxDbxGetRecnoList: stmt") = Left(txnstring, 23) End If If txnstring Like "*MAD_PkgSelAcbByStmt: mpi_reclist*" Then _ ixnDIC.Item("MAD_PkgSelAcbByStmt: mpi_reclist") = Left(txnstring, 23) If txnstring Like "*MAD_PkgSelAcbByStmt: mpi_memcmpd*" Then _ ixnDIC.Item("MAD_PkgSelAcbByStmt: mpi_memcmpd") = Left(txnstring, 23) If txnstring Like "*MPI_MxmRunSearch:*" Then _ ixnDIC.Item("MPI_MxmRunSearch:") = Left(txnstring, 23) If txnstring Like "*MAD_PkgSelAcbByStmt: mpi_memhead*" Then _ ixnDIC.Item("MAD_PkgSelAcbByStmt: mpi_memhead") = Left(txnstring, 23) If txnstring Like "*MAD_PkgSelAcbByStmt: mpi_memexta*" Then _ ixnDIC.Item("MAD_PkgSelAcbByStmt: mpi_memexta") = Left(txnstring, 23) If txnstring Like "*MAD_PkgSelAcbByStmt: mpi_audhead*" Then _ ixnDIC.Item("MAD_PkgSelAcbByStmt: mpi_audhead") = Left(txnstring, 23) If txnstring Like "*MAD_DbxCommit: stmt='commit'*" Then _ ixnDIC.Item("MAD_DbxCommit: stmt='commit'") = Left(txnstring, 23) If txnstring Like "*IXN=MEMSEARCH*" Then ixnDIC.Item("IXN=MEMSEARCH") = Left(txnstring, 23) FindEQ5 = 0 For j = 1 To 5 FindEQ5 = InStr(FindEQ5 + 1, txnstring, "=") If FindEQ5 = 0 Then Exit For Next ixnDIC.Item("ms tot") = Replace(Replace(Right(txnstring, Len(txnstring) - FindEQ5), " seconds.", ""), " ", "") * 1000 outputline = ixnDIC.Item("MPI_CtxDbxGetRecnoList: stmt"): ixnDIC.Item("MPI_CtxDbxGetRecnoList: stmt") = "" outputline = outputline & "," & ixnDIC.Item("MAD_PkgSelAcbByStmt: mpi_reclist"): ixnDIC.Item("MAD_PkgSelAcbByStmt: mpi_reclist") = "" outputline = outputline & "," & ixnDIC.Item("MAD_PkgSelAcbByStmt: mpi_memcmpd"): ixnDIC.Item("MAD_PkgSelAcbByStmt: mpi_memcmpd") = "" outputline = outputline & "," & ixnDIC.Item("MPI_MxmRunSearch:"): ixnDIC.Item("MPI_MxmRunSearch:") = "" outputline = outputline & "," & ixnDIC.Item("MAD_PkgSelAcbByStmt: mpi_memhead"): ixnDIC.Item("MAD_PkgSelAcbByStmt: mpi_memhead") = "" outputline = outputline & "," & ixnDIC.Item("MAD_PkgSelAcbByStmt: mpi_memexta"): ixnDIC.Item("MAD_PkgSelAcbByStmt: mpi_memexta") = "" outputline = outputline & "," & ixnDIC.Item("MAD_PkgSelAcbByStmt: mpi_audhead"): ixnDIC.Item("MAD_PkgSelAcbByStmt: mpi_audhead") = "" outputline = outputline & "," & ixnDIC.Item("MAD_DbxCommit: stmt='commit'"): ixnDIC.Item("MAD_DbxCommit: stmt='commit'") = "" outputline = outputline & "," & ixnDIC.Item("IXN=MEMSEARCH"): ixnDIC.Item("IXN=MEMSEARCH") = "" outputline = outputline & "," & ixnDIC.Item("ms tot"): ixnDIC.Item("ms tot") = "" objCLEAN.WriteLine outputline End If Loop objrawLOG.Close End Sub
text