# File: perfdb/plogdownloader/extractAppAsgLogsWith7z.py
# (226 lines, 7.8 KiB, Python)

from zipfile import ZipFile
import os
from os import listdir
from os.path import getsize, isdir, isfile, join, dirname
import sys
import getopt
import re
import shutil
from subprocess import call,check_call
import time
# Example invocation (the -e value is handed to 7-Zip as a file name wildcard):
# S:\Working\ahr\Python36\python S:\Working\ahr\giru\plog-downloader\extractAppAsgLogsWith7z.py -e "*/performance*log*" -o S:\Working\ahr\giru\tmp\9323982100316752\extracted\vaprjrey01 -i S:\Working\ahr\giru\tmp\9323982100316752\logs\vaprjrey01
def extract(inputDir, outputDir, pattern):
    """Extract the members matching pattern from every archive below inputDir.

    The files found by listZips are sorted by path, paired with a target
    directory by mapZipToOutputDir and then passed, together with pattern,
    to extractFiles. Both intermediate lists are printed for tracing.
    """
    archives = sorted(listZips(inputDir))
    print("zips: %s" % archives)
    targets = mapZipToOutputDir(archives, outputDir)
    print("zipToOutputDir: %s" % targets)
    extractFiles(targets, pattern)
def extractFiles(zipToOutputDir, pattern):
    """Run 7-Zip on each (archive, target directory) pair.

    zipToOutputDir -- iterable of (archive path, output directory) tuples.
    pattern        -- passed unchanged to 7z as the file name filter.

    After each archive has been extracted, archives nested inside the target
    directory are unpacked (extractZippedLogs) and freshly rolled logs are
    deleted (removeLogsCreatedAfterRolling). check_call raises if 7z returns
    a non-zero exit status.

    An earlier revision did this with zipfile.ZipFile.extractall plus
    filterMembers/filterExisting; it was replaced by the 7z call below.
    """
    sevenZip = "C:\\Program Files\\7-Zip\\7z.exe"
    for archive, targetDir in zipToOutputDir:
        # Command line form: 7z e <archive> -y -o<targetDir> <pattern>
        command = [sevenZip, "e", archive, "-y", "-o%s" % targetDir, pattern]
        print(command)
        check_call(command)
        extractZippedLogs(targetDir)
        removeLogsCreatedAfterRolling(targetDir)
def extractZippedLogs(outputDir):
    """Unpack every *.zip found anywhere below outputDir, then delete it.

    All nested archives are extracted into outputDir itself (not into the
    sub-directory they were found in) via extractFile; each archive is
    removed afterwards with remove_file.
    """
    nestedArchives = [
        join(folder, entry)
        for folder, _subdirs, entries in os.walk(outputDir)
        for entry in entries
        if entry.endswith(".zip")
    ]
    print("extract2: %s" % nestedArchives)
    for archive in nestedArchives:
        extractFile(archive, outputDir)
        remove_file(archive)
def extractFile(zipFile, outputDir, maxAttempts=10, retryDelay=1):
    """Extract zipFile into outputDir with 7-Zip, retrying on failure.

    zipFile     -- path of the archive to extract.
    outputDir   -- directory handed to 7z via its -o switch.
    maxAttempts -- number of tries before giving up (default 10).
    retryDelay  -- seconds to sleep after a failed try (default 1).

    Returns None as soon as one extraction succeeds, or immediately when
    zipFile is not (or no longer) a regular file. When every attempt fails
    the process is terminated with sys.exit(2).
    """
    # Imported here because the module itself only imports call/check_call.
    from subprocess import CalledProcessError

    cmd = ["C:\\Program Files\\7-Zip\\7z.exe", "e", zipFile, "-y", "-o%s" % outputDir]
    for attempt in range(1, maxAttempts + 1):
        if not os.path.isfile(zipFile):
            print("extractFile: %s is not a file" % zipFile)
            return
        print("attempt %d: %s" % (attempt, cmd))
        try:
            check_call(cmd)
            return
        except (CalledProcessError, OSError):
            # Only extraction failures are retried: a non-zero 7z exit status
            # or a failure to launch 7z. KeyboardInterrupt and SystemExit are
            # deliberately not caught so the script can still be aborted.
            print("extraction failed")
            time.sleep(retryDelay)
    print("aborted after %d failed attempts" % maxAttempts)
    sys.exit(2)
def removeLogsCreatedAfterRolling(outputDir):
    """Delete performance logs whose last line shows they were just rolled.

    Only regular files directly inside outputDir whose name ends with
    'performance.log' are inspected. Such a file is removed when its last
    line contains a date (YYYY-MM-DD) followed by a time starting with
    '00:1'. Does nothing when outputDir is not a directory.

    Deletion is retried once per second until the file is gone; there is
    no upper bound on the number of retries.
    """
    if not os.path.isdir(outputDir):
        return
    # Raw string: "\d" inside a normal literal is an invalid escape sequence.
    justRolled = re.compile(r".*\d\d\d\d-\d\d-\d\d 00:1.*")
    logfiles = [f for f in listdir(outputDir) if isfile(join(outputDir, f))]
    for member in logfiles:
        path = join(outputDir, member)
        print("check if %s has just been rolled" % path)
        if not member.endswith('performance.log'):
            continue
        lastLine = readLastLine(path)
        print("lastLine: %s)" % lastLine)
        if not lastLine:
            # Empty log: there is no timestamp to inspect.
            continue
        # errors='replace' keeps a stray non-UTF-8 byte from aborting the run.
        if not justRolled.match(lastLine.decode('utf8', errors='replace')):
            continue
        attempt = 0
        while os.path.isfile(path):
            attempt += 1
            print("attempt %d removing %s, because it was just rolled" % (attempt, path))
            try:
                os.remove(path)
            except OSError:
                # Retry after a pause when the file cannot be removed yet.
                print("failed to delete")
                time.sleep(1)
def readLastLine(file):
    """Return the last line of file as bytes.

    The file is scanned backwards from its end, so only the tail is read.
    A trailing newline belongs to the returned line. A file consisting of a
    single line is returned in full, and an empty file yields b"".
    """
    with open(file, 'rb') as f:
        f.seek(0, os.SEEK_END)
        position = f.tell()
        if position == 0:
            return b""
        # Start on the final byte so that a terminating newline is not
        # mistaken for the separator in front of the last line.
        position -= 1
        while position > 0:
            f.seek(position - 1)
            if f.read(1) == b"\n":
                break  # position is the first byte after that newline
            position -= 1
        f.seek(position)
        return f.readline()
def filterExisting(filteredMembers, outputDir, myzip):
    """Return the members that still have to be extracted.

    A member is skipped (and reported on stdout) only when a file with its
    name already exists below outputDir and has the same size as the
    uncompressed size recorded in myzip. myzip must provide getinfo(name)
    returning an object with a file_size attribute, as zipfile.ZipFile does.
    """
    pending = []
    for name in filteredMembers:
        target = join(outputDir, name)
        upToDate = isfile(target) and myzip.getinfo(name).file_size == getsize(target)
        if upToDate:
            print("skip: " + name + " (file exists and has the same size)")
        else:
            pending.append(name)
    return pending
def filterMembers(members, pattern):
    """Return, in order, the members for which pattern.match() succeeds.

    pattern must offer a match() method, e.g. a compiled regular expression.
    """
    return [name for name in members if pattern.match(name)]
def mapZipToOutputDir(zips, outputDir):
    """Pair each recognised archive with the directory it is extracted to.

    Returns a list of (archive path, join(outputDir, instanceId)) tuples.
    Files for which extractDate or extractInstanceId finds nothing are
    left out of the result.
    """
    pairs = []
    for archive in zips:
        date = extractDate(archive)
        instanceId = extractInstanceId(archive)
        if not (date and instanceId):
            continue
        # The date is only required to be present; an older variant also put
        # it into the path as join(outputDir, date, instanceId).
        pairs.append((archive, join(outputDir, instanceId)))
    return pairs
def extractInstanceId(zipFileName):
    """Return the instance id embedded in an archive name, or False.

    Recognised names (the id is the part after "_i-"):
      pre 5.14:  axcng-service_i-0ccd20213cffb9fc3_001201.zip
      post 5.14: axcng-service_i-09c26757fd0b61c12_172_19_113_219_VAPFINRA01AA001_2018-04-14_090701.zip
    """
    # The dot in front of "zip" is escaped so that it only matches a literal
    # ".zip" instead of any character followed by "zip".
    pattern = re.compile(r".*axcng-.*_i-([a-zA-Z0-9]+)_.*\.zip")
    match = pattern.match(zipFileName)
    return match.group(1) if match else False
def extractDate(zipFileName):
    """Return the YYYY-MM-DD date embedded in an archive name, or False.

    Expected name form:
      axcng-service_i-0376ad122c7fa2bbc_172_28_1_153_VADTRANS01AA001_2018-08-17_095022.zip
    i.e. "_<date>_<six digits>.zip". Names without such a date are reported
    on stdout. An earlier variant took the date from a directory component
    of the path instead of from the file name.
    """
    # Raw string: "\." inside a normal literal is an invalid escape sequence.
    pattern = re.compile(r".*_([0-9]{4}-[0-9]{2}-[0-9]{2})_[0-9]{6}\.zip")
    match = pattern.match(zipFileName)
    if match:
        return match.group(1)
    print("no date", zipFileName)
    return False
def listZips(directory):
    """Return the paths of all files below directory, printing each one.

    Despite the name, no filtering by extension happens here: every file
    found by os.walk is returned.
    """
    found = []
    for folder, _subdirs, entries in os.walk(directory):
        paths = [join(folder, entry) for entry in entries]
        for path in paths:
            print(path)
        found.extend(paths)
    return found
def remove_file(file):
    """Delete file when it is an existing regular file; otherwise do nothing."""
    if not os.path.isfile(file):
        return
    os.remove(file)
def help(returnValue):
    """Print the command-line usage with examples, then exit.

    returnValue is passed to sys.exit() as the process exit status.
    """
    script = sys.argv[0]
    print(script + ' -i <directory> -o <directory> [-e <expression>]')
    # The expression is handed unchanged to 7z, which interprets it as a
    # wildcard; the examples therefore use wildcard syntax, not regex syntax.
    print('<expression> is a 7-Zip file name wildcard, not a regular expression')
    print('Examples: ')
    print(script + ' -e "*/performance*log*" -i logs\\vapaccen01 -o extracted\\vapaccen01')
    print(script + ' -e "*/performance*log*" -i logs\\vapaccen01 -o d:\\ws\\pdb\\logs\\vapaccen01')
    sys.exit(returnValue)
def main(argv):
    """Parse the command line, validate the directories and run extract().

    argv -- the argument list without the program name.

    Options:
      -h, --help         print the usage and exit with status 0
      -e, --expression   file name filter handed to 7z (default: everything)
      -i, --input        directory containing the downloaded archives
      -o, --output       existing directory to extract into

    Exits with status 2 (after printing the usage) on an unknown option or
    when either directory does not exist, and with status 0 after a
    successful extraction.
    """
    # The filter is given to 7z as a wildcard, so "*" selects every member;
    # the regex-style ".*" would only select names starting with a dot.
    expression = '*'
    inputDir = ''
    outputDir = ''
    try:
        # Long options that take a value need the trailing "=", otherwise
        # getopt does not accept an argument for them.
        opts, args = getopt.getopt(argv, "he:i:o:", ["help", "expression=", "input=", "output="])
    except getopt.GetoptError:
        help(2)
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            help(0)
        elif opt in ("-e", "--expression"):
            expression = arg
        elif opt in ("-i", "--input"):
            inputDir = arg
        elif opt in ("-o", "--output"):
            outputDir = arg
    print('Input directory is "' + inputDir + '"')
    print('Ouput directory is "' + outputDir + '"')
    if not os.path.isdir(inputDir):
        print("input dir is not a directory")
        help(2)
    if not isdir(outputDir):
        print("output dir is not a directory")
        help(2)
    print("extract")
    extract(inputDir, outputDir, expression)
    sys.exit(0)
# Script entry point: only runs when the file is executed directly.
if __name__ == "__main__":
    # Drop argv[0] (the script path); main() expects the options only.
    main(sys.argv[1:])