226 lines
7.8 KiB
Python
226 lines
7.8 KiB
Python
from zipfile import ZipFile
|
|
import os
|
|
from os import listdir
|
|
from os.path import getsize, isdir, isfile, join, dirname
|
|
import sys
|
|
import getopt
|
|
import re
|
|
import shutil
|
|
from subprocess import call,check_call
|
|
import time
|
|
|
|
# S:\Working\ahr\Python36\python S:\Working\ahr\giru\plog-downloader\extractAppAsgLogsWith7z.py -e "*/performance*log*" -o S:\Working\ahr\giru\tmp\9323982100316752\extracted\vaprjrey01 -i S:\Working\ahr\giru\tmp\9323982100316752\logs\vaprjrey01
|
|
|
|
def extract(inputDir, outputDir, pattern):
    """Extract the entries matching ``pattern`` from all archives below ``inputDir``.

    The files found by ``listZips`` are processed in sorted order, paired
    with a target directory below ``outputDir`` by ``mapZipToOutputDir``
    and then unpacked by ``extractFiles``.
    """
    archives = sorted(listZips(inputDir))
    print("zips: %s" % archives)

    targets = mapZipToOutputDir(archives, outputDir)
    print("zipToOutputDir: %s" % targets)

    extractFiles(targets, pattern)
|
|
|
|
|
|
def extractFiles(zipToOutputDir, pattern):
    """Run 7-Zip for every ``(archive, target directory)`` pair.

    For each pair the entries selected by ``pattern`` are extracted into
    the target directory, nested ``.zip`` files found there are unpacked
    via ``extractZippedLogs`` and freshly rolled logs are deleted again
    via ``removeLogsCreatedAfterRolling``.

    ``check_call`` raises ``CalledProcessError`` when 7-Zip exits with a
    non-zero status, which aborts the whole run.
    """
    sevenZip = "C:\\Program Files\\7-Zip\\7z.exe"
    for pair in zipToOutputDir:
        zipFile, outputDir = pair
        # Command line shape: 7z e archive.zip -y -o<outputdir> <pattern>
        outputSwitch = "-o%s" % outputDir
        cmd = [sevenZip, "e", zipFile, "-y", outputSwitch, pattern]
        print(cmd)
        check_call(cmd)
        extractZippedLogs(outputDir)
        removeLogsCreatedAfterRolling(outputDir)
        # NOTE: an earlier implementation of this loop used zipfile.ZipFile
        # together with filterMembers()/filterExisting() instead of 7-Zip.
|
|
|
|
def extractZippedLogs(outputDir):
    """Unpack every ``.zip`` file below ``outputDir`` and delete it afterwards.

    The tree is scanned completely before the first archive is touched.
    Every archive is extracted into ``outputDir`` itself (not into the
    sub-directory it was found in) and then removed with ``remove_file``.
    """
    nested = [
        join(folder, entry)
        for folder, _subdirs, entries in os.walk(outputDir)
        for entry in entries
        if entry.endswith(".zip")
    ]

    print ("extract2: %s" % nested)
    for archive in nested:
        extractFile(archive, outputDir)
        remove_file(archive)
|
|
|
|
def extractFile(zipFile, outputDir, maxAttempts=10, retryDelay=1):
    """Extract ``zipFile`` into ``outputDir`` with 7-Zip, retrying on failure.

    Args:
        zipFile: path of the archive to unpack.
        outputDir: directory handed to 7-Zip through its ``-o`` switch.
        maxAttempts: number of tries before giving up (default 10).
        retryDelay: seconds to sleep after a failed try (default 1).

    Returns:
        None, either after a successful extraction or when ``zipFile`` is
        not (or no longer) a regular file.

    Raises:
        SystemExit: with exit code 2 once ``maxAttempts`` tries have failed.
    """
    # Local import: the module only imports call/check_call from subprocess.
    from subprocess import CalledProcessError

    for attempt in range(1, maxAttempts + 1):
        # Re-checked before every try: the file may disappear between tries.
        if not os.path.isfile(zipFile):
            print("extractFile: %s is not a file" % zipFile)
            return

        cmd = ["C:\\Program Files\\7-Zip\\7z.exe", "e", zipFile, "-y", "-o%s" % outputDir]
        print("attempt %d: %s" % (attempt, cmd))
        try:
            check_call(cmd)
            return
        except (CalledProcessError, OSError):
            # CalledProcessError: 7-Zip ran but reported an error.
            # OSError: 7-Zip could not be started at all.
            # Anything else (e.g. KeyboardInterrupt) must not be swallowed.
            print("extraction failed")
            time.sleep(retryDelay)

    print("aborted after %d failed attempts" % maxAttempts)
    sys.exit(2)
|
|
|
|
def removeLogsCreatedAfterRolling(outputDir):
    """Delete ``*performance.log`` files in ``outputDir`` whose last line is "just rolled".

    A file counts as just rolled when its last line contains a timestamp
    of the form ``YYYY-MM-DD 00:1`` (i.e. 00:10-00:19).
    NOTE(review): presumably such a file only holds the first minutes of a
    new day written right after log rotation - confirm with the log setup.

    Only regular files directly inside ``outputDir`` are inspected. Does
    nothing when ``outputDir`` is not a directory. Deletion is retried once
    per second for as long as the file still exists.
    """
    if not os.path.isdir(outputDir):
        return

    # Raw string: "\d" inside a normal string literal is an invalid escape.
    justRolled = re.compile(r".*\d\d\d\d-\d\d-\d\d 00:1.*")

    logfiles = [f for f in listdir(outputDir) if isfile(join(outputDir, f))]
    for member in logfiles:
        file = join(outputDir, member)
        print("check if %s has just been rolled" % file)
        if not member.endswith('performance.log'):
            continue

        lastLine = readLastLine(file)
        print("lastLine: %s)" % lastLine)
        if not lastLine:
            # Empty file: there is no timestamp to inspect, keep the file.
            continue
        # Only ASCII digits matter for the match, so undecodable bytes are
        # replaced instead of aborting the run with UnicodeDecodeError.
        if not justRolled.match(lastLine.decode('utf8', errors='replace')):
            continue

        attempt = 0
        while os.path.isfile(file):
            attempt += 1
            print("attempt %d removing %s, because it was just rolled" % (attempt, file))
            try:
                os.remove(file)
            except OSError:
                # Typically the file is still locked; wait and try again.
                print("failed to delete")
                time.sleep(1)
|
|
|
|
def readLastLine(file):
    """Return the last line of ``file`` as bytes.

    The line is returned exactly as stored, including its trailing newline
    when the file ends with one. An empty file yields ``b""``.

    The file is read backwards in chunks, so only the tail of a large log
    is touched. Files shorter than two bytes and single-line files are
    handled as well (the whole content is the last line).
    """
    chunkSize = 4096
    with open(file, 'rb') as f:
        f.seek(0, os.SEEK_END)
        position = f.tell()
        if position == 0:
            return b""

        tail = b""
        while position > 0:
            step = min(chunkSize, position)
            position -= step
            f.seek(position)
            tail = f.read(step) + tail
            # The very last byte is excluded from the search: a newline
            # there terminates the last line rather than preceding it.
            newlineAt = tail.rfind(b"\n", 0, len(tail) - 1)
            if newlineAt != -1:
                return tail[newlineAt + 1:]

        # No earlier newline: the file consists of a single line.
        return tail
|
|
|
|
|
|
def filterExisting(filteredMembers, outputDir, myzip):
    """Return the members that still have to be extracted from ``myzip``.

    A member is dropped (and a "skip" line printed) only when a file of
    the same relative name already exists below ``outputDir`` and its
    size equals the uncompressed size recorded in the archive. The order
    of ``filteredMembers`` is preserved.
    """
    pending = []
    for member in filteredMembers:
        target = join(outputDir, member)
        if isfile(target) and myzip.getinfo(member).file_size == getsize(target):
            print("skip: " + member + " (file exists and has the same size)")
        else:
            pending.append(member)
    return pending
|
|
|
|
|
|
def filterMembers(members, pattern):
    """Return, in order, the entries of ``members`` accepted by ``pattern.match``.

    ``pattern`` is an object with a ``match`` method, e.g. a compiled
    regular expression; matching is therefore anchored at the start only.
    """
    return [member for member in members if pattern.match(member)]
|
|
|
|
|
|
def mapZipToOutputDir(zips, outputDir):
    """Pair every usable archive with its target directory.

    Returns a list of ``(archive path, outputDir/<instance id>)`` tuples in
    the order of ``zips``. Archives for which ``extractDate`` or
    ``extractInstanceId`` finds nothing are left out.
    """
    pairs = []
    for archive in zips:
        archiveDate = extractDate(archive)
        instance = extractInstanceId(archive)
        if not (archiveDate and instance):
            continue
        # The date only acts as a filter. An earlier variant nested the
        # target as <outputDir>/<date>/<instanceId>; the reason is unknown.
        pairs.append((archive, join(outputDir, instance)))
    return pairs
|
|
|
|
def extractInstanceId(zipFileName):
    """Return the instance id embedded in an archive name, or ``False``.

    The id is the alphanumeric run following ``_i-`` in names such as

    * pre 5.14:  ``axcng-service_i-0ccd20213cffb9fc3_001201.zip``
    * post 5.14: ``axcng-service_i-09c26757fd0b61c12_172_19_113_219_VAPFINRA01AA001_2018-04-14_090701.zip``

    ``zipFileName`` may carry leading directory components.
    """
    # Raw string with an escaped dot, so that only a literal ".zip" is
    # accepted (a bare "." would match any character before "zip").
    match = re.match(r".*axcng-.*_i-([a-zA-Z0-9]+)_.*\.zip", zipFileName)
    return match.group(1) if match else False
|
|
|
|
def extractDate(zipFileName):
    """Return the ``YYYY-MM-DD`` date embedded in an archive name, or ``False``.

    Expected name shape:
    ``axcng-service_i-0376ad122c7fa2bbc_172_28_1_153_VADTRANS01AA001_2018-08-17_095022.zip``
    i.e. ``_<date>_<six digits>.zip``. When the name does not match,
    ``"no date"`` and the name are printed.

    An earlier version took the date from a directory component of the
    path instead of from the file name.
    """
    # Raw string: "\." in a normal string literal is an invalid escape.
    match = re.match(r".*_([0-9]{4}-[0-9]{2}-[0-9]{2})_[0-9]{6}\.zip", zipFileName)
    if match:
        return match.group(1)

    print("no date" , zipFileName)
    return False
|
|
|
|
def listZips(directory):
    """Return the paths of all files below ``directory``, printing each one.

    Despite the name no filtering by extension happens here: every file
    found by ``os.walk`` is returned, in walk order.
    """
    found = []
    for root, _dirs, files in os.walk(directory):
        paths = [join(root, name) for name in files]
        for path in paths:
            print(path)
        found.extend(paths)
    return found
|
|
|
|
def remove_file(file):
    """Delete ``file`` if it is an existing regular file; otherwise do nothing."""
    if not os.path.isfile(file):
        return
    os.remove(file)
|
|
|
|
|
|
|
|
def help(returnValue):
    """Print the command line usage and terminate with exit code ``returnValue``.

    Note that, inside this module, the name shadows the builtin ``help``.
    """
    script = sys.argv[0]
    usage = (
        script + ' -i <directory> -o <directory> [-e <expression>]',
        'Examples: ',
        script + ' -e ".*/performance.log.*" -i logs\\vapaccen01 -o extracted\\vapaccen01',
        script + ' -e ".*/performance.log.*" -i logs\\vapaccen01 -o d:\\ws\\pdb\\logs\\vapaccen01',
    )
    for line in usage:
        print(line)
    sys.exit(returnValue)
|
|
|
|
def main(argv):
    """Parse the command line, validate both directories and run the extraction.

    Args:
        argv: the command line arguments without the script name.

    Options:
        -h                    print the usage and exit with status 0
        -e / --expression X   pattern handed to 7-Zip (default ``'.*'``)
        -i / --input DIR      directory containing the downloaded archives
        -o / --output DIR     existing directory receiving the extracted logs

    Always ends in ``sys.exit``: status 0 after a successful extraction,
    status 2 (through ``help``) for bad options or directories.
    """
    # NOTE(review): the default looks like a regular expression, but the
    # value is passed to 7z.exe unchanged - confirm that this is intended.
    expression = '.*'
    inputDir = ''
    outputDir = ''
    try:
        # Long options that take a value need a trailing "="; without it
        # "--expression VALUE" would never receive its value.
        opts, args = getopt.getopt(argv, "he:i:o:", ["expression=", "input=", "output="])
    except getopt.GetoptError:
        help(2)

    for opt, arg in opts:
        if opt == '-h':
            help(0)
        elif opt in ("-e", "--expression"):
            expression = arg
        elif opt in ("-i", "--input"):
            inputDir = arg
        elif opt in ("-o", "--output"):
            outputDir = arg

    print('Input directory is "' + inputDir + '"')
    print('Ouput directory is "' + outputDir + '"')

    if not os.path.isdir(inputDir):
        print("input dir is not a directory")
        help(2)
    if not isdir(outputDir):
        print("output dir is not a directory")
        help(2)

    print("extract")
    extract(inputDir, outputDir, expression)
    sys.exit(0)
|
|
|
|
if __name__ == "__main__":
    # Script entry point: pass everything after the script name to main().
    cliArgs = sys.argv[1:]
    main(cliArgs)
|