add plog downloader scripts

This commit is contained in:
2020-11-24 10:00:24 +01:00
parent 1e59271286
commit 7ad1e95825
7 changed files with 804 additions and 0 deletions

View File

@@ -0,0 +1,225 @@
from zipfile import ZipFile
import os
from os import listdir
from os.path import getsize, isdir, isfile, join, dirname
import sys
import getopt
import re
import shutil
from subprocess import call,check_call
import time
# Example invocation (the -e expression is passed to 7-Zip as a file wildcard):
# S:\Working\ahr\Python36\python S:\Working\ahr\giru\plog-downloader\extractAppAsgLogsWith7z.py -e "*/performance*log*" -o S:\Working\ahr\giru\tmp\9323982100316752\extracted\vaprjrey01 -i S:\Working\ahr\giru\tmp\9323982100316752\logs\vaprjrey01
def extract(inputDir, outputDir, pattern):
    """Extract the members matching ``pattern`` from the archives below ``inputDir``.

    The paths returned by ``listZips`` are processed in sorted order, paired
    with their target directories by ``mapZipToOutputDir`` and then handed to
    ``extractFiles``. Both intermediate lists are printed for diagnostics.
    """
    archives = sorted(listZips(inputDir))
    print("zips: %s" % archives)

    targets = mapZipToOutputDir(archives, outputDir)
    print("zipToOutputDir: %s" % targets)

    extractFiles(targets, pattern)
def extractFiles(zipToOutputDir, pattern):
    """Run 7-Zip for every ``(archive, target directory)`` pair.

    For each pair the members selected by ``pattern`` are extracted into the
    target directory, nested ``.zip`` files found there are unpacked via
    ``extractZippedLogs`` and freshly rolled performance logs are removed via
    ``removeLogsCreatedAfterRolling``.

    ``check_call`` raises if 7-Zip exits with a non-zero status, which stops
    the processing of the remaining archives.
    """
    sevenZip = "C:\\Program Files\\7-Zip\\7z.exe"
    for archive, targetDir in zipToOutputDir:
        # Command shape: 7z e <archive> -y -o<targetDir> <pattern>
        command = [sevenZip, "e", archive, "-y", "-o%s" % targetDir, pattern]
        print(command)
        check_call(command)
        extractZippedLogs(targetDir)
        removeLogsCreatedAfterRolling(targetDir)
    # NOTE: an earlier variant of this function used zipfile.ZipFile together
    # with filterMembers() / filterExisting() and skipped members that already
    # existed with the same size. It was replaced by the 7-Zip call above.
def extractZippedLogs(outputDir):
    """Unpack every ``.zip`` file found below ``outputDir`` and delete it afterwards.

    The archives are collected first (recursively), the list is printed, and
    then each one is extracted into ``outputDir`` with ``extractFile`` and
    removed with ``remove_file``.
    """
    nestedArchives = [
        join(folder, fileName)
        for folder, _subDirs, fileNames in os.walk(outputDir)
        for fileName in fileNames
        if fileName.endswith(".zip")
    ]
    print("extract2: %s" % nestedArchives)

    for archive in nestedArchives:
        extractFile(archive, outputDir)
        remove_file(archive)
def extractFile(zipFile, outputDir, maxAttempts=10, retryDelay=1):
    """Extract ``zipFile`` into ``outputDir`` with 7-Zip, retrying on failure.

    Runs ``7z e <zipFile> -y -o<outputDir>`` up to ``maxAttempts`` times and
    sleeps ``retryDelay`` seconds after each failed attempt.

    Args:
        zipFile: path of the archive to extract.
        outputDir: directory the archive members are extracted into.
        maxAttempts: number of attempts before giving up (default 10).
        retryDelay: seconds to wait after a failed attempt (default 1).

    Returns:
        None. Returns early (without error) when ``zipFile`` is not an
        existing file, and after the first successful extraction.

    Exits the process with status 2 when every attempt failed.
    """
    # Local import: the module only imports call/check_call from subprocess.
    from subprocess import CalledProcessError

    sevenZip = "C:\\Program Files\\7-Zip\\7z.exe"
    for attempt in range(1, maxAttempts + 1):
        # Re-checked on every attempt: the file may disappear between retries.
        if not os.path.isfile(zipFile):
            print("extractFile: %s is not a file" % zipFile)
            return
        cmd = [sevenZip, "e", zipFile, "-y", "-o%s" % outputDir]
        print("attempt %d: %s" % (attempt, cmd))
        try:
            check_call(cmd)
            return
        except (CalledProcessError, OSError):
            # Only extraction failures are retried. A bare ``except`` would
            # also swallow KeyboardInterrupt / SystemExit and keep retrying.
            print("extraction failed")
            time.sleep(retryDelay)
    print("aborted after %d failed attempts" % maxAttempts)
    sys.exit(2)
def removeLogsCreatedAfterRolling(outputDir):
    """Delete ``*performance.log`` files whose last line is stamped ``00:1x``.

    Every regular file directly inside ``outputDir`` whose name ends with
    ``performance.log`` is inspected: when its last line contains a timestamp
    of the form ``YYYY-MM-DD 00:1...`` the file is removed.
    NOTE(review): presumably such a file was rolled over at midnight and only
    holds the first entries of the new day -- confirm with the log producer.

    Does nothing when ``outputDir`` is not a directory. Empty files and files
    whose last line cannot be read are left untouched.
    """
    if not os.path.isdir(outputDir):
        return

    # Raw string: "\d" inside a normal literal is an invalid escape sequence.
    justRolled = re.compile(r".*\d\d\d\d-\d\d-\d\d 00:1.*")

    logfiles = [f for f in listdir(outputDir) if isfile(join(outputDir, f))]
    for member in logfiles:
        file = join(outputDir, member)
        print("check if %s has just been rolled" % file)
        if not member.endswith('performance.log'):
            continue

        lastLine = readLastLine(file)
        print("lastLine: %s)" % lastLine)
        if not lastLine:
            # Empty file (or no line returned): nothing to match against.
            continue

        # The pattern only needs ASCII digits, so undecodable bytes are
        # replaced instead of aborting the whole run.
        if not justRolled.match(lastLine.decode('utf8', errors='replace')):
            continue

        # NOTE: retries until the file is gone, without an upper bound (the
        # file may be locked temporarily). A permanently locked file keeps
        # this loop running.
        attempt = 0
        while os.path.isfile(file):
            attempt += 1
            print("attempt %d removing %s, because it was just rolled" % (attempt, file))
            try:
                os.remove(file)
            except OSError:
                print("failed to delete")
                time.sleep(1)
def readLastLine(file):
    """Return the last line of ``file`` as bytes.

    The file is scanned backwards in blocks for the newline that precedes
    the last line. The final byte is skipped, so a trailing line terminator
    belongs to the returned line. The result includes that terminator when
    the file has one.

    Args:
        file: path of the file to read.

    Returns:
        The complete last line as ``bytes``; ``b""`` for an empty file.
    """
    blockSize = 4096
    with open(file, 'rb') as f:
        f.seek(0, os.SEEK_END)
        size = f.tell()
        if size == 0:
            return b""

        # Search [0, size - 1): the last byte is excluded because it may be
        # the terminator of the very line we are looking for.
        searchEnd = size - 1
        while searchEnd > 0:
            blockStart = max(0, searchEnd - blockSize)
            f.seek(blockStart)
            block = f.read(searchEnd - blockStart)
            newlineAt = block.rfind(b"\n")
            if newlineAt != -1:
                f.seek(blockStart + newlineAt + 1)
                return f.readline()
            searchEnd = blockStart

        # No earlier newline: the whole file is a single line.
        f.seek(0)
        return f.readline()
def filterExisting(filteredMembers, outputDir, myzip):
    """Return the members that still have to be extracted.

    A member is dropped (and a "skip" line is printed) only when a file of
    the same name already exists below ``outputDir`` and its size equals the
    uncompressed size recorded in ``myzip``. The input order is preserved.

    Args:
        filteredMembers: member names inside the archive.
        outputDir: directory the members would be extracted into.
        myzip: archive object providing ``getinfo(name).file_size``.
    """
    def needsExtraction(member):
        target = join(outputDir, member)
        if isfile(target) and myzip.getinfo(member).file_size == getsize(target):
            print("skip: " + member + " (file exists and has the same size)")
            return False
        return True

    return [member for member in filteredMembers if needsExtraction(member)]
def filterMembers(members, pattern):
    """Return the entries of ``members`` that ``pattern.match`` accepts.

    ``pattern`` is an object with a ``match`` method (e.g. a compiled regular
    expression); matching is anchored at the start of each name. The input
    order is preserved.
    """
    return [name for name in members if pattern.match(name)]
def mapZipToOutputDir(zips, outputDir):
    """Pair each recognised archive with its per-instance target directory.

    An archive is kept only when both ``extractDate`` and
    ``extractInstanceId`` recognise its file name. The target directory is
    ``outputDir/<instanceId>``; the date is used as a filter only.

    Returns:
        A list of ``(zipFileName, targetDirectory)`` tuples in input order.
    """
    pairs = []
    for archive in zips:
        date = extractDate(archive)
        instanceId = extractInstanceId(archive)
        if not (date and instanceId):
            continue
        # An older layout also put the date into the path:
        #   join(outputDir, date, instanceId)
        # Only the instance id is used now.
        pairs.append((archive, join(outputDir, instanceId)))
    return pairs
def extractInstanceId(zipFileName):
    """Return the instance id embedded in an archive name, or ``False``.

    Recognised name layouts:
      pre 5.14:  axcng-service_i-0ccd20213cffb9fc3_001201.zip
      post 5.14: axcng-service_i-09c26757fd0b61c12_172_19_113_219_VAPFINRA01AA001_2018-04-14_090701.zip

    The id is the alphanumeric run between ``_i-`` and the next underscore.
    """
    found = re.match(".*axcng-.*_i-([a-zA-Z0-9]+)_.*.zip", zipFileName)
    return found.group(1) if found else False
def extractDate(zipFileName):
    """Return the ``YYYY-MM-DD`` date embedded in an archive name, or ``False``.

    Expected name layout (date followed by a six-digit suffix):
      axcng-service_i-0376ad122c7fa2bbc_172_28_1_153_VADTRANS01AA001_2018-08-17_095022.zip

    Names without such a date are reported with a "no date" line on stdout.

    Args:
        zipFileName: file name or path of the archive.

    Returns:
        The date string, or ``False`` when the name does not match.
    """
    # Raw string: "\." inside a normal literal is an invalid escape sequence
    # and triggers a warning on current Python versions. The pattern value
    # itself is unchanged.
    match = re.match(r".*_([0-9]{4}-[0-9]{2}-[0-9]{2})_[0-9]{6}\.zip", zipFileName)
    if match:
        return match.group(1)
    print("no date", zipFileName)
    return False
def listZips(directory):
    """Return the paths of all files found recursively below ``directory``.

    Every path is printed as it is found. No filtering by extension happens
    here; all regular directory entries reported by ``os.walk`` are returned.
    """
    found = []
    for folder, _subDirs, fileNames in os.walk(directory):
        for path in (join(folder, fileName) for fileName in fileNames):
            print(path)
            found.append(path)
    return found
def remove_file(file):
    """Delete ``file`` when it is an existing regular file; otherwise do nothing."""
    if not os.path.isfile(file):
        return
    os.remove(file)
def help(returnValue):
    """Print the usage text with two examples and exit with ``returnValue``."""
    prog = sys.argv[0]
    usageLines = (
        prog + ' -i <directory> -o <directory> [-e <expression>]',
        'Examples: ',
        prog + ' -e ".*/performance.log.*" -i logs\\vapaccen01 -o extracted\\vapaccen01',
        prog + ' -e ".*/performance.log.*" -i logs\\vapaccen01 -o d:\\ws\\pdb\\logs\\vapaccen01',
    )
    for line in usageLines:
        print(line)
    sys.exit(returnValue)
def main(argv):
    """Parse the command line, validate the directories and run the extraction.

    Options:
        -h, --help                 print the usage text and exit with 0
        -e, --expression <expr>    member selection handed to ``extract``
        -i, --input <directory>    directory containing the downloaded archives
        -o, --output <directory>   directory receiving the extracted files

    Args:
        argv: command-line arguments without the program name.

    Always terminates the process: status 0 after a successful run, status 2
    (via ``help``) for invalid options or when a directory does not exist.
    """
    # NOTE(review): the expression ends up as a 7-Zip file wildcard in
    # extractFiles(), where '.*' is not "match everything" -- confirm whether
    # the default should rather be '*'.
    expression = '.*'
    inputDir = ''
    outputDir = ''
    try:
        # Long options that take a value need a trailing "="; without it
        # "--expression <expr>" was parsed as a flag and its value was lost.
        opts, args = getopt.getopt(
            argv, "he:i:o:", ["help", "expression=", "input=", "output="])
    except getopt.GetoptError as err:
        print(err)
        help(2)  # exits the process
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            help(0)
        elif opt in ("-e", "--expression"):
            expression = arg
        elif opt in ("-i", "--input"):
            inputDir = arg
        elif opt in ("-o", "--output"):
            outputDir = arg
    print('Input directory is "' + inputDir + '"')
    print('Ouput directory is "' + outputDir + '"')
    if not os.path.isdir(inputDir):
        print("input dir is not a directory")
        help(2)
    if not isdir(outputDir):
        print("output dir is not a directory")
        help(2)
    print("extract")
    extract(inputDir, outputDir, expression)
    sys.exit(0)
if __name__ == "__main__":
    # Script entry point: pass on the arguments without the program name.
    cliArgs = sys.argv[1:]
    main(cliArgs)