"""Extract application log files from downloaded ASG log archives with 7-Zip.

The script walks an input directory, maps every archive whose name carries
an instance id and a date to ``<outputDir>/<instanceId>``, extracts the
members selected by a 7-Zip wildcard, unpacks zipped logs found in the
extracted output, and finally removes ``performance.log`` files that look
like they were created right after a log roll.
"""
from zipfile import ZipFile
import os
from os import listdir
from os.path import getsize, isdir, isfile, join, dirname
import sys
import getopt
import re
import shutil
from subprocess import call, check_call, CalledProcessError
import time

# Example invocation:
# S:\Working\ahr\Python36\python S:\Working\ahr\giru\plog-downloader\extractAppAsgLogsWith7z.py -e "*/performance*log*" -o S:\Working\ahr\giru\tmp\9323982100316752\extracted\vaprjrey01 -i S:\Working\ahr\giru\tmp\9323982100316752\logs\vaprjrey01

# Location of the 7-Zip command line executable used for all extractions.
SEVEN_ZIP_EXE = "C:\\Program Files\\7-Zip\\7z.exe"

# Retry policy for operations that may fail transiently (extract / delete).
MAX_ATTEMPTS = 10
RETRY_DELAY_SECONDS = 1

# The expression is handed to 7-Zip, which interprets it as a wildcard (not
# as a regular expression), so "everything" is spelled "*".
DEFAULT_PATTERN = "*"

# pre 5.14 pattern:  axcng-service_i-0ccd20213cffb9fc3_001201.zip
# post 5.14 pattern: axcng-service_i-09c26757fd0b61c12_172_19_113_219_VAPFINRA01AA001_2018-04-14_090701.zip
INSTANCE_ID_RE = re.compile(r".*axcng-.*_i-([a-zA-Z0-9]+)_.*\.zip")

# axcng-service_i-0376ad122c7fa2bbc_172_28_1_153_VADTRANS01AA001_2018-08-17_095022.zip
DATE_RE = re.compile(r".*_([0-9]{4}-[0-9]{2}-[0-9]{2})_[0-9]{6}\.zip")

# Matches a line containing a date followed by a time of 00:1x.
JUST_ROLLED_RE = re.compile(r".*\d\d\d\d-\d\d-\d\d 00:1.*")


def extract(inputDir, outputDir, pattern):
    """Extract all members matching *pattern* from the archives in *inputDir*.

    Args:
        inputDir: directory that is searched recursively for archives.
        outputDir: base directory; each archive is extracted into a
            sub-directory named after its instance id.
        pattern: 7-Zip wildcard selecting the archive members to extract.
    """
    zips = listZips(inputDir)
    zips.sort()
    print("zips: %s" % zips)
    zipToOutputDir = mapZipToOutputDir(zips, outputDir)
    print("zipToOutputDir: %s" % zipToOutputDir)
    extractFiles(zipToOutputDir, pattern)


def extractFiles(zipToOutputDir, pattern):
    """Run 7-Zip for every ``(zipFile, outputDir)`` pair and post-process.

    After each extraction, zipped logs inside the output directory are
    unpacked and freshly rolled performance logs are removed.

    Raises:
        subprocess.CalledProcessError: if 7-Zip exits with a non-zero code.
        OSError: if the 7-Zip executable cannot be started.
    """
    for zipFile, outputDir in zipToOutputDir:
        # 7z e archive.zip -o outputdir *.xml *.dll
        cmd = [SEVEN_ZIP_EXE, "e", zipFile, "-y", "-o%s" % outputDir, pattern]
        print(cmd)
        check_call(cmd)
        extractZippedLogs(outputDir)
        removeLogsCreatedAfterRolling(outputDir)


def extractZippedLogs(outputDir):
    """Unpack every ``*.zip`` below *outputDir* into *outputDir* and delete it."""
    zips = [
        join(dirpath, name)
        for dirpath, _dirnames, filenames in os.walk(outputDir)
        for name in filenames
        if name.endswith(".zip")
    ]
    print("extract2: %s" % zips)
    for zipFile in zips:
        extractFile(zipFile, outputDir)
        remove_file(zipFile)


def extractFile(zipFile, outputDir):
    """Extract *zipFile* into *outputDir*, retrying up to MAX_ATTEMPTS times.

    Returns silently when *zipFile* does not exist (any more). Terminates
    the process with exit code 2 when every attempt failed.
    """
    for attempt in range(1, MAX_ATTEMPTS + 1):
        if not os.path.isfile(zipFile):
            print("extractFile: %s is not a file" % zipFile)
            return
        cmd = [SEVEN_ZIP_EXE, "e", zipFile, "-y", "-o%s" % outputDir]
        print("attempt %d: %s" % (attempt, cmd))
        try:
            check_call(cmd)
        except (CalledProcessError, OSError) as error:
            # Only extraction problems are retried; KeyboardInterrupt and
            # SystemExit are deliberately allowed to propagate.
            print("extraction failed: %s" % error)
            time.sleep(RETRY_DELAY_SECONDS)
        else:
            return
    print("aborted after %d failed attempts" % MAX_ATTEMPTS)
    sys.exit(2)


def _removeJustRolledLog(path):
    """Delete *path*, retrying up to MAX_ATTEMPTS times; exit with 2 on failure."""
    for attempt in range(1, MAX_ATTEMPTS + 1):
        if not os.path.isfile(path):
            return
        print("attempt %d removing %s, because it was just rolled" % (attempt, path))
        try:
            os.remove(path)
        except OSError as error:
            print("failed to delete: %s" % error)
            time.sleep(RETRY_DELAY_SECONDS)
    if os.path.isfile(path):
        # Bounded like extractFile so a permanently locked file cannot hang
        # the script forever.
        print("aborted after %d failed attempts to delete %s" % (MAX_ATTEMPTS, path))
        sys.exit(2)


def removeLogsCreatedAfterRolling(outputDir):
    """Remove ``*performance.log`` files whose last line is stamped 00:1x.

    Only regular files directly inside *outputDir* are inspected. Does
    nothing when *outputDir* is not a directory.
    """
    if not os.path.isdir(outputDir):
        return
    logfiles = [f for f in listdir(outputDir) if isfile(join(outputDir, f))]
    for member in logfiles:
        path = join(outputDir, member)
        print("check if %s has just been rolled" % path)
        if not member.endswith('performance.log'):
            continue
        lastLine = readLastLine(path)
        print("lastLine: %s)" % lastLine)
        # errors='replace' keeps a stray non-UTF-8 byte from aborting the run.
        if JUST_ROLLED_RE.match(lastLine.decode('utf8', errors='replace')):
            _removeJustRolledLog(path)


def readLastLine(file):
    """Return the last line of *file* as bytes.

    The final byte of the file is ignored while searching for the line
    start, so a single trailing newline does not count as a separate empty
    line. The returned line includes its trailing newline, if present.
    An empty file yields ``b""``.
    """
    chunkSize = 4096
    with open(file, 'rb') as f:
        f.seek(0, os.SEEK_END)
        end = f.tell()
        if end == 0:
            return b""
        # Search backwards, chunk by chunk, for the newline that precedes
        # the last line; "pos" is the exclusive upper bound of the search.
        pos = end - 1
        while pos > 0:
            start = max(0, pos - chunkSize)
            f.seek(start)
            chunk = f.read(pos - start)
            newline = chunk.rfind(b"\n")
            if newline != -1:
                f.seek(start + newline + 1)
                return f.readline()
            pos = start
        # No earlier newline: the whole file is a single line.
        f.seek(0)
        return f.readline()


def filterExisting(filteredMembers, outputDir, myzip):
    """Return the members that still need to be extracted.

    A member is kept when it does not exist below *outputDir* or when the
    existing file's size differs from the size recorded in the archive.

    Args:
        filteredMembers: archive member names.
        outputDir: directory the members would be extracted to.
        myzip: object providing ``getinfo(name).file_size`` (e.g. ZipFile).
    """
    result = []
    for member in filteredMembers:
        path = join(outputDir, member)
        if not isfile(path):
            result.append(member)
            continue
        if myzip.getinfo(member).file_size != getsize(path):
            result.append(member)
        else:
            print("skip: " + member + " (file exists and has the same size)")
    return result


def filterMembers(members, pattern):
    """Return the members matched by the compiled regular expression *pattern*."""
    return [member for member in members if pattern.match(member)]


def mapZipToOutputDir(zips, outputDir):
    """Map archive names to output directories.

    Returns a list of ``(zipFileName, join(outputDir, instanceId))`` tuples
    for every archive name that contains both a date and an instance id;
    other names are skipped.
    """
    result = []
    for zipFileName in zips:
        date = extractDate(zipFileName)
        instanceId = extractInstanceId(zipFileName)
        if date and instanceId:
            # why did I have a version with date AND instanceId ???
            # result.append((zipFileName, join(outputDir, date, instanceId)))
            result.append((zipFileName, join(outputDir, instanceId)))
    return result


def extractInstanceId(zipFileName):
    """Return the instance id embedded in *zipFileName*, or False if absent."""
    match = INSTANCE_ID_RE.match(zipFileName)
    return match.group(1) if match else False


def extractDate(zipFileName):
    """Return the ``YYYY-MM-DD`` date embedded in *zipFileName*, or False."""
    match = DATE_RE.match(zipFileName)
    if match:
        return match.group(1)
    print("no date", zipFileName)
    return False


def listZips(directory):
    """Return the paths of all files below *directory* (recursively).

    Every file is listed, whatever its extension; each path is printed.
    """
    zips = []
    for root, _dirs, files in os.walk(directory):
        for name in files:
            path = join(root, name)
            print(path)
            zips.append(path)
    return zips


def remove_file(file):
    """Delete *file* if it is an existing regular file."""
    if os.path.isfile(file):
        os.remove(file)


def help(returnValue):
    """Print the usage text and exit the process with *returnValue*."""
    print(sys.argv[0] + ' -i <inputDir> -o <outputDir> [-e <expression>]')
    print('Examples: ')
    print(sys.argv[0] + ' -e "*/performance*log*" -i logs\\vapaccen01 -o extracted\\vapaccen01')
    print(sys.argv[0] + ' -e "*/performance*log*" -i logs\\vapaccen01 -o d:\\ws\\pdb\\logs\\vapaccen01')
    sys.exit(returnValue)


def main(argv):
    """Parse the command line in *argv* and run the extraction.

    Exits with 0 on success and with 2 on usage errors.
    """
    expression = DEFAULT_PATTERN
    inputDir = ''
    outputDir = ''
    try:
        # Long options that take a value need the trailing "=".
        opts, args = getopt.getopt(
            argv, "he:i:o:", ["help", "expression=", "input=", "output="])
    except getopt.GetoptError as error:
        print(error)
        help(2)
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            help(0)
        elif opt in ("-e", "--expression"):
            expression = arg
        elif opt in ("-i", "--input"):
            inputDir = arg
        elif opt in ("-o", "--output"):
            outputDir = arg
    print('Input directory is "' + inputDir + '"')
    print('Ouput directory is "' + outputDir + '"')
    if not os.path.isdir(inputDir):
        print("input dir is not a directory")
        help(2)
    if not isdir(outputDir):
        print("output dir is not a directory")
        help(2)
    print("extract")
    extract(inputDir, outputDir, expression)
    sys.exit(0)


if __name__ == "__main__":
    main(sys.argv[1:])