"""Download per-pod application logs from S3 and bundle the performance logs.

For every pod and every day in the requested range, the script copies the
``axcng-service*`` objects from S3 into a temporary directory, runs
``extractAppAsgLogsWith7z.py`` to pull out the performance logs, and finally
packs everything that was extracted into a single zip archive with 7-Zip.
"""

import getopt
import gzip
import os
import re
import shutil
import sys
import time
from datetime import datetime, timedelta
from multiprocessing import Pool, cpu_count
from os.path import abspath, dirname, exists, getsize, isdir, isfile, join
from random import random
from subprocess import call, check_call

# Date format accepted on the command line and used in the S3 key layout.
DATE_PATTERN = "%Y-%m-%d"

# S3 prefix under which every pod has "<POD>/log/app_asg/<date>".
S3_LOG_ROOT = "s3://recommind-logs/"

# Location of the 7-Zip command line executable used to build the archive.
SEVEN_ZIP_EXE = "C:\\Program Files\\7-Zip\\7z.exe"

# Pods processed when --pods is not given.
DEFAULT_PODS = 'vapbrewe01,vapglask01,vapjazzp01,vapnorto01,vapondem01,vapondem02,vapondem03,vapondem04'


def download(fromDate, to_date, pod_list, outputFile, tmpDir):
    """Fetch, extract and archive the performance logs of the given pods.

    Args:
        fromDate: First day to download, formatted ``YYYY-MM-DD``.
        to_date: Last day to download (inclusive), formatted ``YYYY-MM-DD``.
        pod_list: Comma separated pod names; blanks are ignored.
        outputFile: Path handed to 7-Zip as the archive name. An existing
            file at exactly this path is removed first.
        tmpDir: Scratch directory. ``<tmpDir>/extracted`` must not exist yet;
            the whole ``tmpDir`` tree is deleted when the function returns or
            raises.

    Raises:
        ValueError: If the dates are malformed, the range is reversed, or
            ``pod_list`` contains no pod name.
        subprocess.CalledProcessError: If ``aws``, the extraction script or
            7-Zip exits with a non-zero status.
    """
    dates = getDateRange(fromDate, to_date)
    print(dates)

    # Drop empty entries so "a,,b" or a trailing comma cannot produce an
    # S3 path with an empty pod name.
    pods = [pod for pod in pod_list.replace(' ', '').split(',') if pod]
    if not pods:
        raise ValueError("no pods given")

    tmpExtractDir = join(tmpDir, "extracted")
    # Deliberately without exist_ok: if this fails the directory belongs to
    # someone else and must not be wiped by the cleanup below.
    os.makedirs(tmpExtractDir)

    try:
        for pod in pods:
            tmpLogDir = join(tmpDir, 'logs', pod)
            tmpExtractedPodDir = join(tmpExtractDir, pod)
            os.makedirs(tmpExtractedPodDir, exist_ok=True)

            for date in dates:
                # Re-created for every day because it is removed again at the
                # end of each iteration to keep the scratch space small.
                os.makedirs(tmpLogDir, exist_ok=True)

                cmd = ["aws", "s3", "cp",
                       S3_LOG_ROOT + pod.upper() + "/log/app_asg/" + date,
                       tmpLogDir + "/" + date,
                       "--recursive", "--exclude", "*",
                       "--include", "axcng-service*"]
                print(cmd)
                check_call(cmd)

                # The helper script is resolved relative to the current
                # working directory.
                cmd = [sys.executable, "extractAppAsgLogsWith7z.py",
                       "-e", "*/performance*log*",
                       "-i", tmpLogDir,
                       "-o", tmpExtractedPodDir]
                print(cmd)
                check_call(cmd)

                shutil.rmtree(tmpLogDir, ignore_errors=True)

        # Zip the performance logs.
        # NOTE(review): outputFile has no extension; 7-Zip may append ".zip"
        # to the archive name, in which case this removal (and the existence
        # check in main) looks at a different path than the one written.
        # Confirm against the 7-Zip documentation.
        remove_file(outputFile)
        print("sleep 20 seconds to give the file share time to update directory entries")
        time.sleep(20)
        cmd = [SEVEN_ZIP_EXE, "a", "-tzip", outputFile, join(tmpExtractDir, "*")]
        print(cmd)
        check_call(cmd)
    finally:
        # Clean up on failure as well, otherwise a failed run leaves the
        # downloaded logs behind in the scratch directory.
        shutil.rmtree(tmpDir, ignore_errors=True)


def remove_file(file):
    """Delete ``file`` if it exists and is a regular file; otherwise do nothing."""
    if os.path.isfile(file):
        os.remove(file)


def getDateRange(from_date, to_date):
    """Return every day from ``from_date`` to ``to_date`` inclusive.

    Args:
        from_date: Start day as ``YYYY-MM-DD``.
        to_date: End day as ``YYYY-MM-DD``.

    Returns:
        List of ``YYYY-MM-DD`` strings in ascending order.

    Raises:
        ValueError: If either date cannot be parsed or ``from_date`` is
            later than ``to_date``.
    """
    from_datetime = datetime.strptime(from_date, DATE_PATTERN)
    to_datetime = datetime.strptime(to_date, DATE_PATTERN)
    print(from_datetime)
    print(to_datetime)

    if from_datetime > to_datetime:
        raise ValueError("invalid date range %s - %s" % (from_datetime, to_datetime))

    # Both values are at midnight, so the difference is a whole number of days.
    day_count = (to_datetime - from_datetime).days + 1
    return [
        (from_datetime + timedelta(days=offset)).strftime(DATE_PATTERN)
        for offset in range(day_count)
    ]


def help(returnValue):
    """Print the command line usage and exit with ``returnValue``."""
    print(sys.argv[0] + ' --from <yyyy-mm-dd> --to <yyyy-mm-dd>'
          ' [--output <dir>] [--pods <pod1,pod2,...>] [--tmpdir <dir>]')
    print('Examples: ')
    print(sys.argv[0] + ' --from 2018-01-01 --to 2018-01-31 --output /path/to/outputdir --pods vapondem01')
    sys.exit(returnValue)


def main(argv):
    """Parse the command line, validate the paths and run the download.

    Args:
        argv: Command line arguments without the program name.

    Exits with status 2 (after printing the usage) when an option is unknown,
    a date is missing or malformed, the temp base directory or the output
    directory does not exist, or the output file already exists.
    """
    tmpBaseDir = 'tmp123'
    outputDir = 'out'
    pods = DEFAULT_PODS
    from_date = ''
    to_date = ''

    print("args: %s" % (argv,))
    try:
        opts, args = getopt.getopt(
            argv,
            "f:ho:p:t:x:",
            ["from=", "help", "output=", "pods=", "tmpdir=", "to="],
        )
    except getopt.GetoptError as err:
        # Tell the user which option was rejected before showing the usage.
        print(err)
        help(2)

    print("opts: %s" % opts)
    for opt, arg in opts:
        print("-- %s, %s" % (opt, arg))
        if opt in ("-h", "--help"):
            help(0)
        elif opt in ("-f", "--from"):
            from_date = arg
        elif opt in ("-o", "--output"):
            outputDir = arg
        elif opt in ("-p", "--pods"):
            pods = arg
        elif opt in ("-x", "--tmpdir"):
            tmpBaseDir = arg
        elif opt in ("-t", "--to"):
            to_date = arg

    # Both dates are mandatory; reject missing or malformed values here with
    # the usage text instead of failing later with a traceback.
    for option, value in (("--from", from_date), ("--to", to_date)):
        try:
            datetime.strptime(value, DATE_PATTERN)
        except ValueError:
            print("%s must be a date formatted yyyy-mm-dd, got '%s'" % (option, value))
            help(2)

    outputFile = join(outputDir, "logs_" + from_date + "_" + to_date + "_" + pods.replace(',', '_'))
    # Random sub directory so that parallel runs do not share scratch space.
    tmpDir = join(tmpBaseDir, str(random())[2:])

    print('tmpBaseDir directory is "' + tmpBaseDir + '"')
    print('tmpDir directory is "' + tmpDir + '"')
    print('Ouput file is "' + outputFile + '"')

    if not os.path.isdir(tmpBaseDir):
        print("tmpBaseDir is not a directory")
        help(2)

    outputParent = dirname(abspath(outputFile))
    if not os.path.isdir(outputParent):
        print("'%s' is not a directory" % (outputParent))
        help(2)

    if os.path.exists(abspath(outputFile)):
        print("'%s' already exists" % (abspath(outputFile)))
        help(2)

    start = time.time()
    download(from_date, to_date, pods, outputFile, tmpDir)
    end = time.time()
    print("duration %s seconds" % (end - start))


if __name__ == "__main__":
    main(sys.argv[1:])