# Files
# perfdb/plogdownloader/download.py
#
# 136 lines
# 4.5 KiB
# Python

import re
import gzip
import os
import sys
import getopt
from os.path import getsize, isdir, isfile, join, dirname, abspath, exists
from multiprocessing import Pool, cpu_count
import time
import shutil
from datetime import datetime
from datetime import timedelta
from random import random
from subprocess import call,check_call
import time
def download(fromDate, to_date, pod_list, outputFile, tmpDir):
    """Download, extract and zip the performance logs of the given pods.

    For every pod and every day in the inclusive range ``fromDate`` ..
    ``to_date`` the ``axcng-service*`` objects are copied from S3 with the
    ``aws`` CLI, the performance logs are extracted by the
    ``extractAppAsgLogsWith7z.py`` helper script, and the raw download is
    deleted again. Finally everything that was extracted is packed into
    ``outputFile`` with 7-Zip.

    Args:
        fromDate: First day to fetch, formatted ``YYYY-MM-DD``.
        to_date: Last day to fetch (inclusive), formatted ``YYYY-MM-DD``.
        pod_list: Comma separated pod names; spaces are ignored.
        outputFile: Archive path handed to 7-Zip. An existing regular file
            at this path is removed first.
        tmpDir: Scratch directory. ``<tmpDir>/extracted`` must not exist
            yet; the whole ``tmpDir`` tree is deleted before returning,
            whether the run succeeded or failed.

    Raises:
        subprocess.CalledProcessError: If ``aws``, the extraction script or
            7-Zip exits with a non-zero status.
        FileExistsError: If ``<tmpDir>/extracted`` already exists.
        Exception: Whatever ``getDateRange`` raises for malformed or
            reversed dates (raised before anything is created on disk).
    """
    dates = getDateRange(fromDate, to_date)
    print(dates)
    pods = pod_list.replace(' ', '').split(',')
    tmpExtractDir = join(tmpDir, "extracted")
    os.makedirs(tmpExtractDir)
    try:
        for pod in pods:
            # Both paths depend only on the pod, so compute them once per pod.
            tmpLogDir = join(tmpDir, 'logs', pod)
            tmpExtractedPodDir = join(tmpExtractDir, pod)
            os.makedirs(tmpExtractedPodDir, exist_ok=True)
            # NOTE(review): the loop nesting was reconstructed from a copy of
            # this file that had lost its indentation. Download, extraction
            # and cleanup are all done once per date here - confirm.
            for date in dates:
                # Re-created for every date because it is removed again below.
                os.makedirs(tmpLogDir, exist_ok=True)
                cmd = [
                    "aws", "s3", "cp",
                    "s3://recommind-logs/" + pod.upper() + "/log/app_asg/" + date,
                    tmpLogDir + "/" + date,
                    "--recursive", "--exclude", "*", "--include", "axcng-service*",
                ]
                print(cmd)
                check_call(cmd)
                # The helper script is given as a bare file name, so it is
                # looked up relative to the current working directory.
                cmd = [
                    sys.executable, "extractAppAsgLogsWith7z.py",
                    "-e", "*/performance*log*",
                    "-i", tmpLogDir,
                    "-o", tmpExtractedPodDir,
                ]
                print(cmd)
                check_call(cmd)
                # Drop the raw download right away to keep disk usage low.
                shutil.rmtree(tmpLogDir, ignore_errors=True)

        # zip the performance logs
        remove_file(outputFile)
        print("sleep 20 seconds to give the file share time to update directory entries")
        time.sleep(20)
        cmd = ["C:\\Program Files\\7-Zip\\7z.exe", "a", "-tzip", outputFile, join(tmpExtractDir, "*")]
        print(cmd)
        check_call(cmd)
    finally:
        # Always remove the scratch tree; previously a failing command left
        # all downloaded and extracted logs behind.
        shutil.rmtree(tmpDir, ignore_errors=True)
def remove_file(file):
    """Delete ``file`` if it is an existing regular file; otherwise do nothing."""
    if not os.path.isfile(file):
        # Missing paths and directories are left untouched.
        return
    os.remove(file)
def getDateRange(from_date, to_date):
    """Return every day from ``from_date`` to ``to_date`` as a list of strings.

    Both bounds are inclusive and both the arguments and the returned items
    use the ``YYYY-MM-DD`` format. The two parsed bounds are printed.

    Args:
        from_date: First day of the range, formatted ``YYYY-MM-DD``.
        to_date: Last day of the range, formatted ``YYYY-MM-DD``.

    Returns:
        The days of the range in ascending order; a single-element list when
        both bounds are the same day.

    Raises:
        ValueError: If either argument does not match ``YYYY-MM-DD`` or if
            ``from_date`` is later than ``to_date``.
    """
    pattern = "%Y-%m-%d"
    from_datetime = datetime.strptime(from_date, pattern)
    to_datetime = datetime.strptime(to_date, pattern)
    print(from_datetime)
    print(to_datetime)
    if from_datetime > to_datetime:
        # ValueError matches what strptime raises for malformed input, so
        # callers can handle every "bad date" case with one exception type.
        raise ValueError("invalid date range %s - %s" % (from_datetime, to_datetime))
    # Both values are at midnight, so the difference is a whole number of days.
    day_count = (to_datetime - from_datetime).days + 1
    return [
        (from_datetime + timedelta(days=offset)).strftime(pattern)
        for offset in range(day_count)
    ]
def help(returnValue):
    """Print the command line usage and exit with status ``returnValue``."""
    script = sys.argv[0]
    usage_lines = (
        script + ' --from <yyyy-mm-dd> --to <yyyy-mm-dd> --output <directory> --pods <comma separated pod names>',
        'Examples: ',
        script + ' --from 2018-01-01 --to 2018-01-31 --output /path/to/outputdir --pods vapondem01',
    )
    for line in usage_lines:
        print(line)
    sys.exit(returnValue)
def main(argv):
    """Parse the command line, validate it and run the download.

    Recognised options:

    * ``-f`` / ``--from``   first day, ``YYYY-MM-DD`` (required)
    * ``-t`` / ``--to``     last day, ``YYYY-MM-DD`` (required)
    * ``-o`` / ``--output`` directory for the archive (default ``out``)
    * ``-p`` / ``--pods``   comma separated pod names (default: built-in list)
    * ``-x`` / ``--tmpdir`` existing base directory for scratch data
      (default ``tmp123``); not listed in the usage text
    * ``-h`` / ``--help``   print the usage and exit with status 0

    Any invalid invocation prints the usage and exits with status 2.

    Args:
        argv: Command line arguments without the program name.
    """
    tmpBaseDir = 'tmp123'
    outputDir = 'out'
    pods = 'vapbrewe01,vapglask01,vapjazzp01,vapnorto01,vapondem01,vapondem02,vapondem03,vapondem04'
    from_date = ''
    to_date = ''
    try:
        print("args: %s" % (argv,))
        opts, _ = getopt.getopt(
            argv,
            "f:ho:p:t:x:",
            ["from=", "help", "output=", "pods=", "tmpdir=", "to="],
        )
    except getopt.GetoptError as err:
        # Tell the user which option was rejected before showing the usage.
        print(err)
        help(2)
    print("opts: %s" % (opts,))
    for opt, arg in opts:
        print("-- %s, %s" % (opt, arg))
        if opt in ("-h", "--help"):
            help(0)
        elif opt in ("-f", "--from"):
            from_date = arg
        elif opt in ("-o", "--output"):
            outputDir = arg
        elif opt in ("-p", "--pods"):
            pods = arg
        elif opt in ("-x", "--tmpdir"):
            tmpBaseDir = arg
        elif opt in ("-t", "--to"):
            to_date = arg

    # Without this check a missing date only surfaced later as a strptime
    # traceback instead of a usage message.
    if not from_date or not to_date:
        print("--from and --to are required")
        help(2)

    outputFile = join(outputDir, "logs_" + from_date + "_" + to_date + "_" + pods.replace(',', '_'))
    # A random sub directory keeps separate runs from sharing scratch data.
    tmpDir = join(tmpBaseDir, str(random())[2:])
    print('tmpBaseDir directory is "' + tmpBaseDir + '"')
    print('tmpDir directory is "' + tmpDir + '"')
    print('Ouput file is "' + outputFile + '"')

    if not os.path.isdir(tmpBaseDir):
        print("tmpBaseDir is not a directory")
        help(2)

    outputPath = abspath(outputFile)
    outputParent = dirname(outputPath)
    if not os.path.isdir(outputParent):
        print("'%s' is not a directory" % (outputParent))
        help(2)
    # NOTE(review): 7-Zip may append ".zip" to an archive name that has no
    # extension; if it does, this check never sees an archive written by an
    # earlier run - confirm.
    if os.path.exists(outputPath):
        print("'%s' already exists" % (outputPath))
        help(2)

    start = time.time()
    download(from_date, to_date, pods, outputFile, tmpDir)
    end = time.time()
    print("duration %s seconds" % (end - start))
# Script entry point: forward everything after the program name to main().
if __name__ == "__main__":
    cli_arguments = sys.argv[1:]
    main(cli_arguments)