import smtplib
from email.MIMEText import MIMEText
import os
import time
import urllib2
import datetime
import cPickle


thispath = os.path.dirname(os.path.abspath(__file__))
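# reporting endpoint; its placeholders are filled in processLog with flyerreclinkid,
# flyer id (l), zone name (z), store number (sn), banner (b) and the click timestamp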
slink = "http://209.52.60.182:82/portal/recordflyerview.py?id=%s&flyerid=%s&zonename=%s&storenum=%s&banner=%s&time=%s"
logpath = '/var/log/apache2/'
monthd = {'JAN':1,'FEB':2,'MAR':3,'APR':4,'MAY':5,'JUN':6,'JUL':7,'AUG':8,'SEP':9,'OCT':10,'NOV':11,'DEC':12}
goodlinks = [] 
dtx0 = None

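# processLine() parses one Apache access-log line.  A matching line is assumed to look
# roughly like this (hypothetical example, for illustration only):
#   1.2.3.4 - - [25/Dec/2019:13:45:01 -0800] "GET /flyer?flyerreclinkid=123&l=456&z=zone1&sn=789&b=BL HTTP/1.1" 200 512
# i.e. the timestamp sits between "[" and "]" and the tracking parameters follow
# "?flyerreclinkid=".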
def processLine(s):
	global monthd, goodlinks, dtx0
	found=False
	for g in goodlinks:
		if g in s:
			found=True
			break
	if not found:
		print "not in goodlinks"
		return {"OK":False}
	x = s.find("?flyerreclinkid=")
	if x>-1:
		y = s.find("[")
		z = s.find("]")
		ds = s[y+1:z]
		dp = ds.split("/")
		dy = dp[0]
		dm = dp[1].upper().strip()
		mo = monthd[dm]
		yr = dp[2]
		yr = yr[0:yr.find(":")]
		tm = dp[2]
		tm = tm[tm.find(":")+1:]
		tm = tm[0:tm.find("-")]
		tm = tm.strip()
		tmc = tm.split(":")
		dtn = datetime.datetime.now()
		dtn_secs = (dtn - dtn.fromtimestamp(0)).total_seconds()
		t_secs = time.time()
		t_diff = dtn_secs - t_secs  # offset between the naive local-time epoch and the true UTC epoch, applied to tx below
		dtx = datetime.datetime(int(yr),mo,int(dy),int(tmc[0]),int(tmc[1]),int(tmc[2]))
		print "dtx=",str(dtx)
		print "dtx0=",str(dtx0)
		if dtx0 is not None:
			td = dtx - dtx0
			print "td=",str(td)
			if td.days<0:
				print "old line, dtx=",str(dtx)
				return {"OK":False}
		tx = (dtx - dtx.fromtimestamp(0)).total_seconds()
		tx = tx - t_diff
		sp = s[x+1:]
		sl = sp.split("&")
		d = {'OK':True,'TIMESTAMP':str(tx),'DTX':dtx}
		for sa in sl:
			se = sa.split("=")
			if len(se)==2:
				k = se[0].strip()
				k = k.replace("!%20","")  # for some reason, sometimes !%20 is inserted in the log file on the GET parameter.
				d[k]=se[1].strip()
		return d
	print "flyerreclinkid not found"
	return {"OK":False}
		
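# On success processLine returns a dict with OK=True, TIMESTAMP (epoch seconds as a
# string), DTX (the parsed datetime) and one key per query-string parameter found on
# the request (flyerreclinkid, l, z, sn, b, ...); on any failure it returns {"OK": False}.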

def processLog(banner,blogs):
	global logpath , dtx0
	print "processLog, banner=",banner
	blogs.sort()
	blogs.reverse() # process rotated (older) logs such as *.access_log.1 before the current *.access_log
	print "blogs reversed"
	logfilesavailable = os.listdir(logpath)
	flyerreclinkidsprocessed=[]
	okcount=0
	okemails=[]
	errcount=0
	for blog in blogs:
		print "blog=",blog
		if not (blog in logfilesavailable):
			print "blog not available"
			continue
		print "loading pickle " + thispath + '/flyersclickcapture_' + banner + '_lastdtx.pkl'
		try:
			dtx0 = cPickle.load(open(thispath + '/flyersclickcapture_' + banner + '_lastdtx.pkl','rb'))
		except:
			print "unable to load pickle" 
			continue
		linecount=0
		serrs = ""
		dtx=None
		lpath = logpath + blog
		print "loading log file " + lpath
		with open(lpath) as f:
			for sa in f:
				linecount = linecount + 1
				sa = sa.strip()
				print lpath + ":linecount=", linecount , " : " + sa
				if sa=="":
					continue
				d = processLine(sa)
				if d['OK']==False:
					continue
				if d['flyerreclinkid'] in flyerreclinkidsprocessed:
					continue
				flyerreclinkidsprocessed.append(d['flyerreclinkid'])
				print "processLog, d=",d
				dtx = d['DTX']
				try:
					sout = slink % (d['flyerreclinkid'],d['l'],d['z'],d['sn'],d['b'],d['TIMESTAMP'])
				except Exception:
					sr = '\nSA='+sa+'\nD='+str(d)+'\n'
					ferr = open(thispath + '/processlog_'+banner+'.err','a')  # separate handle; f is the log file being iterated
					ferr.write(sr)
					ferr.close()
					serrs = serrs + sr
					errcount=errcount+1
					continue
				ccount=1
				while ccount<10:
					sok=False
					try:
						print "connecting to trx server, count=",ccount
						su = urllib2.urlopen(sout)
						sok=True
					except Exception:
						print "failed connecting: ", sout
						print "processLog, called to slink, sout=",sout
					if sok:
						sresp = su.read()
						su.close()
						print "sresp=",sresp
						# responses other than "OK"/"FAIL" are collected for the status email
						if (sresp!="OK" and sresp!="FAIL"):
							okemails.append(sresp + " , ltv = " + str(dtx) + " , " + d['TIMESTAMP'])
							okcount=okcount+1						
							break
						if (sresp=="OK"):
							break
					time.sleep(1)
					ccount = ccount + 1
					
			if dtx is not None:  # only advance the checkpoint when at least one line was parsed
				pf = open(thispath + '/flyersclickcapture_' + banner + '_lastdtx.pkl','wb')
				cPickle.dump(dtx,pf)
				pf.close()
	print "Sending email"
	server = smtplib.SMTP('webmail.buy-low.com',port=587)
	server.login('systems@buy-low.com','jamesbond')
	smsg = 'Flyersclickcapture2 cron status.\nBanner=' + banner + '\nnum OK=' + str(okcount) + '\nnum Err=' + str(errcount) + '\n'
	if errcount>0:
		print "errcount > 0"
		smsg = smsg + '\nError Details:' + serrs 
	smsg = smsg + "EMAILS:\n"
	for em in okemails:
		print em
		smsg = smsg + em + "\n"
	smsg = smsg + 'End of list.'
	msg = MIMEText(smsg)
	msg['Subject']='Flyersclickcapture2 cron report, ' + banner + ', status=' + str(okcount) + "/" + str(errcount)
	msg['From']='systems@buy-low.com'
	tos=['clark_wong@buy-low.com','carlos_choy@buy-low.com']			
	for toaddr in tos:
		del msg['To']  # clear any previous To header; assigning msg['To'] in a loop would otherwise accumulate headers
		msg['To']=toaddr
		server.sendmail('systems@buy-low.com',toaddr,msg.as_string())
		print "sent email to ",toaddr
	server.quit()
	print "Done with banner=",banner
	time.sleep(30)
	
def main():
	global thispath, slink, goodlinks
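	# goodlinks.txt is assumed to hold one URL/path substring per line; only log lines
	# containing at least one of these substrings are processed by processLine()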
	f = open('/home/flyers/goodlinks.txt','r')
	s = f.read()
	f.close()
	sl = s.split("\n")
	for s in sl:
		s = s.strip()
		if s=="":
			continue
		goodlinks.append(s)
	print "read goolinks file, number of lines=",str(len(goodlinks))
	f = open(thispath + '/flyersclickcapture2.cfg','r')
	s = f.read()
	f.close()
	sl = s.split("\n")
	#bannerlogs = {'AG':['agfoods.access_log','agfoods.access_log.1'],'N':['nesters.access_log','nesters.access_log.1'],'BL':['buy-low.access_log','buy-low.access_log.1']}
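	# flyersclickcapture2.cfg is expected to contain one "BANNER = logfile, logfile, ..."
	# line per banner, e.g. (hypothetical): AG = agfoods.access_log, agfoods.access_log.1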
	bannerlogs={}
	for sa in sl:
		sa = sa.strip()
		if sa=="" or sa[0]=="#":
			continue
		se = sa.split("=",1)
		if len(se)!=2:
			continue
		k = se[0].strip()
		sec = se[1].split(",")
		fls=[]
		for fl in sec:
			fl = fl.strip()
			if fl=="":
				continue
			fls.append(fl)
		bannerlogs[k]=fls
	# Note: previous versions of this program launched processLog in a thread per banner
	# so that all banners were processed at once, but that caused variables to clobber
	# each other unexpectedly.  Running the banners in series is fast enough, so the
	# threading was dropped.
	for b in bannerlogs.keys():
		processLog(b,bannerlogs[b])
	print "Done everything."
	 
if __name__ == "__main__":
	main()	
