# Rev 15825 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed
from elixir import sessionfrom shop2020.config.client.ConfigClient import ConfigClientfrom sqlalchemy.sql import ascfrom sqlalchemy.sql.expression import or_from shop2020.utils.daemon import Daemonimport optparseimport sysimport mechanizeimport timefrom shop2020.model.v1.catalog.impl import DataServicefrom shop2020.model.v1.catalog.impl.DataService import CompetitorPricing, CompetitorPricingRequestimport gcfrom operator import itemgetterconfig_client = ConfigClient()host = config_client.get_property('staging_hostname')DataService.initialize(db_hostname=host)headers = {'User-agent':'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11','Accept' : 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8','Accept-Language' : 'en-US,en;q=0.8','Accept-Charset' : 'ISO-8859-1,utf-8;q=0.7,*;q=0.3'}class CompetitorScraping(Daemon):def __init__(self, logfile='/var/log/services/competitorScraping.log', pidfile='/var/run/competitor-scraper.pid'):Daemon.__init__(self, pidfile, stdout=logfile, stderr=logfile)def run(self):start()def start():try:while True:requests = session.query(CompetitorPricingRequest).filter(or_(CompetitorPricingRequest.isProcessed==False,CompetitorPricingRequest.isProcessed==None)).order_by(asc(CompetitorPricingRequest.requestId)).all()if requests ==[] or requests is None:print "No new request to process, sleeeeeeping....."close_session()collected = gc.collect()print locals()print globals()print "Garbage collector: collected %d objects." % (collected)time.sleep(600)for request in requests:fetchDetails(request)request.isProcessed = Truesession.commit()sendMail(request)close_session()collected = gc.collect()print "Garbage collector: collected %d objects." 
% (collected)print locals()print globals()requests = []except Exception as e:print esys.exit(2)def fetchDetails(request):import threadingitems = session.query(CompetitorPricing).filter(CompetitorPricing.competitorPricing_requestId==request.requestId).all()print itemssnapdeal, flipkart, amazon =[],[],[]for item in items:if item.snapdealScraping:snapdeal.append(item)if item.flipkartScraping:flipkart.append(item)if item.amazonScraping:amazon.append(item)threads = []t1 = threading.Thread(target=scrapSnapdeal, args = (snapdeal,))t1.daemon = Truet1.start()t2 = threading.Thread(target=scrapFlipkart, args = (flipkart,))t2.daemon = Truet2.start()t3 = threading.Thread(target=scrapAmazon, args = (amazon,))t3.daemon = Truet3.start()threads.append(t1)threads.append(t2)threads.append(t3)for th in threads:th.join()br,t1,t2,t3 =None,None,None,Noneitems[:],snapdeal[:],flipkart[:],amazon[:],threads[:]=[],[],[],[],[]def scrapSnapdeal(snapdealItems):import simplejson as jsonimport urllib2from shop2020.model.v1.catalog.impl.DataService import SnapdealItemfor snapdealItem in snapdealItems:sdItem = SnapdealItem.get_by(item_id=snapdealItem.item_id)if sdItem is None:continuetry:url="http://www.snapdeal.com/acors/json/v2/gvbps?supc=%s&catUrl=&bn=&catId=175&start=0&count=10000"%(sdItem.supc)print urltime.sleep(1)req = urllib2.Request(url,headers=headers)response = urllib2.urlopen(req)vendorInfo = json.load(response)response.close()print vendorInfolowestSp, iterator, ourInventory, lowestSellerInventory,ourSp,ourOfferPrice,lowestSp,lowestOfferPrice = (0,)*8lowestSellerName = ''sortedVendorsData = sorted(vendorInfo['vendors'], key=itemgetter('sellingPrice'))for vendor in sortedVendorsData:if iterator == 0:lowestSellerName = vendor['vendorDisplayName'].encode('utf-8')try:lowestSp = vendor['sellingPriceBefIntCashBack']except:lowestSp = vendor['sellingPrice']lowestOfferPrice = vendor['sellingPrice']lowestSellerInventory = vendor['buyableInventory']if vendor['vendorDisplayName'] == 
'MobilesnMore':ourInventory = vendor['buyableInventory']try:ourSp = vendor['sellingPriceBefIntCashBack']except:ourSp = vendor['sellingPrice']ourOfferPrice = vendor['sellingPrice']iterator+=1except Exception as e:import tracebackprint traceback.print_exc()continuefinally:sdItem =Noneprint ourSpprint ourOfferPriceprint ourInventorysnapdealItem.ourSnapdealPrice = ourSpsnapdealItem.ourSnapdealOfferPrice = ourOfferPricesnapdealItem.ourSnapdealInventory = ourInventorysnapdealItem.lowestSnapdealPrice = lowestSpsnapdealItem.lowestSnapdealOfferPrice = lowestOfferPricesnapdealItem.lowestSnapdealSeller = lowestSellerNamesnapdealItem.lowestSnapdealSellerInventory = lowestSellerInventorysnapdealItems[:]=[]def scrapFlipkart(flipkartItems):from shop2020.model.v1.catalog.script import FlipkartProductPageParserfrom operator import itemgetterimport requests as httpRequestfrom shop2020.model.v1.catalog.impl.DataService import FlipkartItemscraperFk = FlipkartProductPageParser.FlipkartProductPageScraper()for flipkartItem in flipkartItems:fkItem = FlipkartItem.get_by(item_id=flipkartItem.item_id)if fkItem is None:continuetry:vendorsData = scraperFk.read(fkItem.flipkartSerialNumber.strip())lowestSellerName = vendorsData['cheapestSeller'].encode('utf-8')lowestSellerSp = vendorsData['lowestSellingPrice']ourSp = vendorsData['saholicSellingPrice']except:continuefinally:fkItem=Nonetry:request_url = "https://api.flipkart.net/sellers/skus/%s/listings"%(str(fkItem.flipkartSerialNumber))r = httpRequest.get(request_url, auth=('m2z93iskuj81qiid', '0c7ab6a5-98c0-4cdc-8be3-72c591e0add4'))print "Inventory info",r.json()stock_count = int((r.json()['attributeValues'])['stock_count'])except:stock_count = 0finally:r={}flipkartItem.ourFlipkartPrice = ourSpflipkartItem.ourFlipkartInventory = stock_countflipkartItem.lowestFlipkartPrice = lowestSellerSpflipkartItem.lowestFlipkartSeller = lowestSellerNamescraperFk = NoneflipkartItems[:] =[]def close_session():if session.is_active:print "session is active. 
closing it."session.close()def scrapAmazon(amazonItems):from shop2020.model.v1.catalog.script import AmazonScraperfrom shop2020.model.v1.catalog.impl.DataService import Amazonlistedprint "Inside amazonitems ",amazonItemsprint "len amazon items ",len(amazonItems)time.sleep(5)amScraper = AmazonScraper.AmazonScraper()for amazonItem in amazonItems:amazon_d_item = Amazonlisted.get_by(itemId=amazonItem.item_id)if amazon_d_item is None:continueif len(amazon_d_item.asin)==0:print "No asin found for ",amazonItem.item_idcontinuesaholicUrl = "http://www.amazon.in/gp/offer-listing/%s?m=AF6E3O0VE0X4D&s=merchant-items&ie=UTF8&qid=1433584512&sr=1-1"%(amazon_d_item.asin.strip())generalUrl = "http://www.amazon.in/gp/offer-listing/%s"%(amazon_d_item.asin.strip())try:saholicPrice = (amScraper.read(saholicUrl, False))[0]except:saholicPrice = 0.0try:cheapestSeller = (amScraper.read(generalUrl, True))cheapestSellerPrice = cheapestSeller[0]cheapestSellerName = cheapestSeller[1].encode('utf-8')except:cheapestSellerPrice = 0.0cheapestSellerName = ""amazonItem.ourAmazonPrice = saholicPriceamazonItem.lowestAmazonPrice = cheapestSellerPriceamazonItem.lowestAmazonSeller = cheapestSellerNameamazonItems[:] =[]def sendMail(request):import smtplibfrom email.mime.text import MIMETextfrom email.mime.multipart import MIMEMultipartmailServer = smtplib.SMTP("smtp.gmail.com", 587)mailServer.ehlo()mailServer.starttls()mailServer.ehlo()recipients = []recipients.append(request.user)message = "Your Request has been processed.Visit dashboard to check & download report"msg = MIMEMultipart()msg['Subject'] = "Competition Scraping.Upload Id" + ' - ' + str(request.requestId)msg['From'] = ""msg['To'] = ",".join(recipients)msg.preamble = "Competition Scraping" + ' - ' + str(request.requestId)html_msg = MIMEText(message, 'html')msg.attach(html_msg)try:mailServer.login("build@shop2020.in", "cafe@nes")#mailServer.sendmail("cafe@nes", ['kshitij.sood@saholic.com'], msg.as_string())mailServer.sendmail("cafe@nes", 
recipients, msg.as_string())except Exception as e:print eprint "Unable to send mail.Lets try with local SMTP."smtpServer = smtplib.SMTP('localhost')smtpServer.set_debuglevel(1)sender = 'build@shop2020.in'try:smtpServer.sendmail(sender, recipients, msg.as_string())print "Successfully sent email"except:print "Error: unable to send email."if __name__ == "__main__":parser = optparse.OptionParser()parser.add_option("-l", "--logfile", dest="logfile",type="string",help="Log all output to LOG_FILE",)parser.add_option("-i", "--pidfile", dest="pidfile",type="string",help="Write the PID to pidfile")(options, args) = parser.parse_args()daemon = CompetitorScraping(options.logfile, options.pidfile)if len(args) == 0:daemon.run()elif len(args) == 1:if 'start' == args[0]:daemon.start()elif 'stop' == args[0]:daemon.stop()elif 'restart' == args[0]:daemon.restart()else:print "Unknown command"sys.exit(2)sys.exit(0)else:print "usage: %s start|stop|restart" % sys.argv[0]sys.exit(2)