# Rev 12314 | Rev 15492 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed
from elixir import sessionfrom shop2020.config.client.ConfigClient import ConfigClientfrom sqlalchemy.sql import ascfrom sqlalchemy.sql.expression import or_from shop2020.utils.daemon import Daemonimport optparseimport sysimport mechanizeimport timefrom shop2020.model.v1.catalog.impl import DataServicefrom shop2020.model.v1.catalog.impl.DataService import CompetitorPricing, CompetitorPricingRequestimport gcconfig_client = ConfigClient()host = config_client.get_property('staging_hostname')DataService.initialize(db_hostname=host)class CompetitorScraping(Daemon):def __init__(self, logfile='/var/log/services/competitorScraping.log', pidfile='/var/run/competitor-scraper.pid'):Daemon.__init__(self, pidfile, stdout=logfile, stderr=logfile)def run(self):start()def start():try:while True:requests = session.query(CompetitorPricingRequest).filter(or_(CompetitorPricingRequest.isProcessed==False,CompetitorPricingRequest.isProcessed==None)).order_by(asc(CompetitorPricingRequest.requestId)).all()if requests ==[] or requests is None:print "No new request to process, sleeeeeeping....."close_session()collected = gc.collect()print locals()print globals()print "Garbage collector: collected %d objects." % (collected)time.sleep(600)for request in requests:fetchDetails(request)request.isProcessed = Truesession.commit()sendMail(request)close_session()collected = gc.collect()print "Garbage collector: collected %d objects." 
% (collected)print locals()print globals()requests = []except Exception as e:print esys.exit(2)def fetchDetails(request):import threadinglogin_url = "https://sellercentral.amazon.in/gp/homepage.html"br = login(login_url)items = session.query(CompetitorPricing).filter(CompetitorPricing.competitorPricing_requestId==request.requestId).all()print itemssnapdeal, flipkart, amazon =[],[],[]for item in items:if item.snapdealScraping:snapdeal.append(item)if item.flipkartScraping:flipkart.append(item)if item.amazonScraping:amazon.append(item)threads = []t1 = threading.Thread(target=scrapSnapdeal, args = (snapdeal,))t1.daemon = Truet1.start()t2 = threading.Thread(target=scrapFlipkart, args = (flipkart,))t2.daemon = Truet2.start()t3 = threading.Thread(target=scrapAmazon, args = (amazon,br))t3.daemon = Truet3.start()threads.append(t1)threads.append(t2)threads.append(t3)for th in threads:th.join()br,t1,t2,t3 =None,None,None,Noneitems[:],snapdeal[:],flipkart[:],amazon[:],threads[:]=[],[],[],[],[]def scrapSnapdeal(snapdealItems):import simplejson as jsonimport urllib2from shop2020.model.v1.catalog.impl.DataService import SnapdealItemfor snapdealItem in snapdealItems:sdItem = SnapdealItem.get_by(item_id=snapdealItem.item_id)if sdItem is None:continuetry:url="http://www.snapdeal.com/acors/json/gvbps?supc=%s&catId=91&sort=sellingPrice"%(sdItem.supc)print urltime.sleep(1)req = urllib2.Request(url)response = urllib2.urlopen(req)json_input = response.read()vendorInfo = json.loads(json_input)lowestSp, iterator, ourInventory, lowestSellerInventory,ourSp,ourOfferPrice,lowestSp,lowestOfferPrice = (0,)*8lowestSellerName = ''for vendor in vendorInfo:if iterator == 0:lowestSellerName = vendor['vendorDisplayName']try:lowestSp = vendor['sellingPriceBefIntCashBack']except:lowestSp = vendor['sellingPrice']lowestOfferPrice = vendor['sellingPrice']lowestSellerInventory = vendor['buyableInventory']if vendor['vendorDisplayName'] == 'MobilesnMore':ourInventory = vendor['buyableInventory']try:ourSp = 
vendor['sellingPriceBefIntCashBack']except:ourSp = vendor['sellingPrice']ourOfferPrice = vendor['sellingPrice']iterator+=1except:continuefinally:sdItem =NonesnapdealItem.ourSnapdealPrice = ourSpsnapdealItem.ourSnapdealOfferPrice = ourOfferPricesnapdealItem.ourSnapdealInventory = ourInventorysnapdealItem.lowestSnapdealPrice = lowestSpsnapdealItem.lowestSnapdealOfferPrice = lowestOfferPricesnapdealItem.lowestSnapdealSeller = lowestSellerNamesnapdealItem.lowestSnapdealSellerInventory = lowestSellerInventorysnapdealItems[:]=[]def scrapFlipkart(flipkartItems):from shop2020.model.v1.catalog.script import FlipkartScraperfrom operator import itemgetterimport requests as httpRequestfrom shop2020.model.v1.catalog.impl.DataService import FlipkartItemscraperFk = FlipkartScraper.FlipkartScraper()for flipkartItem in flipkartItems:fkItem = FlipkartItem.get_by(item_id=flipkartItem.item_id)if fkItem is None:continuetry:url = "http://www.flipkart.com/ps/%s"%(fkItem.flipkartSerialNumber)vendorsData = scraperFk.read(url)sortedVendorsData = []sortedVendorsData = sorted(vendorsData, key=itemgetter('sellingPrice'))lowestSellerSp, iterator, ourSp = (0,)*3lowestSellerName = ''for data in sortedVendorsData:if iterator == 0:lowestSellerName = data['sellerName']lowestSellerSp = data['sellingPrice']if data['sellerName'] == 'Saholic':ourSp = data['sellingPrice']iterator+=1except:continuefinally:fkItem=Nonetry:request_url = "https://api.flipkart.net/sellers/skus/%s/listings"%(str(fkItem.flipkartSerialNumber))r = httpRequest.get(request_url, auth=('m2z93iskuj81qiid', '0c7ab6a5-98c0-4cdc-8be3-72c591e0add4'))print "Inventory info",r.json()stock_count = int((r.json()['attributeValues'])['stock_count'])except:stock_count = 0finally:r={}flipkartItem.ourFlipkartPrice = ourSpflipkartItem.ourFlipkartInventory = stock_countflipkartItem.lowestFlipkartPrice = lowestSellerSpflipkartItem.lowestFlipkartSeller = lowestSellerNamescraperFk = NoneflipkartItems[:] =[]def close_session():if session.is_active:print 
"session is active. closing it."session.close()def scrapAmazon(amazonItems,br):from shop2020.model.v1.catalog.script import AmazonScraper, SellerCentralScraperprint "Inside amazonitems ",amazonItemsprint "len amazon items ",len(amazonItems)time.sleep(5)sc = SellerCentralScraper.SellerCentralScraper()scraperAmazon = AmazonScraper.AmazonScraper()for amazonItem in amazonItems:skuUrlMfn = "https://sellercentral.amazon.in/myi/search/ProductSummary?keyword=%d" %(amazonItem.item_id)skuUrlFba = "https://sellercentral.amazon.in/myi/search/ProductSummary?keyword=FBA%d" %(amazonItem.item_id)try:asin, mfnInventory, mfnPrice= sc.requestSku(br, skuUrlMfn)fbaAsin, fbaInventory, fbaPrice= sc.requestSku(br, skuUrlFba)except Exception as e:print eprint "Unable to fetch details from Seller Central for ",amazonItem.item_idcontinuetry:if len(asin)==0 and len(fbaAsin)==0:print "No asin found for ",amazonItem.item_idcontinueif len(asin)==0:asin=fbaAsinurl = "http://www.amazon.in/gp/offer-listing/%s/ref=olp_sort_ps"%(asin)scraperAmazon.read(url,True)lowestSp,lowestSeller = scraperAmazon.createData()amazonItem.lowestAmazonPrice = lowestSpamazonItem.ourMfnPrice = float(str(mfnPrice).replace("Rs.","").replace(",",""))amazonItem.ourFbaPrice = float(str(fbaPrice).replace("Rs.","").replace(",",""))amazonItem.ourMfnInventory = int(mfnInventory)amazonItem.ourFbaInventory = int(fbaInventory)amazonItem.lowestAmazonPrice = float(lowestSp)amazonItem.lowestAmazonSeller = lowestSellerexcept Exception as e:print eprint "Unable to fetch details from Amazon Listing page for ",amazonItem.item_idcontinuesc =NonescraperAmazon = NoneamazonItems[:] =[]def getBrowserObject():import cookielibbr = mechanize.Browser(factory=mechanize.RobustFactory())cj = 
cookielib.LWPCookieJar()br.set_cookiejar(cj)br.set_handle_equiv(True)br.set_handle_redirect(True)br.set_handle_referer(True)br.set_handle_robots(False)br.set_debug_http(False)br.set_debug_redirects(False)br.set_debug_responses(False)br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)br.addheaders = [('User-agent','Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11'),('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),('Accept-Encoding', 'gzip,deflate,sdch'),('Accept-Language', 'en-US,en;q=0.8'),('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.3')]return brdef ungzipResponse(r,b):headers = r.info()if headers['Content-Encoding']=='gzip':import gzipprint "********************"print "Deflating gzip response"print "********************"gz = gzip.GzipFile(fileobj=r, mode='rb')html = gz.read()gz.close()headers["Content-type"] = "text/html; charset=utf-8"r.set_data( html )b.set_response(r)def login(url):br = getBrowserObject()br.open(url)response = br.open(url)ungzipResponse(response, br)#html = response.read()#print htmlbr.select_form(name="signinWidget")br.form['username'] = "kshitij.sood@saholic.com"br.form['password'] = "pioneer"response = br.submit()print "********************"print "Attempting to Login"print "********************"#ungzipResponse(response, br)return brdef sendMail(request):import smtplibfrom email.mime.text import MIMETextfrom email.mime.multipart import MIMEMultipartmailServer = smtplib.SMTP("smtp.gmail.com", 587)mailServer.ehlo()mailServer.starttls()mailServer.ehlo()recipients = []recipients.append(request.user)message = "Your Request has been processed.Visit dashboard to check & download report"msg = MIMEMultipart()msg['Subject'] = "Competition Scraping.Upload Id" + ' - ' + str(request.requestId)msg['From'] = ""msg['To'] = ",".join(recipients)msg.preamble = "Competition Scraping" + ' - ' + str(request.requestId)html_msg = MIMEText(message, 
'html')msg.attach(html_msg)try:mailServer.login("build@shop2020.in", "cafe@nes")#mailServer.sendmail("cafe@nes", ['kshitij.sood@saholic.com'], msg.as_string())mailServer.sendmail("cafe@nes", recipients, msg.as_string())except Exception as e:print eprint "Unable to send mail.Lets try with local SMTP."smtpServer = smtplib.SMTP('localhost')smtpServer.set_debuglevel(1)sender = 'build@shop2020.in'try:smtpServer.sendmail(sender, recipients, msg.as_string())print "Successfully sent email"except:print "Error: unable to send email."if __name__ == "__main__":parser = optparse.OptionParser()parser.add_option("-l", "--logfile", dest="logfile",type="string",help="Log all output to LOG_FILE",)parser.add_option("-i", "--pidfile", dest="pidfile",type="string",help="Write the PID to pidfile")(options, args) = parser.parse_args()daemon = CompetitorScraping(options.logfile, options.pidfile)if len(args) == 0:daemon.run()elif len(args) == 1:if 'start' == args[0]:daemon.start()elif 'stop' == args[0]:daemon.stop()elif 'restart' == args[0]:daemon.restart()else:print "Unknown command"sys.exit(2)sys.exit(0)else:print "usage: %s start|stop|restart" % sys.argv[0]sys.exit(2)