# Rev 12268 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed
"""Daemon that processes pending competitor-pricing requests.

For every unprocessed ``CompetitorPricingRequest`` the daemon loads the
request's ``CompetitorPricing`` items and, in parallel threads, records
Snapdeal, Flipkart and Amazon price/inventory details on those items. The
request is then flagged as processed and the session is committed.

The file targets Python 2 (``urllib2``, ``cookielib``). Output is written with
single-argument ``print(...)`` calls, which behave identically to the Python 2
print statement.
"""
import cookielib
import gzip
import optparse
import sys
import threading
import time
import urllib2
from operator import itemgetter

import mechanize
import requests as httpRequest
import simplejson as json
from elixir import session
from sqlalchemy.sql import asc
from sqlalchemy.sql.expression import or_

from shop2020.model.v1.catalog.impl import DataService
from shop2020.model.v1.catalog.impl.DataService import CompetitorPricing, \
    CompetitorPricingRequest, SnapdealItem, FlipkartItem
from shop2020.model.v1.catalog.script import FlipkartScraper, AmazonScraper, \
    SellerCentralScraper
from shop2020.utils.daemon import Daemon

DataService.initialize(db_hostname='localhost')

DEFAULT_LOGFILE = '/var/log/services/competitorScraping.log'
DEFAULT_PIDFILE = '/var/run/competitor-scraper.pid'

# Seconds to wait before polling again when no request is pending.
IDLE_SLEEP_SECONDS = 600

# SECURITY NOTE(review): credentials are embedded in source. Values are kept
# unchanged so behaviour is identical; they should be moved to configuration.
FLIPKART_API_AUTH = ('m2z93iskuj81qiid', '0c7ab6a5-98c0-4cdc-8be3-72c591e0add4')
SELLER_CENTRAL_USERNAME = "kshitij.sood@saholic.com"
SELLER_CENTRAL_PASSWORD = "pioneer"


def _log(*parts):
    """Print *parts* joined by single spaces, like ``print a, b`` does."""
    print(" ".join(["%s" % (part,) for part in parts]))


class CompetitorScraping(Daemon):
    """Daemon wrapper whose ``run`` executes the module-level polling loop."""

    def __init__(self, logfile=DEFAULT_LOGFILE, pidfile=DEFAULT_PIDFILE):
        """Pass the pid file to ``Daemon`` and use *logfile* for stdout and stderr."""
        Daemon.__init__(self, pidfile, stdout=logfile, stderr=logfile)

    def run(self):
        """Run the polling loop (the module-level ``start`` function)."""
        start()


def start():
    """Poll for unprocessed pricing requests forever and process each one.

    Any exception raised while polling or processing is printed and the
    process exits with status 2.
    """
    try:
        while True:
            # ``== False`` / ``== None`` are required here: they build SQL
            # expressions, they are not Python comparisons.
            pending = session.query(CompetitorPricingRequest).filter(
                or_(CompetitorPricingRequest.isProcessed == False,  # noqa: E712
                    CompetitorPricingRequest.isProcessed == None)  # noqa: E711
            ).order_by(asc(CompetitorPricingRequest.requestId)).all()
            if not pending:
                print("No new request to process, sleeeeeeping.....")
                time.sleep(IDLE_SLEEP_SECONDS)
            # ``or []`` keeps the loop safe for the ``None`` case the check
            # above already anticipates.
            for request in pending or []:
                fetchDetails(request)
                request.isProcessed = True
                session.commit()
            # Closed on every pass, including idle ones, as in the original.
            close_session()
    except Exception as e:
        print(e)
        sys.exit(2)


def fetchDetails(request):
    """Scrape all marketplaces for the items of *request* and wait for them.

    Logs in to Seller Central once, splits the request's items by the
    marketplace flags set on them, and runs one worker thread per marketplace
    (two for Amazon when there are more than 20 Amazon items).
    """
    login_url = "https://sellercentral.amazon.in/gp/homepage.html"
    br = login(login_url)
    items = session.query(CompetitorPricing).filter(
        CompetitorPricing.competitorPricing_requestId == request.requestId
    ).all()
    print(items)

    snapdeal, flipkart, amazon = [], [], []
    for item in items:
        if item.snapdealScraping:
            snapdeal.append(item)
        if item.flipkartScraping:
            flipkart.append(item)
        if item.amazonScraping:
            amazon.append(item)

    workers = [
        threading.Thread(target=scrapSnapdeal, args=(snapdeal,)),
        threading.Thread(target=scrapFlipkart, args=(flipkart,)),
    ]

    amazonLen = len(amazon)
    if amazonLen > 20:
        # Explicit floor division: same result as Python 2 ``/`` on ints.
        half = amazonLen // 2
        batches = [amazon[:half], amazon[half:]]
    else:
        batches = [amazon]
    # NOTE(review): both Amazon workers share one mechanize browser; whether
    # that object is safe for concurrent use is not established here - confirm.
    for batch in batches:
        workers.append(threading.Thread(target=scrapAmazon, args=(batch, br)))

    for worker in workers:
        worker.daemon = True
        worker.start()
    for worker in workers:
        worker.join()


def _snapdealSellingPrice(vendor):
    """Return the vendor's pre-cashback price, else its plain selling price."""
    try:
        return vendor['sellingPriceBefIntCashBack']
    except KeyError:
        return vendor['sellingPrice']


def scrapSnapdeal(snapdealItems):
    """Record our and the first-listed seller's Snapdeal details on each item.

    Items with no matching ``SnapdealItem`` are skipped. If fetching or parsing
    the vendor list fails, the failure is logged and the item is left untouched.
    """
    for snapdealItem in snapdealItems:
        sdItem = SnapdealItem.get_by(item_id=snapdealItem.item_id)
        if sdItem is None:
            continue
        try:
            url = "http://www.snapdeal.com/acors/json/gvbps?supc=%s&catId=91&sort=sellingPrice" % (sdItem.supc)
            print(url)
            time.sleep(1)
            response = urllib2.urlopen(urllib2.Request(url))
            try:
                vendorInfo = json.loads(response.read())
            finally:
                response.close()

            lowestSp = lowestOfferPrice = lowestSellerInventory = 0
            ourSp = ourOfferPrice = ourInventory = 0
            lowestSellerName = ''
            for position, vendor in enumerate(vendorInfo):
                # The URL requests sort=sellingPrice, so the first vendor is
                # treated as the lowest-priced seller.
                if position == 0:
                    lowestSellerName = vendor['vendorDisplayName']
                    lowestSp = _snapdealSellingPrice(vendor)
                    lowestOfferPrice = vendor['sellingPrice']
                    lowestSellerInventory = vendor['buyableInventory']
                if vendor['vendorDisplayName'] == 'MobilesnMore':
                    ourInventory = vendor['buyableInventory']
                    ourSp = _snapdealSellingPrice(vendor)
                    ourOfferPrice = vendor['sellingPrice']
        except Exception as e:
            print(e)
            _log("Unable to fetch details from Snapdeal for ", snapdealItem.item_id)
            continue

        snapdealItem.ourSnapdealPrice = ourSp
        snapdealItem.ourSnapdealOfferPrice = ourOfferPrice
        snapdealItem.ourSnapdealInventory = ourInventory
        snapdealItem.lowestSnapdealPrice = lowestSp
        snapdealItem.lowestSnapdealOfferPrice = lowestOfferPrice
        snapdealItem.lowestSnapdealSeller = lowestSellerName
        snapdealItem.lowestSnapdealSellerInventory = lowestSellerInventory


def scrapFlipkart(flipkartItems):
    """Record our and the cheapest seller's Flipkart details on each item.

    Items with no matching ``FlipkartItem`` are skipped. If the seller page
    cannot be read, the failure is logged and the item is left untouched. Our
    inventory comes from the seller API and falls back to 0 on any failure.
    """
    for flipkartItem in flipkartItems:
        scraperFk = FlipkartScraper.FlipkartScraper()
        fkItem = FlipkartItem.get_by(item_id=flipkartItem.item_id)
        if fkItem is None:
            continue
        try:
            url = "http://www.flipkart.com/ps/%s" % (fkItem.flipkartSerialNumber)
            vendorsData = scraperFk.read(url)
            sortedVendorsData = sorted(vendorsData, key=itemgetter('sellingPrice'))
            lowestSellerSp = ourSp = 0
            lowestSellerName = ''
            for position, data in enumerate(sortedVendorsData):
                if position == 0:
                    lowestSellerName = data['sellerName']
                    lowestSellerSp = data['sellingPrice']
                if data['sellerName'] == 'Saholic':
                    ourSp = data['sellingPrice']
        except Exception as e:
            print(e)
            _log("Unable to fetch details from Flipkart for ", flipkartItem.item_id)
            continue

        try:
            request_url = "https://api.flipkart.net/sellers/skus/%s/listings" % (str(fkItem.flipkartSerialNumber))
            r = httpRequest.get(request_url, auth=FLIPKART_API_AUTH)
            listing = r.json()  # parse once; the original parsed it twice
            _log("Inventory info", listing)
            stock_count = int((listing['attributeValues'])['stock_count'])
        except Exception as e:
            # Best effort: an unavailable inventory is recorded as 0.
            print(e)
            stock_count = 0

        flipkartItem.ourFlipkartPrice = ourSp
        flipkartItem.ourFlipkartInventory = stock_count
        flipkartItem.lowestFlipkartPrice = lowestSellerSp
        flipkartItem.lowestFlipkartSeller = lowestSellerName


def close_session():
    """Close the shared session if it is currently active."""
    if session.is_active:
        print("session is active. closing it.")
        session.close()


def _parseRupees(text):
    """Convert a price string such as ``"Rs.1,234.00"`` to a float."""
    return float(text.replace("Rs.", "").replace(",", ""))


def scrapAmazon(amazonItems, br):
    """Record our Seller Central and the lowest Amazon listing details.

    *br* is the logged-in browser returned by ``login``. An item is skipped
    (with a message) when Seller Central cannot be read, when neither the MFN
    nor the FBA lookup yields an ASIN, or when the offer listing page fails.
    """
    for amazonItem in amazonItems:
        sc = SellerCentralScraper.SellerCentralScraper()
        skuUrlMfn = "https://sellercentral.amazon.in/myi/search/ProductSummary?keyword=%d" % (amazonItem.item_id)
        skuUrlFba = "https://sellercentral.amazon.in/myi/search/ProductSummary?keyword=FBA%d" % (amazonItem.item_id)
        try:
            # The original also called requestSku once more just to print the
            # result type, costing an extra request per item; that is removed.
            asin, mfnInventory, mfnPrice = sc.requestSku(br, skuUrlMfn)
            fbaAsin, fbaInventory, fbaPrice = sc.requestSku(br, skuUrlFba)
        except Exception as e:
            print(e)
            _log("Unable to fetch details from Seller Central for ", amazonItem.item_id)
            continue

        scraperAmazon = AmazonScraper.AmazonScraper()
        try:
            if not asin and not fbaAsin:
                _log("No asin found for ", amazonItem.item_id)
                continue
            if not asin:
                asin = fbaAsin
            url = "http://www.amazon.in/gp/offer-listing/%s/ref=olp_sort_ps" % (asin)
            scraperAmazon.read(url, True)
            lowestSp, lowestSeller = scraperAmazon.createData()
            # Stored raw first (as before), so it survives if a later
            # conversion in this block fails.
            amazonItem.lowestAmazonPrice = lowestSp
            _log("Details of Item id ", amazonItem.item_id)
            _log("ASIN ", asin)
            _log("MFN INVENTORY ", mfnInventory)
            _log("FBA INVENTORY ", fbaInventory)
            _log("MFN PRICE ", mfnPrice)
            _log("FBA PRICE ", fbaPrice)
            _log("Lowest Seller SP ", lowestSp)
            _log("LOWEST SELLER NAME ", lowestSeller)
            amazonItem.ourMfnPrice = _parseRupees(mfnPrice)
            amazonItem.ourFbaPrice = _parseRupees(fbaPrice)
            amazonItem.ourMfnInventory = int(mfnInventory)
            amazonItem.ourFbaInventory = int(fbaInventory)
            amazonItem.lowestAmazonPrice = float(lowestSp)
            amazonItem.lowestAmazonSeller = lowestSeller
        except Exception as e:
            print(e)
            _log("Unable to fetch details from Amazon Listing page for ", amazonItem.item_id)
            continue


def getBrowserObject():
    """Return a mechanize browser with a cookie jar and browser-like headers."""
    br = mechanize.Browser(factory=mechanize.RobustFactory())
    cj = cookielib.LWPCookieJar()
    br.set_cookiejar(cj)
    br.set_handle_equiv(True)
    br.set_handle_redirect(True)
    br.set_handle_referer(True)
    br.set_handle_robots(False)
    br.set_debug_http(False)
    br.set_debug_redirects(False)
    br.set_debug_responses(False)
    br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)
    br.addheaders = [
        ('User-agent',
         'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11'),
        ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
        ('Accept-Encoding', 'gzip,deflate,sdch'),
        ('Accept-Language', 'en-US,en;q=0.8'),
        ('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.3'),
    ]
    return br


def ungzipResponse(r, b):
    """Replace a gzip-encoded response *r* on browser *b* with its decoded body.

    Responses without a ``Content-Encoding: gzip`` header are left untouched.
    """
    headers = r.info()
    # ``.get`` avoids a KeyError when the header is absent.
    if headers.get('Content-Encoding') == 'gzip':
        print("********************")
        print("Deflating gzip response")
        print("********************")
        gz = gzip.GzipFile(fileobj=r, mode='rb')
        try:
            html = gz.read()
        finally:
            gz.close()
        headers["Content-type"] = "text/html; charset=utf-8"
        r.set_data(html)
        b.set_response(r)


def login(url):
    """Open *url*, submit the ``signinWidget`` form and return the browser."""
    br = getBrowserObject()
    # NOTE(review): the page is opened twice, as in the original; presumably
    # the first request primes cookies - confirm before removing it.
    br.open(url)
    response = br.open(url)
    ungzipResponse(response, br)
    br.select_form(name="signinWidget")
    br.form['username'] = SELLER_CENTRAL_USERNAME
    br.form['password'] = SELLER_CENTRAL_PASSWORD
    response = br.submit()
    print("********************")
    print("Attempting to Login")
    print("********************")
    return br


def main():
    """Parse the command line and run, start, stop or restart the daemon.

    With no positional argument the polling loop runs in the foreground.
    """
    parser = optparse.OptionParser()
    # Defaults mirror CompetitorScraping.__init__, so an omitted option no
    # longer reaches the daemon as None.
    parser.add_option("-l", "--logfile", dest="logfile",
                      type="string", default=DEFAULT_LOGFILE,
                      help="Log all output to LOG_FILE")
    parser.add_option("-i", "--pidfile", dest="pidfile",
                      type="string", default=DEFAULT_PIDFILE,
                      help="Write the PID to pidfile")
    (options, args) = parser.parse_args()
    daemon = CompetitorScraping(options.logfile, options.pidfile)
    if len(args) == 0:
        daemon.run()
    elif len(args) == 1:
        command = args[0]
        if command == 'start':
            daemon.start()
        elif command == 'stop':
            daemon.stop()
        elif command == 'restart':
            daemon.restart()
        else:
            print("Unknown command")
            sys.exit(2)
        sys.exit(0)
    else:
        print("usage: %s start|stop|restart" % sys.argv[0])
        sys.exit(2)


if __name__ == "__main__":
    main()