# Subversion Repositories SmartDukaan
#
# Rev
#
# Rev 12314 | Rev 15492 (web-viewer header residue, commented out so the module parses)

from elixir import session
from shop2020.config.client.ConfigClient import ConfigClient
from sqlalchemy.sql import asc
from sqlalchemy.sql.expression import or_
from shop2020.utils.daemon import Daemon
import optparse
import sys
import mechanize
import time
from shop2020.model.v1.catalog.impl import DataService
from shop2020.model.v1.catalog.impl.DataService import CompetitorPricing, CompetitorPricingRequest
import gc 

# Module-level bootstrap: resolve the staging DB hostname from the central
# config service and bind the catalog model layer to it before any query runs.
config_client = ConfigClient()
host = config_client.get_property('staging_hostname')
DataService.initialize(db_hostname=host)

class CompetitorScraping(Daemon):
    """Daemonized wrapper around the competitor-scraping polling loop."""
    def __init__(self, logfile='/var/log/services/competitorScraping.log', pidfile='/var/run/competitor-scraper.pid'):
        # Route both stdout and stderr of the daemonized process to the log file.
        Daemon.__init__(self, pidfile, stdout=logfile, stderr=logfile)
        
    def run(self):
        # Entry point invoked by Daemon after forking; never returns normally.
        start()

def start():
    try:
        while True:
            requests = session.query(CompetitorPricingRequest).filter(or_(CompetitorPricingRequest.isProcessed==False,CompetitorPricingRequest.isProcessed==None)).order_by(asc(CompetitorPricingRequest.requestId)).all()
            if requests ==[] or requests is None:
                print "No new request to process, sleeeeeeping....."
                close_session()
                collected = gc.collect()
                print locals()
                print globals()
                print "Garbage collector: collected %d objects." % (collected)
                time.sleep(600)
            for request in requests:
                fetchDetails(request)
                request.isProcessed = True
                session.commit()
                sendMail(request)
            close_session()
            collected = gc.collect()
            print "Garbage collector: collected %d objects." % (collected)
            print locals()
            print globals()
            requests = []
    except Exception as e:
        print e
        sys.exit(2)
    
def fetchDetails(request):
    import threading
    login_url = "https://sellercentral.amazon.in/gp/homepage.html"
    br = login(login_url)
    items = session.query(CompetitorPricing).filter(CompetitorPricing.competitorPricing_requestId==request.requestId).all()
    print items
    snapdeal, flipkart, amazon =[],[],[]
    for item in items:
        if item.snapdealScraping:
            snapdeal.append(item)
        if item.flipkartScraping:
            flipkart.append(item)
        if item.amazonScraping:
            amazon.append(item)
    threads = []
    t1 = threading.Thread(target=scrapSnapdeal, args = (snapdeal,))
    t1.daemon = True
    t1.start()
    t2 = threading.Thread(target=scrapFlipkart, args = (flipkart,))
    t2.daemon = True
    t2.start()
    t3 = threading.Thread(target=scrapAmazon, args = (amazon,br))
    t3.daemon = True
    t3.start()
    threads.append(t1)
    threads.append(t2)
    threads.append(t3)
    for th in threads:
        th.join()
    br,t1,t2,t3 =None,None,None,None
    items[:],snapdeal[:],flipkart[:],amazon[:],threads[:]=[],[],[],[],[]

def scrapSnapdeal(snapdealItems):
    import simplejson as json
    import urllib2
    from shop2020.model.v1.catalog.impl.DataService import SnapdealItem
    for snapdealItem in snapdealItems:
        sdItem = SnapdealItem.get_by(item_id=snapdealItem.item_id)
        if sdItem is None:
            continue
        try:
            url="http://www.snapdeal.com/acors/json/gvbps?supc=%s&catId=91&sort=sellingPrice"%(sdItem.supc)
            print url
            time.sleep(1)
            req = urllib2.Request(url)
            response = urllib2.urlopen(req)
            json_input = response.read()
            vendorInfo = json.loads(json_input)
            lowestSp, iterator, ourInventory, lowestSellerInventory,ourSp,ourOfferPrice,lowestSp,lowestOfferPrice   = (0,)*8
            lowestSellerName = ''
            for vendor in vendorInfo:
                if iterator == 0:
                    lowestSellerName = vendor['vendorDisplayName']
                    try:
                        lowestSp = vendor['sellingPriceBefIntCashBack']
                    except:
                        lowestSp = vendor['sellingPrice']
                    lowestOfferPrice = vendor['sellingPrice']
                    lowestSellerInventory = vendor['buyableInventory']
                    
                if vendor['vendorDisplayName'] == 'MobilesnMore':
                    ourInventory = vendor['buyableInventory']
                    try:
                        ourSp = vendor['sellingPriceBefIntCashBack']
                    except:
                        ourSp = vendor['sellingPrice']
                    ourOfferPrice = vendor['sellingPrice']
                iterator+=1
        except:
            continue
        finally:
            sdItem =None
        snapdealItem.ourSnapdealPrice = ourSp
        snapdealItem.ourSnapdealOfferPrice = ourOfferPrice
        snapdealItem.ourSnapdealInventory = ourInventory
        snapdealItem.lowestSnapdealPrice = lowestSp
        snapdealItem.lowestSnapdealOfferPrice = lowestOfferPrice
        snapdealItem.lowestSnapdealSeller = lowestSellerName 
        snapdealItem.lowestSnapdealSellerInventory = lowestSellerInventory
    snapdealItems[:]=[]  

def scrapFlipkart(flipkartItems):
    from shop2020.model.v1.catalog.script import FlipkartScraper
    from operator import itemgetter
    import requests as httpRequest
    from shop2020.model.v1.catalog.impl.DataService import FlipkartItem
    scraperFk = FlipkartScraper.FlipkartScraper()
    for flipkartItem in flipkartItems:
        fkItem = FlipkartItem.get_by(item_id=flipkartItem.item_id)
        if fkItem is None:
            continue
        try:
            url = "http://www.flipkart.com/ps/%s"%(fkItem.flipkartSerialNumber)
            vendorsData = scraperFk.read(url)
            sortedVendorsData = []
            sortedVendorsData = sorted(vendorsData, key=itemgetter('sellingPrice'))
            lowestSellerSp, iterator, ourSp = (0,)*3
            lowestSellerName = ''
            for data in sortedVendorsData:
                if iterator == 0:
                    lowestSellerName = data['sellerName']
                    lowestSellerSp = data['sellingPrice']
                        
                if data['sellerName'] == 'Saholic':
                    ourSp = data['sellingPrice']

                iterator+=1
        except:
            continue
        finally:
            fkItem=None
        try:
            request_url = "https://api.flipkart.net/sellers/skus/%s/listings"%(str(fkItem.flipkartSerialNumber))
            r = httpRequest.get(request_url, auth=('m2z93iskuj81qiid', '0c7ab6a5-98c0-4cdc-8be3-72c591e0add4'))
            print "Inventory info",r.json()
            stock_count = int((r.json()['attributeValues'])['stock_count'])
        except:
            stock_count = 0
        finally:
                r={}
        flipkartItem.ourFlipkartPrice = ourSp
        flipkartItem.ourFlipkartInventory = stock_count
        flipkartItem.lowestFlipkartPrice = lowestSellerSp
        flipkartItem.lowestFlipkartSeller =  lowestSellerName
    scraperFk = None
    flipkartItems[:] =[] 


def close_session():
    if session.is_active:
        print "session is active. closing it."
        session.close()
        
def scrapAmazon(amazonItems,br):
    from shop2020.model.v1.catalog.script import AmazonScraper, SellerCentralScraper
    print "Inside amazonitems ",amazonItems
    print "len amazon items ",len(amazonItems)
    time.sleep(5)
    sc = SellerCentralScraper.SellerCentralScraper()
    scraperAmazon = AmazonScraper.AmazonScraper()
    for amazonItem in amazonItems:
        skuUrlMfn = "https://sellercentral.amazon.in/myi/search/ProductSummary?keyword=%d" %(amazonItem.item_id)
        skuUrlFba =  "https://sellercentral.amazon.in/myi/search/ProductSummary?keyword=FBA%d" %(amazonItem.item_id)
        try:
            asin, mfnInventory, mfnPrice= sc.requestSku(br, skuUrlMfn)
            fbaAsin, fbaInventory, fbaPrice= sc.requestSku(br, skuUrlFba)
        except Exception as e:
            print e
            print "Unable to fetch details from Seller Central for ",amazonItem.item_id
            continue
        try:
            if len(asin)==0 and len(fbaAsin)==0:
                print "No asin found for ",amazonItem.item_id
                continue
            if len(asin)==0:
                asin=fbaAsin
            url = "http://www.amazon.in/gp/offer-listing/%s/ref=olp_sort_ps"%(asin)
            scraperAmazon.read(url,True)
            lowestSp,lowestSeller = scraperAmazon.createData()
            amazonItem.lowestAmazonPrice = lowestSp
            amazonItem.ourMfnPrice = float(str(mfnPrice).replace("Rs.","").replace(",",""))
            amazonItem.ourFbaPrice = float(str(fbaPrice).replace("Rs.","").replace(",",""))
            amazonItem.ourMfnInventory = int(mfnInventory)
            amazonItem.ourFbaInventory = int(fbaInventory)
            amazonItem.lowestAmazonPrice = float(lowestSp)
            amazonItem.lowestAmazonSeller = lowestSeller

        except Exception as e:
            print e
            print "Unable to fetch details from Amazon Listing page for ",amazonItem.item_id
            continue
    sc =None
    scraperAmazon = None
    amazonItems[:] =[] 
        
def getBrowserObject():
    """Build a mechanize browser that presents itself as a desktop Chrome.

    Robots.txt is deliberately ignored and gzip is advertised in
    Accept-Encoding; callers must deflate responses themselves (see
    ungzipResponse).
    """
    import cookielib
    browser = mechanize.Browser(factory=mechanize.RobustFactory())
    browser.set_cookiejar(cookielib.LWPCookieJar())
    # Behave like an interactive browser.
    browser.set_handle_equiv(True)
    browser.set_handle_redirect(True)
    browser.set_handle_referer(True)
    browser.set_handle_robots(False)
    # Keep mechanize's HTTP debugging output quiet.
    for quiet in (browser.set_debug_http, browser.set_debug_redirects, browser.set_debug_responses):
        quiet(False)

    # Follow meta-refresh redirects, but wait at most one second.
    browser.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)

    browser.addheaders = [
        ('User-agent','Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11'),
        ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
        ('Accept-Encoding', 'gzip,deflate,sdch'),
        ('Accept-Language', 'en-US,en;q=0.8'),
        ('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.3'),
    ]
    return browser

def ungzipResponse(r,b):
    headers = r.info()
    if headers['Content-Encoding']=='gzip':
        import gzip
        print "********************"
        print "Deflating gzip response"
        print "********************"
        gz = gzip.GzipFile(fileobj=r, mode='rb')
        html = gz.read()
        gz.close()
        headers["Content-type"] = "text/html; charset=utf-8"
        r.set_data( html )
        b.set_response(r)

    
def login(url):
    br = getBrowserObject()
    br.open(url)
    response = br.open(url)
    ungzipResponse(response, br)
    #html = response.read()
    #print html
    br.select_form(name="signinWidget")
    br.form['username'] = "kshitij.sood@saholic.com"
    br.form['password'] = "pioneer"
    response = br.submit()
    print "********************"
    print "Attempting to Login"
    print "********************"
    #ungzipResponse(response, br)
    return br

def sendMail(request):
    import smtplib
    from email.mime.text import MIMEText
    from email.mime.multipart import MIMEMultipart
    mailServer = smtplib.SMTP("smtp.gmail.com", 587)
    mailServer.ehlo()
    mailServer.starttls()
    mailServer.ehlo()
    recipients = []
    recipients.append(request.user)
    message = "Your Request has been processed.Visit dashboard to check & download report" 
    msg = MIMEMultipart()
    msg['Subject'] = "Competition Scraping.Upload Id" + ' - ' + str(request.requestId)
    msg['From'] = ""
    msg['To'] = ",".join(recipients)
    msg.preamble = "Competition Scraping" + ' - ' + str(request.requestId)
    html_msg = MIMEText(message, 'html')
    msg.attach(html_msg)
    try:
        mailServer.login("build@shop2020.in", "cafe@nes")
        #mailServer.sendmail("cafe@nes", ['kshitij.sood@saholic.com'], msg.as_string())
        mailServer.sendmail("cafe@nes", recipients, msg.as_string())
    except Exception as e:
        print e
        print "Unable to send mail.Lets try with local SMTP."
        smtpServer = smtplib.SMTP('localhost')
        smtpServer.set_debuglevel(1)
        sender = 'build@shop2020.in'
        try:
            smtpServer.sendmail(sender, recipients, msg.as_string())
            print "Successfully sent email"
        except:
            print "Error: unable to send email."

        

if __name__ == "__main__":
    parser = optparse.OptionParser()
    parser.add_option("-l", "--logfile", dest="logfile",
                      type="string",
                      help="Log all output to LOG_FILE",
                      )
    parser.add_option("-i", "--pidfile", dest="pidfile",
                      type="string",
                      help="Write the PID to pidfile")
    (options, args) = parser.parse_args()
    daemon = CompetitorScraping(options.logfile, options.pidfile)
    if len(args) == 0:
        daemon.run()
    elif len(args) == 1:
        if 'start' == args[0]:
            daemon.start()
        elif 'stop' == args[0]:
            daemon.stop()
        elif 'restart' == args[0]:
            daemon.restart()
        else:
            print "Unknown command"
            sys.exit(2)
        sys.exit(0)
    else:
        print "usage: %s start|stop|restart" % sys.argv[0]
        sys.exit(2)