# Source: Subversion repository "SmartDukaan", rev 15825.
# (SVN web-viewer header removed so this file parses as Python.)

from elixir import session
from shop2020.config.client.ConfigClient import ConfigClient
from sqlalchemy.sql import asc
from sqlalchemy.sql.expression import or_
from shop2020.utils.daemon import Daemon
import optparse
import sys
import mechanize
import time
from shop2020.model.v1.catalog.impl import DataService
from shop2020.model.v1.catalog.impl.DataService import CompetitorPricing, CompetitorPricingRequest
import gc 
from operator import itemgetter

# Module-level setup: bind the catalog model layer to the staging DB host
# resolved from central configuration before any queries run.
config_client = ConfigClient()
host = config_client.get_property('staging_hostname')
DataService.initialize(db_hostname=host)

# Browser-like request headers sent with every scraping request so the
# competitor sites serve the same pages they would to a real browser.
headers = { 
           'User-agent':'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
            'Accept' : 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',      
            'Accept-Language' : 'en-US,en;q=0.8',                     
            'Accept-Charset' : 'ISO-8859-1,utf-8;q=0.7,*;q=0.3'
        }

class CompetitorScraping(Daemon):
    """Daemon wrapper that runs the competitor-price scraping loop."""

    def __init__(self,
                 logfile='/var/log/services/competitorScraping.log',
                 pidfile='/var/run/competitor-scraper.pid'):
        # Route both stdout and stderr of the daemonized process to the log.
        Daemon.__init__(self, pidfile, stdout=logfile, stderr=logfile)

    def run(self):
        """Daemon entry point: delegate to the module-level polling loop."""
        start()

def start():
    try:
        while True:
            requests = session.query(CompetitorPricingRequest).filter(or_(CompetitorPricingRequest.isProcessed==False,CompetitorPricingRequest.isProcessed==None)).order_by(asc(CompetitorPricingRequest.requestId)).all()
            if requests ==[] or requests is None:
                print "No new request to process, sleeeeeeping....."
                close_session()
                collected = gc.collect()
                print locals()
                print globals()
                print "Garbage collector: collected %d objects." % (collected)
                time.sleep(600)
            for request in requests:
                fetchDetails(request)
                request.isProcessed = True
                session.commit()
                sendMail(request)
            close_session()
            collected = gc.collect()
            print "Garbage collector: collected %d objects." % (collected)
            print locals()
            print globals()
            requests = []
    except Exception as e:
        print e
        sys.exit(2)
    
def fetchDetails(request):
    import threading
    items = session.query(CompetitorPricing).filter(CompetitorPricing.competitorPricing_requestId==request.requestId).all()
    print items
    snapdeal, flipkart, amazon =[],[],[]
    for item in items:
        if item.snapdealScraping:
            snapdeal.append(item)
        if item.flipkartScraping:
            flipkart.append(item)
        if item.amazonScraping:
            amazon.append(item)
    threads = []
    t1 = threading.Thread(target=scrapSnapdeal, args = (snapdeal,))
    t1.daemon = True
    t1.start()
    t2 = threading.Thread(target=scrapFlipkart, args = (flipkart,))
    t2.daemon = True
    t2.start()
    t3 = threading.Thread(target=scrapAmazon, args = (amazon,))
    t3.daemon = True
    t3.start()
    threads.append(t1)
    threads.append(t2)
    threads.append(t3)
    for th in threads:
        th.join()
    br,t1,t2,t3 =None,None,None,None
    items[:],snapdeal[:],flipkart[:],amazon[:],threads[:]=[],[],[],[],[]

def scrapSnapdeal(snapdealItems):
    import simplejson as json
    import urllib2
    from shop2020.model.v1.catalog.impl.DataService import SnapdealItem
    for snapdealItem in snapdealItems:
        sdItem = SnapdealItem.get_by(item_id=snapdealItem.item_id)
        if sdItem is None:
            continue
        try:
            url="http://www.snapdeal.com/acors/json/v2/gvbps?supc=%s&catUrl=&bn=&catId=175&start=0&count=10000"%(sdItem.supc)
            print url
            time.sleep(1)
            req = urllib2.Request(url,headers=headers)
            response = urllib2.urlopen(req)
            vendorInfo = json.load(response)
            response.close()
            print vendorInfo
            lowestSp, iterator, ourInventory, lowestSellerInventory,ourSp,ourOfferPrice,lowestSp,lowestOfferPrice   = (0,)*8
            lowestSellerName = ''
            sortedVendorsData = sorted(vendorInfo['vendors'], key=itemgetter('sellingPrice'))
            for vendor in sortedVendorsData:
                if iterator == 0:
                    lowestSellerName = vendor['vendorDisplayName'].encode('utf-8')
                    try:
                        lowestSp = vendor['sellingPriceBefIntCashBack']
                    except:
                        lowestSp = vendor['sellingPrice']
                    lowestOfferPrice = vendor['sellingPrice']
                    lowestSellerInventory = vendor['buyableInventory']
                    
                if vendor['vendorDisplayName'] == 'MobilesnMore':
                    ourInventory = vendor['buyableInventory']
                    try:
                        ourSp = vendor['sellingPriceBefIntCashBack']
                    except:
                        ourSp = vendor['sellingPrice']
                    ourOfferPrice = vendor['sellingPrice']
                iterator+=1
        except Exception as e:
            import traceback
            print traceback.print_exc()
            continue
        finally:
            sdItem =None
        print ourSp
        print ourOfferPrice
        print ourInventory
        snapdealItem.ourSnapdealPrice = ourSp
        snapdealItem.ourSnapdealOfferPrice = ourOfferPrice
        snapdealItem.ourSnapdealInventory = ourInventory
        snapdealItem.lowestSnapdealPrice = lowestSp
        snapdealItem.lowestSnapdealOfferPrice = lowestOfferPrice
        snapdealItem.lowestSnapdealSeller = lowestSellerName 
        snapdealItem.lowestSnapdealSellerInventory = lowestSellerInventory
    snapdealItems[:]=[]  

def scrapFlipkart(flipkartItems):
    from shop2020.model.v1.catalog.script import FlipkartProductPageParser
    from operator import itemgetter
    import requests as httpRequest
    from shop2020.model.v1.catalog.impl.DataService import FlipkartItem
    scraperFk = FlipkartProductPageParser.FlipkartProductPageScraper()
    for flipkartItem in flipkartItems:
        fkItem = FlipkartItem.get_by(item_id=flipkartItem.item_id)
        if fkItem is None:
            continue
        try:
            vendorsData = scraperFk.read(fkItem.flipkartSerialNumber.strip())
            lowestSellerName = vendorsData['cheapestSeller'].encode('utf-8')
            lowestSellerSp = vendorsData['lowestSellingPrice']
            ourSp = vendorsData['saholicSellingPrice']
        except:
            continue
        finally:
            fkItem=None
        try:
            request_url = "https://api.flipkart.net/sellers/skus/%s/listings"%(str(fkItem.flipkartSerialNumber))
            r = httpRequest.get(request_url, auth=('m2z93iskuj81qiid', '0c7ab6a5-98c0-4cdc-8be3-72c591e0add4'))
            print "Inventory info",r.json()
            stock_count = int((r.json()['attributeValues'])['stock_count'])
        except:
            stock_count = 0
        finally:
                r={}
        flipkartItem.ourFlipkartPrice = ourSp
        flipkartItem.ourFlipkartInventory = stock_count
        flipkartItem.lowestFlipkartPrice = lowestSellerSp
        flipkartItem.lowestFlipkartSeller =  lowestSellerName
    scraperFk = None
    flipkartItems[:] =[] 


def close_session():
    if session.is_active:
        print "session is active. closing it."
        session.close()
        
def scrapAmazon(amazonItems):
    from shop2020.model.v1.catalog.script import AmazonScraper
    from shop2020.model.v1.catalog.impl.DataService import Amazonlisted
    print "Inside amazonitems ",amazonItems
    print "len amazon items ",len(amazonItems)
    time.sleep(5)
    amScraper = AmazonScraper.AmazonScraper()
    for amazonItem in amazonItems:
        amazon_d_item = Amazonlisted.get_by(itemId=amazonItem.item_id)
        if amazon_d_item is None:
            continue
        if len(amazon_d_item.asin)==0:
            print "No asin found for ",amazonItem.item_id
            continue
        saholicUrl = "http://www.amazon.in/gp/offer-listing/%s?m=AF6E3O0VE0X4D&s=merchant-items&ie=UTF8&qid=1433584512&sr=1-1"%(amazon_d_item.asin.strip())
        generalUrl = "http://www.amazon.in/gp/offer-listing/%s"%(amazon_d_item.asin.strip())
        try:
            saholicPrice = (amScraper.read(saholicUrl, False))[0]
        except:
            saholicPrice = 0.0
        try:
            cheapestSeller = (amScraper.read(generalUrl, True))
            cheapestSellerPrice = cheapestSeller[0]
            cheapestSellerName = cheapestSeller[1].encode('utf-8')
        except:
            cheapestSellerPrice = 0.0
            cheapestSellerName = ""
        amazonItem.ourAmazonPrice = saholicPrice
        amazonItem.lowestAmazonPrice = cheapestSellerPrice
        amazonItem.lowestAmazonSeller = cheapestSellerName 
    amazonItems[:] =[] 
        

def sendMail(request):
    import smtplib
    from email.mime.text import MIMEText
    from email.mime.multipart import MIMEMultipart
    mailServer = smtplib.SMTP("smtp.gmail.com", 587)
    mailServer.ehlo()
    mailServer.starttls()
    mailServer.ehlo()
    recipients = []
    recipients.append(request.user)
    message = "Your Request has been processed.Visit dashboard to check & download report" 
    msg = MIMEMultipart()
    msg['Subject'] = "Competition Scraping.Upload Id" + ' - ' + str(request.requestId)
    msg['From'] = ""
    msg['To'] = ",".join(recipients)
    msg.preamble = "Competition Scraping" + ' - ' + str(request.requestId)
    html_msg = MIMEText(message, 'html')
    msg.attach(html_msg)
    try:
        mailServer.login("build@shop2020.in", "cafe@nes")
        #mailServer.sendmail("cafe@nes", ['kshitij.sood@saholic.com'], msg.as_string())
        mailServer.sendmail("cafe@nes", recipients, msg.as_string())
    except Exception as e:
        print e
        print "Unable to send mail.Lets try with local SMTP."
        smtpServer = smtplib.SMTP('localhost')
        smtpServer.set_debuglevel(1)
        sender = 'build@shop2020.in'
        try:
            smtpServer.sendmail(sender, recipients, msg.as_string())
            print "Successfully sent email"
        except:
            print "Error: unable to send email."

        

if __name__ == "__main__":
    parser = optparse.OptionParser()
    parser.add_option("-l", "--logfile", dest="logfile",
                      type="string",
                      help="Log all output to LOG_FILE",
                      )
    parser.add_option("-i", "--pidfile", dest="pidfile",
                      type="string",
                      help="Write the PID to pidfile")
    (options, args) = parser.parse_args()
    daemon = CompetitorScraping(options.logfile, options.pidfile)
    if len(args) == 0:
        daemon.run()
    elif len(args) == 1:
        if 'start' == args[0]:
            daemon.start()
        elif 'stop' == args[0]:
            daemon.stop()
        elif 'restart' == args[0]:
            daemon.restart()
        else:
            print "Unknown command"
            sys.exit(2)
        sys.exit(0)
    else:
        print "usage: %s start|stop|restart" % sys.argv[0]
        sys.exit(2)