# Subversion Repositories SmartDukaan
#
# Rev
#
# Rev 16102 | Rev 16182 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed

import urllib2
from BeautifulSoup import BeautifulSoup
import pymongo
import re
from dtr.utils.utils import to_java_date, getNlcPoints
import optparse
from datetime import datetime
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from dtr.utils import ShopCluesScraper
import traceback
from dtr.storage.MemCache import MemCache

# Shared pymongo client; created on first use by get_mongo_connection().
con = None

# ---- command line -----------------------------------------------------------
parser = optparse.OptionParser()
parser.add_option(
    "-m", "--m",
    dest="mongoHost", type="string", default="localhost",
    metavar="mongo_host",
    help="The HOST where the mongo server is running")
parser.add_option(
    "-r", "--reset",
    dest="reset", type="string", default="False",
    help="Reset Ranks?")

options, args = parser.parse_args()

# ---- constants --------------------------------------------------------------
# Marketplace name -> numeric source id.
SOURCE_MAP = {
    'AMAZON': 1,
    'FLIPKART': 2,
    'SNAPDEAL': 3,
    'SAHOLIC': 4,
    'SHOPCLUES.COM': 5,
}
bestSellers = []
# Listing URL template; scrapeBestSellers fills both %s slots.
baseUrl = (
    "http://m.shopclues.com/products/getProductList/"
    "mobiles:top-selling-mobiles-and-tablets.html/%s/page=%s"
)
# Request headers sent with every page fetch.
headers = {
    'User-Agent': 'Mozilla/5.0 (Linux; Android 4.3; Nexus 7 Build/JSS15Q) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.72 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.8',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
    'Connection': 'keep-alive',
}
# Timestamp captured once, when the module is loaded.
now = datetime.now()
mc = MemCache(options.mongoHost)
sc = ShopCluesScraper.ShopCluesScraper(findThumbnail=True)

# ---- run-time accumulators (reported by sendMail) -----------------------------
bundledProducts = []
exceptionList = []


class __ProductInfo:
    """Plain value holder for one scraped listing entry.

    The constructor stores every argument unchanged on an attribute of the
    same name; no validation or conversion is performed.
    """

    def __init__(self, identifier, rank, url, available_price, in_stock, codAvailable, source_product_name, thumbnail, coupon):
        # Where the product was found.
        self.identifier, self.rank, self.url = identifier, rank, url
        # Price and availability details.
        self.available_price, self.in_stock, self.codAvailable = available_price, in_stock, codAvailable
        # Descriptive extras.
        self.source_product_name, self.thumbnail = source_product_name, thumbnail
        self.coupon = coupon

class __NewBundled:
    """Pairs a newly scraped product with the catalogue record it was bundled with."""

    def __init__(self, newProduct, oldProduct):
        # Both references are stored as given (no copies are made).
        self.newProduct, self.oldProduct = newProduct, oldProduct

def get_mongo_connection(host=options.mongoHost, port=27017):
    """Return the module-wide MongoClient, creating it on the first call.

    ``host`` defaults to the --m command line option as it was when this
    function was defined. Once a client exists it is returned as-is and the
    arguments are ignored.

    Returns None (after printing the error) if the client cannot be created.
    """
    global con
    # Fast path: reuse the client made by an earlier call.
    if con is not None:
        return con
    print("Establishing connection %s host and port %d" % (host, port))
    try:
        con = pymongo.MongoClient(host, port)
    except Exception as e:
        print(e)
        return None
    return con

def getSoupObject(url, max_attempts=9):
    """Download ``url`` and parse it with BeautifulSoup.

    The request is sent with the module-level ``headers``. The body is
    decoded as UTF-8 before parsing; if decoding or parsing the decoded text
    fails, the raw bytes are parsed instead. Any failure of the download or
    of both parses is printed and the whole fetch is retried.

    :param url: address of the page to fetch.
    :param max_attempts: total number of fetch attempts (default 9).
    :returns: the parsed BeautifulSoup document, or None when every attempt
        failed -- callers must check for None.
    """
    print("Getting soup object for")
    print(url)
    for _attempt in range(max_attempts):
        try:
            request = urllib2.Request(url, headers=headers)
            response = urllib2.urlopen(request)
            try:
                response_data = response.read()
            finally:
                # Release the connection even when read() fails.
                response.close()
            try:
                page = response_data.decode("utf-8")
                return BeautifulSoup(page, convertEntities=BeautifulSoup.HTML_ENTITIES)
            except Exception:
                # Not valid UTF-8 (or the decoded text would not parse):
                # hand the undecoded bytes to the parser instead.
                traceback.print_exc()
                return BeautifulSoup(response_data, convertEntities=BeautifulSoup.HTML_ENTITIES)
        except Exception:
            traceback.print_exc()
            print("Retrying")
    # Every attempt failed.
    return None

        
def _bundleUriPattern(link):
    """Return ``link`` without the mobile-site host, ``.html`` and a trailing ``-<number>``."""
    uri = link.replace('http://m.shopclues.com', '').replace(".html", "")
    dash = uri.rfind('-')
    try:
        int(uri[dash + 1:])
    except ValueError:
        # Last dash-separated segment is not a number: keep the uri whole.
        return uri
    return uri[:dash]


def _updateKnownProduct(existing, rank, productInfo):
    """Write freshly scraped rank/price/stock for a catalogued product to MasterData and Deals."""
    catalog = get_mongo_connection().Catalog
    productId = existing['_id']
    timestamp = to_java_date(datetime.now())
    if productInfo['inStock'] == 1:
        catalog.MasterData.update({'_id': productId}, {"$set": {
            'rank': rank, 'available_price': productInfo['price'],
            'in_stock': productInfo['inStock'], 'codAvailable': productInfo['isCod'],
            'coupon': productInfo['coupon'], 'updatedOn': timestamp, 'priceUpdatedOn': timestamp}})
        catalog.Deals.update({'_id': productId}, {'$set': {
            'rank': rank, 'available_price': productInfo['price'],
            'in_stock': productInfo['inStock'], 'codAvailable': productInfo['isCod']}})
    else:
        # Out of stock: only the stock flag and timestamps change.
        catalog.MasterData.update({'_id': productId}, {'$set': {
            'updatedOn': timestamp, 'in_stock': 0, 'priceUpdatedOn': timestamp}})
        catalog.Deals.update({'_id': productId}, {'$set': {'in_stock': 0}})


def _recomputeDealSafely(product):
    """Run recomputeDeal for ``product``; report a failure instead of aborting the scrape."""
    try:
        recomputeDeal(product)
    except Exception:
        traceback.print_exc()
        print("Unable to compute deal for %s" % (product.get('skuBundleId')))


def scrapeBestSellers():
    """Walk the best-seller listing page by page and sync it with the catalogue.

    Each product gets a running rank across pages, starting at 1. For every
    product:

    * if MasterData already has it (source_id 5, same identifier), its rank,
      price and stock are updated and its deal is recomputed;
    * otherwise it is bundled with the first MasterData record whose
      marketPlaceUrl contains the product's normalised URL path;
    * if no such record exists it is appended to ``exceptionList``.

    Products whose detail page cannot be read are skipped. The walk ends at
    the first page that has no product entries or cannot be fetched.
    """
    global bestSellers
    global exceptionList
    bestSellers = []
    rank = 0
    page = 1
    while True:
        url = baseUrl % (page, page - 1)
        soup = getSoupObject(url)
        if soup is None:
            # getSoupObject exhausted its retries; stop here so that whatever
            # was processed so far can still be reported.
            print("Unable to fetch %s, stopping" % url)
            return
        productDivs = soup.findAll('div', {'class': 'pd-list-cont'})
        if not productDivs:
            return
        for productDiv in productDivs:
            rank = rank + 1
            info_tag = productDiv.find('a')
            link = info_tag['href']
            scin = info_tag['data-id'].strip()
            print(link)
            print(scin)
            productName = productDiv.find('div', {'class': 'pdt-name'}).string
            try:
                productInfo = sc.read(link)
            except Exception:
                traceback.print_exc()
                continue
            masterData = get_mongo_connection().Catalog.MasterData
            product = list(masterData.find({'source_id': 5, 'identifier': scin}))
            if product:
                _updateKnownProduct(product[0], rank, productInfo)
                _recomputeDealSafely(product[0])
                continue
            # Unknown identifier: bundle it with a product that shares the
            # same URL pattern.
            uriPattern = _bundleUriPattern(link)
            product = []
            if uriPattern:
                # Escaped so URL characters are matched literally; an empty
                # pattern is skipped because it would match every document.
                product = list(masterData.find({'source_id': 5, 'marketPlaceUrl': {'$regex': re.escape(uriPattern)}}))
            toBundle = __ProductInfo(scin, rank, link, productInfo['price'], productInfo['inStock'], productInfo['isCod'], productName, productInfo['thumbnail'], productInfo['coupon'])
            if product:
                bundleNewProduct(product[0], toBundle)
                _recomputeDealSafely(product[0])
            else:
                exceptionList.append(toBundle)
        page = page + 1

def populateNegativeDeals():
    """Store the distinct ``sku`` values of Catalog.NegativeDeals in the cache.

    The list is written under the key "negative_deals" with 600 as the third
    argument to ``mc.set`` (presumably an expiry in seconds -- confirm against
    MemCache).
    """
    catalog = get_mongo_connection().Catalog
    skus = catalog.NegativeDeals.find().distinct('sku')
    mc.set("negative_deals", skus, 600)

def recomputePoints(item, deal):
    """Recalculate the NLC and deal points of ``deal`` and store them in Catalog.Deals.

    :param item: catalogue record; ``manualDealThresholdPrice`` and
        ``dealPoints`` are read from it.
    :param deal: Deals document; its current ``totalPoints``, ``nlcPoints``
        and ``dealPoints`` are read to adjust the total.

    If ``getNlcPoints`` fails, the error is printed and the deal's existing
    ``nlcPoints`` are kept. Deal points are granted only when a manual
    threshold price is set and is not below the deal's available price.
    """
    try:
        nlcPoints = getNlcPoints(item, deal['minNlc'], deal['maxNlc'], deal['available_price'])
    except Exception:
        traceback.print_exc()
        nlcPoints = deal['nlcPoints']
    threshold = item['manualDealThresholdPrice']
    # recomputeDeal leaves the threshold as None when there is no manual deal;
    # test for that explicitly instead of relying on None comparing below numbers.
    if threshold is not None and threshold >= deal['available_price']:
        dealPoints = item['dealPoints']
    else:
        dealPoints = 0
    # Swap the old nlc/deal contributions for the new ones.
    totalPoints = deal['totalPoints'] - deal['nlcPoints'] + nlcPoints - deal['dealPoints'] + dealPoints
    get_mongo_connection().Catalog.Deals.update(
        {'_id': deal['_id']},
        {"$set": {'totalPoints': totalPoints, 'nlcPoints': nlcPoints, 'dealPoints': dealPoints,
                  'manualDealThresholdPrice': threshold}})

    
def _hideDeal(dealId):
    """Set showDeal and prepaidDeal to 0 on the Deals document with this _id."""
    get_mongo_connection().Catalog.Deals.update({'_id': dealId}, {'$set': {'showDeal': 0, 'prepaidDeal': 0}})


def _negativeDealSkus():
    """Return the cached "negative_deals" list, populating the cache first if it is empty."""
    skus = mc.get("negative_deals")
    if skus is None:
        populateNegativeDeals()
        skus = mc.get("negative_deals")
    return skus


def _newBestDeal():
    """Return an empty best-deal tracker (no deal yet, infinite price, zero points)."""
    return {'deal': None, 'price': float("inf"), 'points': 0}


def _offerDeal(best, candidate):
    """Make ``candidate`` the tracked deal if it is cheaper, or equally priced with more bestSellerPoints."""
    price = candidate['available_price']
    if price < best['price'] or (price == best['price'] and best['points'] < candidate['bestSellerPoints']):
        best['deal'] = candidate
        best['price'] = price
        best['points'] = candidate['bestSellerPoints']


def recomputeDeal(item):
    """Recompute which deal of ``item``'s bundle is shown.

    Steps:

    1. Reset the deal fields on ``item`` (in place) and, if a ManualDeals
       entry is currently active for it, copy that entry's type, points and
       threshold price onto ``item``.
    2. Load every Deals document with the same ``skuBundleId`` and, for the
       one that is ``item`` itself, recompute its points.
    3. Hide (showDeal=0, prepaidDeal=0) any deal that is out of stock, has no
       ``maxprice``, costs more than ``maxprice``, is listed in the negative
       deals, or is a ShopClues deal with rank 0.
    4. Among the remaining deals pick the cheapest one with
       ``codAvailable == 1`` and the cheapest one without (ties go to the
       higher ``bestSellerPoints``). The first gets showDeal=1; the second
       gets prepaidDeal=1 only if there is no first or it is strictly
       cheaper. Every other deal of the bundle is hidden.

    :param item: MasterData record; must contain ``skuBundleId``, ``_id`` and
        ``source_id``.
    """
    print("Recomputing for bundleId %d" % (item.get('skuBundleId')))
    skuBundleId = item['skuBundleId']
    item['dealFlag'] = 0
    item['dealType'] = 0
    item['dealPoints'] = 0
    item['manualDealThresholdPrice'] = None
    catalog = get_mongo_connection().Catalog
    timestamp = to_java_date(datetime.now())
    manualDeals = list(catalog.ManualDeals.find({
        'startDate': {'$lte': timestamp}, 'endDate': {'$gte': timestamp},
        'source_id': item['source_id'], 'sku': item['_id']}))
    if manualDeals:
        item['dealFlag'] = 1
        item['dealType'] = manualDeals[0]['dealType']
        item['dealPoints'] = manualDeals[0]['dealPoints']
        item['manualDealThresholdPrice'] = manualDeals[0]['dealThresholdPrice']
    similarItems = list(catalog.Deals.find({'skuBundleId': skuBundleId}).sort([('available_price', pymongo.ASCENDING)]))

    shopcluesSourceId = SOURCE_MAP.get('SHOPCLUES.COM')
    codBest = _newBestDeal()
    prepaidBest = _newBestDeal()
    # Looked up on the first deal and reused for the rest, so the list cannot
    # expire from the cache between the None check and the membership test.
    negativeDeals = None
    for similarItem in similarItems:
        if similarItem['_id'] == item['_id']:
            try:
                recomputePoints(item, similarItem)
            except Exception:
                traceback.print_exc()
        if negativeDeals is None:
            negativeDeals = _negativeDealSkus()
        notSellable = (similarItem['in_stock'] == 0
                       or similarItem['maxprice'] is None
                       or similarItem['maxprice'] < similarItem['available_price']
                       or similarItem['_id'] in negativeDeals)
        if notSellable or (similarItem['source_id'] == shopcluesSourceId and similarItem['rank'] == 0):
            _hideDeal(similarItem['_id'])
            continue
        if similarItem['codAvailable'] == 1:
            _offerDeal(codBest, similarItem)
        else:
            _offerDeal(prepaidBest, similarItem)

    bestOne = codBest['deal']
    prepaidBestOne = prepaidBest['deal']
    toUpdate = []
    if bestOne is not None or prepaidBestOne is not None:
        toUpdate = [similarItem['_id'] for similarItem in similarItems]
        if bestOne is not None:
            toUpdate.remove(bestOne['_id'])
            catalog.Deals.update({'_id': bestOne['_id']}, {'$set': {'showDeal': 1, 'prepaidDeal': 0}})
        # The prepaid winner is flagged only when it beats the COD winner on
        # price (or when there is no COD winner at all).
        if prepaidBestOne is not None and (bestOne is None or prepaidBestOne['available_price'] < bestOne['available_price']):
            toUpdate.remove(prepaidBestOne['_id'])
            catalog.Deals.update({'_id': prepaidBestOne['_id']}, {'$set': {'showDeal': 0, 'prepaidDeal': 1}})
    if toUpdate:
        catalog.Deals.update({'_id': {"$in": toUpdate}}, {'$set': {'showDeal': 0, 'prepaidDeal': 0}}, upsert=False, multi=True)

          
def bundleNewProduct(existingProduct, toBundle):
    """Insert ``toBundle`` into MasterData as a copy of ``existingProduct``.

    The new document starts as a copy of ``existingProduct``, gets the next
    ``_id`` (current maximum + 1) and takes price, stock, COD flag, coupon,
    identifier, rank, name and URLs from ``toBundle``.

    On success ``existingProduct`` is updated in place to equal the inserted
    document and the pair is recorded in ``bundledProducts``. On failure the
    error is printed, ``toBundle`` is appended to ``exceptionList`` and
    ``existingProduct`` is left exactly as it was passed in.

    :returns: ``{1: 'Data added successfully.'}`` or ``{0: 'Unable to add data.'}``.
    """
    global bundledProducts
    global exceptionList
    print("Adding new product")
    try:
        masterData = get_mongo_connection().Catalog.MasterData
        max_id = list(masterData.find().sort([('_id', pymongo.DESCENDING)]).limit(1))
        timestamp = to_java_date(datetime.now())
        # Work on a copy so a failed insert cannot leave the caller's record
        # half-modified (in particular with an _id that was never stored).
        newProduct = dict(existingProduct)
        newProduct.update({
            '_id': max_id[0]['_id'] + 1,
            'addedOn': timestamp,
            'available_price': toBundle.available_price,
            'updatedOn': timestamp,
            'codAvailable': toBundle.codAvailable,
            'coupon': str(toBundle.coupon),
            'identifier': str(toBundle.identifier),
            'in_stock': toBundle.in_stock,
            'marketPlaceUrl': toBundle.url,
            'rank': toBundle.rank,
            'source_product_name': toBundle.source_product_name,
            'url': toBundle.url,
        })
        masterData.insert(newProduct)
    except Exception:
        traceback.print_exc()
        exceptionList.append(toBundle)
        return {0: 'Unable to add data.'}
    # The caller and the report mail expect the passed-in dict to describe
    # the newly inserted product.
    existingProduct.update(newProduct)
    bundledProducts.append(__NewBundled(toBundle, existingProduct))
    return {1: 'Data added successfully.'}

def _htmlRow(values, lastCellAlign='center'):
    """Render ``values`` as one HTML table row.

    Every cell is centred except the last one, which uses ``lastCellAlign``.
    Values are formatted with ``%s``, so None and numbers are accepted.
    """
    lastIndex = len(values) - 1
    cells = []
    for index, value in enumerate(values):
        align = lastCellAlign if index == lastIndex else 'center'
        cells.append('<td style="text-align:%s">%s</td>' % (align, value))
    return "<tr>" + "".join(cells) + "</tr>"


def sendMail():
    """Mail an HTML report of this run through the SMTP server on localhost.

    The report has two tables: one row per entry of ``bundledProducts``
    (fields of the stored product document) and one row per entry of
    ``exceptionList`` (products that could not be bundled). The HTML is also
    printed to stdout.

    Connection and delivery errors are printed, not raised.
    """
    bundledColumns = ('_id', 'identifier', 'rank', 'source_product_name', 'skuBundleId', 'brand',
                      'product_name', 'available_price', 'in_stock', 'coupon', 'codAvailable')
    parts = ["""<html>
            <body>
            <h3>ShopClues Best Sellers Auto Bundled</h3>
            <table border="1" style="width:100%;">
            <thead>
            <tr>
            <th>Item Id</th>
            <th>Identifier</th>
            <th>Rank</th>
            <th>Product Name</th>
            <th>Bundle Id</th>
            <th>Bundled with Brand</th>
            <th>Bundled with Product Name</th>
            <th>Available_price</th>
            <th>In Stock</th>
            <th>Coupon</th>
            <th>COD Available</th>
            </tr></thead>
            <tbody>"""]
    for bundledProduct in bundledProducts:
        oldProduct = bundledProduct.oldProduct
        parts.append(_htmlRow([oldProduct.get(column) for column in bundledColumns]))
    # The opening <thead> was missing here, leaving an unmatched </thead>.
    parts.append("""</tbody></table><h3>Items not bundled</h3><table border="1" style="width:100%;">
    <thead>
    <tr>
    <th>Identifier</th>
    <th>Rank</th>
    <th>Product Name</th>
    <th>Url</th>
    <th>Available Price</th>
    <th>In Stock</th>
    <th>COD Available</th>
    <th>Coupon</th>
    <th>Thumbnail</th>
    </tr></thead>
    <tbody>""")
    for exceptionItem in exceptionList:
        parts.append(_htmlRow([
            exceptionItem.identifier, exceptionItem.rank, exceptionItem.source_product_name,
            exceptionItem.url, exceptionItem.available_price, exceptionItem.in_stock,
            exceptionItem.codAvailable, exceptionItem.coupon, exceptionItem.thumbnail,
        ], lastCellAlign='left'))
    parts.append("""</tbody></table></body></html>""")
    message = "".join(parts)
    print(message)

    sender = 'dtr@shop2020.in'
    recipients = ['kshitij.sood@saholic.com']
    #recipients = ['rajneesh.arora@saholic.com','kshitij.sood@saholic.com','chaitnaya.vats@saholic.com','manoj.kumar@saholic.com']
    subject = "Shopclues Best Sellers" + ' - ' + str(datetime.now())
    msg = MIMEMultipart()
    msg['Subject'] = subject
    # Header now matches the envelope sender instead of being empty.
    msg['From'] = sender
    msg['To'] = ",".join(recipients)
    msg.preamble = subject
    msg.attach(MIMEText(message, 'html'))

    smtpServer = None
    try:
        smtpServer = smtplib.SMTP('localhost')
        smtpServer.set_debuglevel(1)
        smtpServer.sendmail(sender, recipients, msg.as_string())
        print("Successfully sent email")
    except Exception:
        traceback.print_exc()
        print("Error: unable to send email.")
    finally:
        if smtpServer is not None:
            try:
                smtpServer.quit()
            except smtplib.SMTPException:
                # Session already broken; just drop the socket.
                smtpServer.close()

def resetRanks(category=None):
    """Set ``rank`` back to 0 on every MasterData document of source 5 that has a rank above 0.

    ``updatedOn`` is set to the module-level ``now`` timestamp.

    :param category: not used by this function. It is optional so the
        function can be called without arguments.
    """
    # NOTE(review): only MasterData is touched here; confirm whether the rank
    # stored in Catalog.Deals needs resetting too.
    get_mongo_connection().Catalog.MasterData.update({'rank':{'$gt':0},'source_id':5},{'$set' : {'rank':0,'updatedOn':to_java_date(now)}}, multi=True)
        

def main():
    """Run one scrape: optionally reset ranks, scrape the listing, then mail a report.

    Ranks are reset only when the --reset option is exactly the string
    'True'. The report is sent only if at least one product was bundled or
    could not be bundled.
    """
    if options.reset == 'True':
        # resetRanks declares a category parameter that it never reads;
        # calling it with no argument raised TypeError.
        resetRanks(None)
    scrapeBestSellers()
    if bundledProducts or exceptionList:
        sendMail()


if __name__ == '__main__':
    main()