# Subversion Repositories SmartDukaan
#
# Rev
#
# Rev 20347 | Rev 21135 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed

import urllib2
from BeautifulSoup import BeautifulSoup
import pymongo
import re
from dtr.utils.utils import to_java_date, getNlcPoints, DEAL_PRIORITY, getCashBack
import optparse
from datetime import datetime
import time
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from dtr.utils import ShopCluesScraper
import traceback
from dtr.storage.MemCache import MemCache
import chardet

# Shared pymongo client; created on first use by get_mongo_connection().
con = None

# Command-line options. NOTE: parse_args() below runs at import time, so
# importing this module consumes sys.argv.
parser = optparse.OptionParser()
parser.add_option("-m", "--m", dest="mongoHost",
                      default="localhost",
                      type="string", help="The HOST where the mongo server is running",
                      metavar="mongo_host")
# --reset is a *string* option; main() compares it against the literal 'True'.
parser.add_option("-r", "--reset", dest="reset",
                   default="False", type="string",
                   help="Reset Ranks?")

(options, args) = parser.parse_args()

# Marketplace name -> numeric source id. This script only looks up the
# 'SHOPCLUES.COM' entry (5), which matches the literal source_id used in queries.
SOURCE_MAP = {'AMAZON':1,'FLIPKART':2,'SNAPDEAL':3,'SAHOLIC':4, 'SHOPCLUES.COM':5,'PAYTM.COM':6}
# Reset to an empty list by scrapeBestSellers(); nothing in this file appends to it.
bestSellers = []
# Listing URL template; scrapeBestSellers() fills the two %s slots with (page, page-1).
baseUrl = "http://m.shopclues.com/products/getProductList/mobiles:top-selling-mobiles-and-tablets.html/%s/page=%s"
# HTTP headers sent with every request made by getSoupObject().
headers = {
            'User-Agent':'Mozilla/5.0 (Linux; Android 4.3; Nexus 7 Build/JSS15Q) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.72 Safari/537.36',
            'Accept' : 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',      
            'Accept-Language' : 'en-US,en;q=0.8',                     
            'Accept-Charset' : 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
            'Connection':'keep-alive'
        }
# Script start time; resetRanks() stamps it into 'updatedOn'.
now = datetime.now()
# Cache client: passed to getCashBack() and used for the "negative_deals" entry.
mc = MemCache(options.mongoHost)
# Product-page scraper; scrapeBestSellers() calls sc.read(link) for each listed product.
sc = ShopCluesScraper.ShopCluesScraper(findThumbnail=True)

# Filled by bundleNewProduct() with __NewBundled records; reported by sendMail().
bundledProducts = []
# Listing entries that could not be bundled (__ProductInfo records); reported by sendMail().
exceptionList = []


class __ProductInfo:
    """Plain record for one product taken from the best-sellers listing.

    Every constructor argument is stored unchanged on the instance under the
    same name.
    """

    def __init__(self, identifier, rank, url, available_price, in_stock, codAvailable, source_product_name, thumbnail, coupon):
        # Which product this is and where it appeared.
        self.identifier, self.rank, self.url = identifier, rank, url
        # Price and availability flags as supplied by the caller.
        self.available_price, self.in_stock, self.codAvailable = available_price, in_stock, codAvailable
        # Descriptive extras.
        self.source_product_name, self.thumbnail, self.coupon = source_product_name, thumbnail, coupon

class __NewBundled:
    """Pair of (listing entry that was bundled, catalog document it was bundled with)."""

    def __init__(self, newProduct, oldProduct):
        # Both values are kept exactly as passed in.
        self.newProduct, self.oldProduct = newProduct, oldProduct

def get_mongo_connection(host=options.mongoHost, port=27017):
    """Return the module-wide pymongo client, creating it on first call.

    host and port are only used when the client has not been created yet;
    later calls return the existing client regardless of the arguments.
    Returns None if constructing the client raises.
    """
    global con
    # Fast path: the client already exists.
    if con is not None:
        return con
    print("Establishing connection %s host and port %d" % (host, port))
    try:
        con = pymongo.MongoClient(host, port)
    except Exception as e:
        print(e)
        return None
    return con

def getNetPriceForItem(itemId, source_id, category_id ,price):
    """Return price after subtracting the cash back configured for the item.

    The cash back record is looked up through getCashBack(). It is applied
    only when the record exists, its 'cash_back_status' is 1 and its
    'cash_back_type' is 1 (percentage of price) or 2 (flat amount). When the
    record carries 'maxCashBack', the resulting cash back is capped at it.

    Any failure while looking up or interpreting the record is logged and
    the price is returned unchanged (best-effort behaviour).
    """
    cash_back_type = 0
    cash_back = 0
    try:
        cashBack = getCashBack(itemId, source_id, category_id, mc, options.mongoHost)
        if cashBack and cashBack.get('cash_back_status') == 1 and cashBack['cash_back_type'] in (1, 2):
            # Work on local copies: the record may be a shared/cached object,
            # and rewriting it in place would leak this item's cap into later
            # lookups that receive the same object.
            offerType = cashBack['cash_back_type']
            offerValue = cashBack['cash_back']
            maxCashBack = cashBack.get('maxCashBack')
            if maxCashBack is not None:
                if offerType == 1 and (float(offerValue) * price) / 100 > maxCashBack:
                    # Percentage exceeds the cap: becomes a flat cash back equal to the cap.
                    offerType = 2
                    offerValue = maxCashBack
                elif offerType == 2 and offerValue > maxCashBack:
                    offerValue = maxCashBack
            # Convert before assigning so a bad value leaves both defaults untouched.
            offerValue = float(offerValue)
            cash_back_type = offerType
            cash_back = offerValue
    except Exception:
        # Deliberately best-effort, but do not hide the reason.
        traceback.print_exc()

    if cash_back_type == 1:
        return (price - float(cash_back) * price / 100)
    elif cash_back_type == 2:
        return (price - cash_back)
    else:
        return price


def getSoupObject(url):
    """Fetch url and return the page parsed with BeautifulSoup.

    The request is sent with the module-level headers. The body is decoded
    as UTF-8 before parsing; if decoding or parsing the decoded text fails,
    the raw bytes are parsed instead. A failed attempt is retried, up to 9
    attempts in total (the module global RETRY_COUNT holds the attempt number).

    Returns the soup, or None when every attempt failed.
    """
    print("Getting soup object for")
    print(url)
    global RETRY_COUNT
    RETRY_COUNT = 1
    while RETRY_COUNT < 10:
        try:
            request = urllib2.Request(url, headers=headers)
            response = urllib2.urlopen(request)
            try:
                response_data = response.read()
            finally:
                # Release the connection even when read() fails.
                response.close()
            try:
                page = response_data.decode("utf-8")
                return BeautifulSoup(page, convertEntities=BeautifulSoup.HTML_ENTITIES)
            except Exception:
                # Fall back to letting BeautifulSoup sniff the raw bytes.
                traceback.print_exc()
                return BeautifulSoup(response_data, convertEntities=BeautifulSoup.HTML_ENTITIES)
        except Exception:
            traceback.print_exc()
            print("Retrying")
            RETRY_COUNT = RETRY_COUNT + 1
    # All attempts failed; callers must handle None.
    return None

        
def scrapeBestSellers():
    """Walk the best-sellers listing page by page and sync it into Mongo.

    For each product tile (div.pd-list-cont), in listing order:
      * rank is the running 1-based position across all pages;
      * the product page is read through sc.read(link);
      * if Catalog.MasterData already has the identifier (source_id 5), its
        rank/price/stock fields and the matching Catalog.Deals document are
        updated, unless the document has ignorePricing == 1;
      * otherwise a MasterData document with a similar marketPlaceUrl is
        looked up and, if found, the product is bundled with it via
        bundleNewProduct(); if none is found the product is appended to
        exceptionList.
    After an update or bundling attempt, recomputeDeal() is run for the
    affected document.

    The walk stops at the first page that has no product tiles or that
    cannot be fetched.
    """
    global bestSellers
    global exceptionList
    bestSellers = []
    rank = 0
    page = 1
    shopcluesSourceId = SOURCE_MAP.get('SHOPCLUES.COM')
    while True:
        url = (baseUrl)%(page,page-1)
        soup = getSoupObject(url)
        if soup is None:
            # getSoupObject gives up after its retries; stop instead of crashing.
            print("Unable to fetch %s, stopping" % (url))
            return
        productDivs = soup.findAll('div',{'class':'pd-list-cont'})
        if not productDivs:
            return
        for productDiv in productDivs:
            rank = rank + 1
            try:
                info_tag = productDiv.find('a')
                link = info_tag['href']
                scin = info_tag['data-id'].strip()
                productName = productDiv.find('div',{'class':'pdt-name'}).string
            except (AttributeError, KeyError, TypeError):
                # Malformed tile: skip it rather than abort the whole run.
                traceback.print_exc()
                continue
            print(link)
            print(scin)
            try:
                productInfo = sc.read(link)
            except Exception:
                traceback.print_exc()
                continue
            masterData = get_mongo_connection().Catalog.MasterData
            deals = get_mongo_connection().Catalog.Deals
            product = list(masterData.find({'source_id':5,'identifier':scin}))
            if len(product) > 0:
                existing = product[0]
                if existing.get('ignorePricing') == 1:
                    continue
                timestamp = to_java_date(datetime.now())
                if productInfo['inStock'] == 1:
                    netPriceAfterCashBack = getNetPriceForItem(existing['_id'], shopcluesSourceId, existing['category_id'], productInfo['price'])
                    masterData.update({'_id':existing['_id']},
                                      {"$set":{'rank':rank, 'available_price':productInfo['price'],
                                               'in_stock':productInfo['inStock'], 'codAvailable':productInfo['isCod'],
                                               'coupon':productInfo['coupon'], 'updatedOn':timestamp,
                                               'priceUpdatedOn':timestamp}})
                    deals.update({'_id':existing['_id']},
                                 {'$set' : {'rank':rank, 'available_price':productInfo['price'],
                                            'in_stock':productInfo['inStock'], 'codAvailable':productInfo['isCod'],
                                            'netPriceAfterCashBack':netPriceAfterCashBack}})
                else:
                    # Out of stock: keep the stored price, only flip the stock flag.
                    netPriceAfterCashBack = getNetPriceForItem(existing['_id'], shopcluesSourceId, existing['category_id'], existing['available_price'])
                    masterData.update({'_id':existing['_id']}, {'$set' : {'updatedOn':timestamp,'in_stock':0,'priceUpdatedOn':timestamp}})
                    deals.update({'_id':existing['_id']}, {'$set' : {'in_stock':0,'netPriceAfterCashBack':netPriceAfterCashBack}})

                try:
                    recomputeDeal(existing)
                except Exception:
                    traceback.print_exc()
                    print("Unable to compute deal for %s" % (existing.get('skuBundleId')))

            else:
                #Lets bundle product by finding similar url pattern
                uri = link.replace('http://m.shopclues.com','').replace(".html","")
                dash = uri.rfind('-')
                if dash != -1:
                    # Drop a trailing "-<number>" so sibling listings share the same prefix.
                    try:
                        int(uri[dash + 1:])
                        uri = uri[:dash]
                    except ValueError:
                        pass
                product = []
                if uri:
                    # Escape the fragment: it is literal text, not a pattern. An
                    # empty fragment would match every document, so it is skipped.
                    product = list(masterData.find({'source_id':5,'marketPlaceUrl':{'$regex': re.escape(uri)}}))
                toBundle = __ProductInfo(scin, rank, link, productInfo['price'], productInfo['inStock'],productInfo['isCod'], productName, productInfo['thumbnail'] ,productInfo['coupon'])
                if len(product) > 0:
                    bundleNewProduct(product[0], toBundle)
                    try:
                        recomputeDeal(product[0])
                    except Exception:
                        traceback.print_exc()
                        print("Unable to compute deal for %s" % (product[0].get('skuBundleId')))
                else:
                    exceptionList.append(toBundle)
        page = page+1

def populateNegativeDeals():
    """Store the distinct 'sku' values of Catalog.NegativeDeals in the cache.

    The list is written under the key "negative_deals" with an expiry
    argument of 600 (presumably seconds -- confirm against MemCache.set).
    """
    negativeDealsCollection = get_mongo_connection().Catalog.NegativeDeals
    skus = negativeDealsCollection.find().distinct('sku')
    mc.set("negative_deals", skus, 600)

#def recomputePoints(item, deal):
#    try:
#        if item.get('available_price') == deal['available_price']:
#            print "No need to compute points for %d , as price is still same" %(item['_id'])
#            raise
#        nlcPoints = getNlcPoints(item, deal['minNlc'], deal['maxNlc'], deal['available_price'])
#    except:
#        print traceback.print_exc()
#        nlcPoints = deal['nlcPoints']
#        
#    
#    bundleDealPoints = list(get_mongo_connection().Catalog.DealPoints.find({'skuBundleId':item['skuBundleId'],'startDate':{'$lte':to_java_date(datetime.now())},'endDate':{'$gte':to_java_date(datetime.now())}}))
#    if len(bundleDealPoints) > 0:
#        item['manualDealThresholdPrice'] = bundleDealPoints[0]['dealThresholdPrice']
#        dealPoints = bundleDealPoints[0]['dealPoints']
#    else:
#        dealPoints = 0
#        item['manualDealThresholdPrice'] = None    
#    
#    get_mongo_connection().Catalog.Deals.update({'_id':deal['_id']},{"$set":{'totalPoints':deal['totalPoints'] - deal['nlcPoints'] + nlcPoints - deal['dealPoints'] +dealPoints , 'nlcPoints': nlcPoints, 'dealPoints': dealPoints, 'manualDealThresholdPrice': item['manualDealThresholdPrice']}})

    
def _loadNegativeDeals():
    """Return the negative-deal ids, from cache when possible, else from Mongo."""
    negativeDeals = mc.get("negative_deals")
    if negativeDeals is None:
        populateNegativeDeals()
        negativeDeals = mc.get("negative_deals")
    if negativeDeals is None:
        # Cache still empty (e.g. cache unavailable): read the collection directly.
        negativeDeals = get_mongo_connection().Catalog.NegativeDeals.find().distinct('sku')
    return negativeDeals


def _hideDeal(dealId):
    """Flag one Deals document as neither the COD pick nor the prepaid pick."""
    get_mongo_connection().Catalog.Deals.update({ '_id' : dealId }, {'$set':{'showDeal':0, 'prepaidDeal':0 }})


def _beatsCurrentBest(candidate, currentBest, currentBestPrice):
    """Return True when candidate should replace currentBest as the best offer.

    A strictly lower netPriceAfterCashBack wins. On equal price the candidate
    wins unless its source comes later than the current best's source in
    DEAL_PRIORITY; if the priority lookup fails the candidate wins.
    """
    price = candidate.get('netPriceAfterCashBack')
    if price < currentBestPrice:
        return True
    if price == currentBestPrice:
        try:
            if (DEAL_PRIORITY.index(int(candidate['source_id'])) > DEAL_PRIORITY.index(int(currentBest['source_id']))):
                return False
        except Exception:
            traceback.print_exc()
        return True
    return False


def recomputeDeal(item):
    """Recompute which Deals documents of item's bundle are shown.

    All Catalog.Deals documents sharing item['skuBundleId'] are considered.
    Documents that are out of stock, listed in the negative deals, or are
    ShopClues documents with rank 0 are hidden (showDeal=0, prepaidDeal=0).
    Among the rest, the cheapest COD offer (codAvailable == 1) gets
    showDeal=1; the cheapest non-COD offer gets prepaidDeal=1 only when there
    is no COD winner or it is strictly cheaper than the COD winner. Every
    other document of the bundle is hidden.
    """
    print("Recomputing for bundleId %d" % (item.get('skuBundleId')))
    skuBundleId = item['skuBundleId']

    deals = get_mongo_connection().Catalog.Deals
    similarItems = list(deals.find({'skuBundleId':skuBundleId}).sort([('netPriceAfterCashBack',pymongo.ASCENDING)]))
    shopcluesSourceId = SOURCE_MAP.get('SHOPCLUES.COM')
    # Fetched once per call instead of several cache round-trips per item.
    negativeDeals = _loadNegativeDeals() if similarItems else []

    bestPrice = float("inf")
    bestOne = None
    prepaidBestPrice = float("inf")
    prepaidBestOne = None
    for similarItem in similarItems:
        codAvailable = similarItem['codAvailable'] == 1
        if similarItem['in_stock'] == 0 or similarItem['_id'] in negativeDeals:
            _hideDeal(similarItem['_id'])
            continue
        if similarItem['source_id'] == shopcluesSourceId and similarItem['rank'] == 0:
            _hideDeal(similarItem['_id'])
            continue
        # COD and prepaid offers compete in separate races with the same rules.
        if codAvailable:
            if _beatsCurrentBest(similarItem, bestOne, bestPrice):
                bestOne = similarItem
                bestPrice = similarItem.get('netPriceAfterCashBack')
        else:
            if _beatsCurrentBest(similarItem, prepaidBestOne, prepaidBestPrice):
                prepaidBestOne = similarItem
                prepaidBestPrice = similarItem.get('netPriceAfterCashBack')

    toUpdate = []
    if bestOne is not None or prepaidBestOne is not None:
        toUpdate = [similarItem['_id'] for similarItem in similarItems]
        if bestOne is not None:
            toUpdate.remove(bestOne['_id'])
            deals.update({ '_id' : bestOne['_id'] }, {'$set':{'showDeal':1,'prepaidDeal':0 }})
        if prepaidBestOne is not None:
            # The prepaid offer is only surfaced when it beats the COD winner.
            if bestOne is None or prepaidBestOne.get('netPriceAfterCashBack') < bestOne.get('netPriceAfterCashBack'):
                toUpdate.remove(prepaidBestOne['_id'])
                deals.update({ '_id' : prepaidBestOne['_id'] }, {'$set':{'showDeal':0,'prepaidDeal':1 }})
    if len(toUpdate) > 0:
        deals.update({ '_id' : { "$in": toUpdate } }, {'$set':{'showDeal':0,'prepaidDeal':0 }},upsert=False, multi=True)



          
def bundleNewProduct(existingProduct, toBundle):
    """Insert toBundle into Catalog.MasterData as a sibling of existingProduct.

    existingProduct (a MasterData document) is updated in place with a new
    _id (current maximum _id + 1) and the listing-specific fields taken from
    toBundle, then inserted as a new document, so the new document inherits
    every other field of the existing one.

    On success a __NewBundled record is appended to bundledProducts and
    {1: 'Data added successfully.'} is returned. On any failure toBundle is
    appended to exceptionList and {0: 'Unable to add data.'} is returned.
    """
    global bundledProducts
    global exceptionList
    print("Adding new product")
    try:
        masterData = get_mongo_connection().Catalog.MasterData
        max_id = list(masterData.find().sort([('_id',pymongo.DESCENDING)]).limit(1))
        timestamp = to_java_date(datetime.now())
        # Compute every new value first: if a conversion fails, the caller's
        # document is left untouched instead of half-converted.
        newFields = {
            '_id': max_id[0]['_id'] + 1,
            'addedOn': timestamp,
            'available_price': toBundle.available_price,
            'updatedOn': timestamp,
            'codAvailable': toBundle.codAvailable,
            'coupon': str(toBundle.coupon),
            'identifier': str(toBundle.identifier),
            'in_stock': toBundle.in_stock,
            'marketPlaceUrl': toBundle.url,
            'rank': toBundle.rank,
            'source_product_name': toBundle.source_product_name,
            'url': toBundle.url,
            'showVideo': 0,
            'shippingCost': 0,
            'quantity': 1,
            'videoLink': "",
            'showNetPrice': 0,
        }
        existingProduct.update(newFields)
        masterData.insert(existingProduct)
        bundledProducts.append(__NewBundled(toBundle, existingProduct))
        return {1:'Data added successfully.'}
    except Exception:
        # Keep the full stack; the bare message alone rarely identifies the cause.
        traceback.print_exc()
        exceptionList.append(toBundle)
        return {0:'Unable to add data.'}

def sendMail():
    """Email an HTML report of this run through the SMTP server on localhost.

    The report has two tables: one row per entry of bundledProducts (fields
    read from the stored catalog document) and one row per entry of
    exceptionList (listing entries that could not be bundled). Values are
    HTML-escaped because they originate from scraped pages.

    Failures to connect or send are logged; nothing is raised for them.
    """
    # Function-scope import keeps the change local to this block (stdlib only).
    from xml.sax.saxutils import escape

    def cell(value, align="center"):
        """Render one escaped table cell; non-string values go through str()."""
        if not isinstance(value, (str, type(u''))):
            value = str(value)
        return '<td style="text-align:' + align + '">' + escape(value) + '</td>'

    # Column order matches the header rows below.
    bundledColumns = ('_id', 'identifier', 'rank', 'source_product_name', 'skuBundleId',
                      'brand', 'product_name', 'available_price', 'in_stock', 'coupon',
                      'codAvailable')
    exceptionColumns = ('identifier', 'rank', 'source_product_name', 'url',
                        'available_price', 'in_stock', 'codAvailable', 'coupon')

    message="""<html>
            <body>
            <h3>ShopClues Best Sellers Auto Bundled</h3>
            <table border="1" style="width:100%;">
            <thead>
            <tr>
            <th>Item Id</th>
            <th>Identifier</th>
            <th>Rank</th>
            <th>Product Name</th>
            <th>Bundle Id</th>
            <th>Bundled with Brand</th>
            <th>Bundled with Product Name</th>
            <th>Available_price</th>
            <th>In Stock</th>
            <th>Coupon</th>
            <th>COD Available</th>
            </tr></thead>
            <tbody>"""
    for bundledProduct in bundledProducts:
        oldProduct = bundledProduct.oldProduct
        message += "<tr>\n" + "\n".join([cell(oldProduct.get(column)) for column in bundledColumns]) + "\n</tr>"
    # The original markup closed a <thead> here that was never opened.
    message+="""</tbody></table><h3>Items not bundled</h3><table border="1" style="width:100%;">
    <thead>
    <tr>
    <th>Identifier</th>
    <th>Rank</th>
    <th>Product Name</th>
    <th>Url</th>
    <th>Available Price</th>
    <th>In Stock</th>
    <th>COD Available</th>
    <th>Coupon</th>
    <th>Thumbnail</th>
    </tr></thead>
    <tbody>"""
    for exceptionItem in exceptionList:
        cells = [cell(getattr(exceptionItem, column)) for column in exceptionColumns]
        cells.append(cell(exceptionItem.thumbnail, "left"))
        message += "<tr>\n" + "\n".join(cells) + "\n</tr>"
    message+="""</tbody></table></body></html>"""
    print(message)
    try:
        # Best-effort normalisation of byte strings to text; on failure the
        # message is sent as it is.
        encoding = chardet.detect(message)
        message = message.decode(encoding.get('encoding'))
    except Exception:
        pass
    #recipients = ['kshitij.sood@saholic.com']
    recipients = ['rajneesh.arora@saholic.com','kshitij.sood@saholic.com','chaitnaya.vats@saholic.com','ritesh.chauhan@saholic.com','khushal.bhatia@saholic.com']
    sender = 'dtr@shop2020.in'
    msg = MIMEMultipart()
    msg['Subject'] = "Shopclues Best Sellers" + ' - ' + str(datetime.now())
    # An empty From header is not a valid message; use the envelope sender.
    msg['From'] = sender
    msg['To'] = ",".join(recipients)
    msg.preamble = "Shopclues Best Sellers" + ' - ' + str(datetime.now())
    # Declare UTF-8 so non-ASCII product names do not break serialisation.
    html_msg = MIMEText(message, 'html', 'utf-8')
    msg.attach(html_msg)

    try:
        smtpServer = smtplib.SMTP('localhost')
    except Exception:
        traceback.print_exc()
        print("Error: unable to send email.")
        return
    try:
        smtpServer.set_debuglevel(1)
        smtpServer.sendmail(sender, recipients, msg.as_string())
        print("Successfully sent email")
    except Exception:
        traceback.print_exc()
        print("Error: unable to send email.")
    finally:
        # Always release the SMTP connection.
        try:
            smtpServer.quit()
        except Exception:
            pass

def resetRanks():
    """Set rank to 0 on every source_id 5 MasterData document whose rank is above 0.

    'updatedOn' is stamped with the script start time (module-level now).
    """
    masterData = get_mongo_connection().Catalog.MasterData
    rankedItems = {'rank':{'$gt':0},'source_id':5}
    clearRank = {'$set' : {'rank':0,'updatedOn':to_java_date(now)}}
    masterData.update(rankedItems, clearRank, multi=True)

def main():
    """Run one scrape: optionally reset ranks, scrape, then mail the report.

    Ranks are reset only when the --reset option is exactly the string
    'True'. The report is mailed only when something was bundled or could
    not be bundled.
    """
    if options.reset == 'True':
        resetRanks()
    scrapeBestSellers()
    if len(bundledProducts)>0 or len(exceptionList) > 0:
        sendMail()
    else:
        # Was a bare string expression, so nothing was ever printed.
        print("nothing to send")

if __name__=='__main__':
    main()