Subversion Repositories SmartDukaan

Rev

Rev 22736 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed

from dtr.utils.utils import to_java_date, fetchResponseUsingProxy, get_mongo_connection, CATEGORY_MAP
from datetime import datetime
import optparse
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
import json
import chardet
from shop2020.utils.EmailAttachmentSender import get_attachment_part
from shop2020.utils import EmailAttachmentSender
import re


# HTTP headers passed to fetchResponseUsingProxy with every API request below.
# NOTE(review): the User-Agent string looks like the Flipkart Android app's - confirm.
headers = {
           'Browser-Name': 'Chrome',
           'User-Agent': 'Mozilla/5.0 (Linux; Android 5.1.1; A0001 Build/LMY48B; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/43.0.2357.121 Mobile Safari/537.36 FKUA/Retail/550900/Android/Mobile (OnePlus/A0001)',
           'Host': 'mobileapi.flipkart.net'
        }

# Not referenced anywhere in the visible part of this file.
con = None
# Command-line options: -m/--m selects the mongo host (default "localhost").
parser = optparse.OptionParser()
parser.add_option("-m", "--m", dest="mongoHost",
                      default="localhost",
                      type="string", help="The HOST where the mongo server is running",
                      metavar="mongo_host")

# NOTE: the command line is parsed at import time, not inside main().
(options, args) = parser.parse_args()

# __RankInfo records accumulated by the scrape* functions.
bestSellers = []
# Captured once at start-up; passed through to_java_date() as the 'updatedOn'
# value of every write made during this run.
now = datetime.now()
# Best sellers for which commitBestSellers() found no MasterData document.
exceptionList = []

# Map falsy values (None, "") to the empty string so they can be concatenated.
xstr = lambda s: s or ""

class __RankInfo:
    """Plain record describing one scraped best-seller entry.

    The constructor arguments are stored unchanged as attributes of the same
    name: ``identifier``, ``rank``, ``category``, ``title`` and ``url``.
    """

    def __init__(self, identifier, rank, category, title, url):
        # No validation or conversion: every field is kept exactly as given.
        (self.identifier,
         self.rank,
         self.category,
         self.title,
         self.url) = (identifier, rank, category, title, url)


def _scrapeBestSellers(store, category, verbose=False):
    """Fetch the first 100 search results of one Flipkart store and queue them.

    Ten pages of ten products each (start offsets 0, 10, ..., 90) are requested
    from the mobile search API. Every product found is appended to the
    module-level ``bestSellers`` list as a ``__RankInfo`` whose rank counts up
    from 1 in the order the products are encountered.

    store    -- value of the ``store`` query parameter (e.g. "tyy/4io").
    category -- category id recorded on every ``__RankInfo`` created here.
    verbose  -- when true, additionally print each product's URL and
                identifier followed by a separator line.

    Raises whatever ``fetchResponseUsingProxy`` / ``json.loads`` raise, and
    KeyError when the response lacks the expected RESPONSE/product structure.
    """
    # Local import so the block does not rely on the file's import header.
    from collections import OrderedDict

    rank = 1
    for start in range(0, 100, 10):
        pageUrl = "http://mobileapi.flipkart.net/3/discover/getSearch?store=%s&start=%d&count=10" % (store, start)
        print(pageUrl)
        response_data = fetchResponseUsingProxy(pageUrl, headers, livePricing=None, proxy=False, flipkart=False)
        # A plain dict would hand the products back in arbitrary hash order, so
        # the ranks assigned below would be arbitrary within each page.
        # OrderedDict keeps the order in which they appear in the response.
        # NOTE(review): presumably that order is the best-seller order - confirm.
        input_json = json.loads(response_data, object_pairs_hook=OrderedDict)
        for identifier, data in input_json['RESPONSE']['product'].items():
            titles = data['value']['titles']
            # title/subtitle may be missing (None); xstr turns those into "".
            title = (xstr(titles['title']) + " " + xstr(titles['subtitle'])).strip()
            print(title)
            # Rewrite the "dl" smart URL onto the www.flipkart.com host.
            productUrl = data['value']['smartUrl'].replace("http://dl.flipkart.com/dl", "http://www.flipkart.com")
            if verbose:
                print(productUrl)
                print(identifier)
                print("==================")
            bestSellers.append(__RankInfo(identifier, rank, category, title, productUrl))
            rank += 1


def scrapeBestSellerMobiles():
    """Queue the best sellers of store "tyy/4io" under category id 3."""
    _scrapeBestSellers("tyy/4io", 3)


def scrapeBestSellerTablets():
    """Queue the best sellers of store "tyy/hry" under category id 5.

    Also prints each product's URL and identifier, as this function
    always did.
    """
    _scrapeBestSellers("tyy/hry", 5, verbose=True)

def commitBestSellers():
    """Store the scraped ranks in Catalog.MasterData.

    For every entry of the module-level ``bestSellers`` list the stripped
    identifier is looked up in MasterData. When at least one document matches,
    ``rank`` and ``updatedOn`` are set on all matching documents
    (``multi=True``); otherwise the entry is appended to the module-level
    ``exceptionList`` so that sendEmail() can report it.
    """
    # Resolve the collection and the timestamp once; the original re-opened the
    # connection twice per product.
    masterData = get_mongo_connection(host=options.mongoHost).Catalog.MasterData
    updatedOn = to_java_date(now)
    for item in bestSellers:
        identifier = item.identifier.strip()
        matches = list(masterData.find({'identifier': identifier}))
        if not matches:
            exceptionList.append(item)
            continue
        masterData.update({'identifier': identifier},
                          {'$set': {'rank': item.rank, 'updatedOn': updatedOn}},
                          multi=True)

def resetRanks(category):
    """Set ``rank`` to 0 on every MasterData document of one category.

    category -- value matched against the documents' ``category_id`` field.

    ``updatedOn`` is refreshed to the run's start time on the same documents.
    """
    masterData = get_mongo_connection(host=options.mongoHost).Catalog.MasterData
    selector = {'category_id': category}
    changes = {'$set': {'rank': 0, 'updatedOn': to_java_date(now)}}
    masterData.update(selector, changes, multi=True)

def sendEmail():
    """Mail an HTML table of the best sellers that are missing from MasterData.

    One table row is produced per entry of the module-level ``exceptionList``
    (identifier, title, category name, rank, URL). The finished markup is
    passed through sanitizeUnicode(), printed, and handed to
    EmailAttachmentSender.mail_send_grid(). The mail is sent even when the
    list is empty, as before.
    """
    # Local import so the block does not rely on the file's import header;
    # xml.sax.saxutils is available on both Python 2 and Python 3.
    from xml.sax.saxutils import escape

    header = """<html>
            <body>
            <h3>Flipkart Best Sellers not in master</h3>
            <table border="1" style="width:100%;">
            <thead>
            <tr><th>Identifier</th>
            <th>Title</th>
            <th>Category</th>
            <th>Rank</th>
            <th>URL</th>
            </tr></thead>
            <tbody>"""
    rows = []
    for item in exceptionList:
        categoryName = CATEGORY_MAP.get(item.category)
        if categoryName is None:
            # Unknown category id: show the raw id instead of failing on
            # str + None concatenation.
            categoryName = str(item.category)
        # Identifier, title and URL come from scraped data, so they are escaped
        # before being placed into the markup.
        rows.append("""<tr>
        <td style="text-align:center">""" + escape(item.identifier) + """</td>
        <td style="text-align:center">""" + escape(item.title) + """</td>
        <td style="text-align:center">""" + escape(categoryName) + """</td>
        <td style="text-align:center">""" + str(item.rank) + """</td>
        <td style="text-align:center">""" + escape(item.url) + """</td>
        </tr>""")
    message = header + "".join(rows) + """</tbody></table></body></html>"""
    message = sanitizeUnicode(message)
    print(message)
    recipients = ['ritesh.chauhan@saholic.com']
    # SECURITY NOTE(review): the API key below is committed in source control.
    # It should be rotated and loaded from configuration or the environment;
    # it is left in place here only so the script keeps working unchanged.
    EmailAttachmentSender.mail_send_grid("dtr@profitmandi.com", "apikey", "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", recipients, "Flipkart Best Sellers", message, [], [], [])
     
    
def sanitizeUnicode(unicodeText):
    """Reduce text to trimmed, single-spaced ASCII and return it UTF-8 encoded.

    Every run of characters outside \\x00-\\x7F is dropped, every run of
    whitespace is collapsed to one space, and leading/trailing whitespace is
    stripped before encoding.
    """
    # Drop anything that is not 7-bit ASCII.
    asciiOnly = re.compile(r'[^\x00-\x7F]+').sub('', unicodeText)
    # "[^\S]" is "not non-whitespace", i.e. whitespace: collapse each run.
    singleSpaced = re.compile(r'[^\S]+').sub(' ', asciiOnly)
    trimmed = singleSpaced.strip()
    return trimmed.encode('utf-8', 'ignore')

def main():
    """Scrape both categories, refresh their ranks in MasterData, then report.

    All scraping is done before any rank is reset. The original reset the
    mobile ranks (category 3) before the tablet scrape ran, so a failure while
    fetching tablets left the mobile ranks zeroed and never re-committed. On a
    successful run the resulting database state is the same as before.
    """
    scrapeBestSellerMobiles()
    scrapeBestSellerTablets()
    # Only now, with all data in hand, wipe the previous ranks.
    resetRanks(3)
    resetRanks(5)
    commitBestSellers()
    sendEmail()


if __name__ == '__main__':
    main()