Subversion Repositories SmartDukaan

Rev

Rev 22737 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
19645 kshitij.so 1
from dtr.utils.utils import to_java_date, fetchResponseUsingProxy, get_mongo_connection, CATEGORY_MAP
13828 kshitij.so 2
from datetime import datetime
14257 kshitij.so 3
import optparse
14379 kshitij.so 4
import smtplib
5
from email.mime.text import MIMEText
6
from email.mime.multipart import MIMEMultipart
19645 kshitij.so 7
import json
20357 kshitij.so 8
import chardet
21135 kshitij.so 9
from shop2020.utils.EmailAttachmentSender import get_attachment_part
10
from shop2020.utils import EmailAttachmentSender
22736 amit.gupta 11
import re
13754 kshitij.so 12
 
19645 kshitij.so 13
 
14
# HTTP headers passed to fetchResponseUsingProxy() for every search request
# issued by the scrape functions in this script.
headers = {}
headers['Browser-Name'] = 'Chrome'
headers['User-Agent'] = 'Mozilla/5.0 (Linux; Android 5.1.1; A0001 Build/LMY48B; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/43.0.2357.121 Mobile Safari/537.36 FKUA/Retail/550900/Android/Mobile (OnePlus/A0001)'
headers['Host'] = 'mobileapi.flipkart.net'
19
 
13754 kshitij.so 20
con = None
14257 kshitij.so 21
parser = optparse.OptionParser()
22
parser.add_option("-m", "--m", dest="mongoHost",
23
                      default="localhost",
24
                      type="string", help="The HOST where the mongo server is running",
25
                      metavar="mongo_host")
26
 
27
(options, args) = parser.parse_args()
28
 
13754 kshitij.so 29
# Shared state for the run:
#   bestSellers   - rank records appended by the scrape functions
#   exceptionList - records whose identifier was not found in MasterData
#   now           - single timestamp reused for every 'updatedOn' write
bestSellers = list()
exceptionList = list()
now = datetime.now()


def xstr(s):
    """Return s unchanged when it is truthy, otherwise an empty string."""
    return s or ""
19645 kshitij.so 34
 
13754 kshitij.so 35
class __RankInfo(object):
    """One scraped best-seller entry.

    Derives from ``object`` so it is a new-style class on Python 2 as well.

    Attributes:
        identifier: product identifier key taken from the API response.
        rank: 1-based position assigned by the scraper.
        category: numeric category id supplied by the scraper.
        title: product title (title plus subtitle, as built by the scraper).
        url: product page URL.
    """

    def __init__(self, identifier, rank, category, title, url):
        self.identifier = identifier
        self.rank = rank
        self.category = category
        self.title = title
        self.url = url

    def __repr__(self):
        # Readable form for debugging and log output.
        return "%s(identifier=%r, rank=%r, category=%r, title=%r, url=%r)" % (
            type(self).__name__,
            self.identifier,
            self.rank,
            self.category,
            self.title,
            self.url,
        )
13754 kshitij.so 43
 
18284 kshitij.so 44
 
13754 kshitij.so 45
def _scrapeBestSellers(store, category, verbose=False):
    """Fetch ten pages of ten search results for a store and record ranks.

    Appends one __RankInfo per product to the module-level ``bestSellers``
    list, numbering products from 1 in the order they are encountered.

    Args:
        store: store path placed in the ``store=`` query parameter.
        category: category id stored on every record that is created.
        verbose: when true, also print the product URL, the identifier and
            a separator line for each product.

    Any fetch, JSON or key error propagates to the caller on purpose:
    main() resets the stored ranks only after a scrape has completed.
    """
    # Imported here so the block does not depend on a file-level import.
    from collections import OrderedDict

    rank = 1
    for start in range(0, 100, 10):
        page_url = "http://mobileapi.flipkart.net/3/discover/getSearch?store=%s&start=%d&count=10" % (store, start)
        print(page_url)
        response_data = fetchResponseUsingProxy(page_url, headers, livePricing=None, proxy=False, flipkart=False)
        # A plain dict iterates in arbitrary order on Python 2, which made
        # the rank inside each page depend on hashing. OrderedDict keeps the
        # order in which the response lists the products.
        # NOTE(review): presumably that is best-seller order - confirm
        # against the API.
        input_json = json.loads(response_data, object_pairs_hook=OrderedDict)
        for identifier, data in input_json['RESPONSE']['product'].items():
            titles = data['value']['titles']
            title = (xstr(titles['title']) + " " + xstr(titles['subtitle'])).strip()
            print(title)
            # Rewrite the deep-link host to the regular website host.
            product_url = data['value']['smartUrl'].replace("http://dl.flipkart.com/dl", "http://www.flipkart.com")
            if verbose:
                print(product_url)
                print(identifier)
                print("==================")
            bestSellers.append(__RankInfo(identifier, rank, category, title, product_url))
            rank += 1


def scrapeBestSellerMobiles():
    """Collect ranked products for store 'tyy/4io', tagged with category 3."""
    _scrapeBestSellers("tyy/4io", 3)


def scrapeBestSellerTablets():
    """Collect ranked products for store 'tyy/hry', tagged with category 5.

    Also prints the URL and identifier of every product, as before.
    """
    _scrapeBestSellers("tyy/hry", 5, verbose=True)
13754 kshitij.so 84
 
19648 kshitij.so 85
def commitBestSellers():
    """Write scraped ranks to MasterData and collect the unmatched items.

    For every entry in ``bestSellers``: when no MasterData document has the
    entry's (stripped) identifier, the entry is appended to
    ``exceptionList``; otherwise 'rank' and 'updatedOn' are set on all
    documents with that identifier.
    """
    # Loop-invariant work is done once instead of once or twice per item.
    # NOTE(review): assumes get_mongo_connection() returns a PyMongo client,
    # as the find()/update(multi=True) calls already in this file suggest.
    master_data = get_mongo_connection(host=options.mongoHost).Catalog.MasterData
    updated_on = to_java_date(now)

    for x in bestSellers:
        identifier = x.identifier.strip()
        selector = {'identifier': identifier}
        # find_one() answers "does any document match?" without loading
        # every matching document into a list first.
        if master_data.find_one(selector) is None:
            exceptionList.append(x)
        else:
            master_data.update(selector, {'$set': {'rank': x.rank, 'updatedOn': updated_on}}, multi=True)
13754 kshitij.so 93
 
94
def resetRanks(category):
    """Zero the rank of every MasterData document in one category.

    Args:
        category: value matched against the 'category_id' field.

    Every matching document gets rank 0 and 'updatedOn' set from ``now``.
    """
    master_data = get_mongo_connection(host=options.mongoHost).Catalog.MasterData
    selector = {'category_id': category}
    changes = {'$set': {'rank': 0, 'updatedOn': to_java_date(now)}}
    master_data.update(selector, changes, multi=True)
14379 kshitij.so 96
 
97
def sendEmail():
    """Mail an HTML table of the best sellers missing from MasterData.

    Builds one table row per entry in ``exceptionList``, passes the markup
    through sanitizeUnicode(), prints it, and sends it with
    EmailAttachmentSender.mail_send_grid().
    """
    # html.escape exists on Python 3 only; cgi.escape is the Python 2 form.
    try:
        from html import escape
    except ImportError:
        from cgi import escape

    def cell(value):
        # The values come from a scraped third-party response, so they are
        # escaped before being placed inside markup.
        return '<td style="text-align:center">' + escape(value, True) + '</td>'

    parts = ["""<html>
            <body>
            <h3>Flipkart Best Sellers not in master</h3>
            <table border="1" style="width:100%;">
            <thead>
            <tr><th>Identifier</th>
            <th>Title</th>
            <th>Category</th>
            <th>Rank</th>
            <th>URL</th>
            </tr></thead>
            <tbody>"""]

    for item in exceptionList:
        category_name = CATEGORY_MAP.get(item.category)
        if category_name is None:
            # An unmapped category used to raise TypeError on concatenation;
            # show the raw id instead.
            category_name = str(item.category)
        cells = [
            cell(item.identifier),
            cell(item.title),
            cell(category_name),
            cell(str(item.rank)),
            cell(item.url),
        ]
        # Newlines between tags collapse to single spaces in
        # sanitizeUnicode(), matching the spacing of the previous layout.
        parts.append("<tr>\n" + "\n".join(cells) + "\n</tr>")

    parts.append("""</tbody></table></body></html>""")

    message = sanitizeUnicode("".join(parts))
    print(message)

    recipients = ['ritesh.chauhan@saholic.com']
    # NOTE(review): the credential below (it looks like a SendGrid API key)
    # is hard-coded in source control. It should be rotated and loaded from
    # configuration or the environment; it is kept here unchanged so the
    # script continues to run.
    EmailAttachmentSender.mail_send_grid("dtr@smartdukaan.com", "apikey", "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", recipients, "Flipkart Best Sellers", message, [], [], [])
21135 kshitij.so 124
 
14379 kshitij.so 125
 
22736 amit.gupta 126
def sanitizeUnicode(unicodeText):
    """Reduce text to trimmed, single-spaced ASCII and return it encoded.

    Args:
        unicodeText: the text to clean.

    Returns:
        The text with every character outside \\x00-\\x7F removed, each
        whitespace run replaced by one space, leading and trailing
        whitespace stripped, then encoded as UTF-8.
    """
    # Drop each run of characters outside the 7-bit ASCII range.
    ascii_only = re.compile(r'[^\x00-\x7F]+').sub('', unicodeText)
    # Collapse each whitespace run into a single space.
    single_spaced = re.compile(r'[^\S]+').sub(' ', ascii_only)
    trimmed = single_spaced.strip()
    return trimmed.encode('utf-8', 'ignore')
132
 
13754 kshitij.so 133
def main():
    """Run the whole job: scrape, reset ranks, commit, then send the mail.

    For each category the scrape runs first and the rank reset second, so a
    scrape that raises leaves that category's stored ranks untouched.
    """
    steps = (
        (scrapeBestSellerMobiles, 3),
        (scrapeBestSellerTablets, 5),
    )
    for scrape, category in steps:
        scrape()
        resetRanks(category)
    commitBestSellers()
    sendEmail()


if __name__ == '__main__':
    main()