Subversion Repositories SmartDukaan

Rev

Rev 21135 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
13754 kshitij.so 1
import urllib2
2
import simplejson as json
3
import pymongo
13828 kshitij.so 4
from dtr.utils.utils import to_java_date
5
from datetime import datetime
14257 kshitij.so 6
import optparse
14379 kshitij.so 7
import smtplib
8
from email.mime.text import MIMEText
9
from email.mime.multipart import MIMEMultipart
15829 kshitij.so 10
from BeautifulSoup import BeautifulSoup
11
from dtr.utils.utils import ungzipResponse
12
import json
20343 kshitij.so 13
import chardet
21135 kshitij.so 14
from shop2020.utils.EmailAttachmentSender import get_attachment_part
15
from shop2020.utils import EmailAttachmentSender
13754 kshitij.so 16
 
14257 kshitij.so 17
# --- Command line -----------------------------------------------------------
# Only one switch is defined: -m/--m selects the MongoDB host.
parser = optparse.OptionParser()
parser.add_option(
    "-m", "--m",
    dest="mongoHost",
    type="string",
    default="localhost",
    metavar="mongo_host",
    help="The HOST where the mongo server is running",
)
options, args = parser.parse_args()

# --- Shared mutable state ---------------------------------------------------
# Cached MongoClient; populated on first use by get_mongo_connection().
con = None

# Records for best sellers that could not be found in MasterData; filled by
# commitBestSellers() and rendered by sendEmail().
exceptionList = []

# Not referenced anywhere else in the visible code.
noAttributesSupc = []

# Result of the most recent scrape: one {ppid: [rank records]} map per product.
bestSellers = []

# --- Constants --------------------------------------------------------------
# Category id -> label shown in the report mail (3 and 5 are the ids the
# scrape functions pass for mobiles and tablets respectively).
categoryMap = {
    3: 'Mobiles',
    5: 'Tablets',
}

# Request headers sent with every page fetch in getSoupObject().
headers = {
    'User-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.8',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
    'Accept-Encoding': 'gzip,deflate,sdch',
}

# Captured once at import so every write of this run carries the same
# 'updatedOn' timestamp.
now = datetime.now()

# Not referenced anywhere else in the visible code.
BASE_URL = "snapdeal.com/"
13754 kshitij.so 42
 
43
class __RankInfo:
    """Best-seller ranking record for one product variant.

    Instances are created by fetchSupcDetails() and consumed by
    commitBestSellers(). Every constructor argument is stored unchanged:

      identifier    -- SUPC of the variant (looked up as 'identifier' in MasterData)
      rank          -- best-seller position assigned by the scraper
      category      -- category id passed by the scraper (3 or 5 in this file)
      product_name  -- product name read from the product page
      page_url      -- URL of the product page
      baseColor     -- the variant's 'value' field ("" for the default variant)
      ppid          -- product page id the variant belongs to
      subAttributes -- list of __SubAttributes records for this variant
      live          -- the variant's 'live' flag from the page JSON
    """

    def __init__(self, identifier, rank, category, product_name, page_url, baseColor, ppid, subAttributes, live):
        self.identifier = identifier
        self.rank = rank
        self.category = category
        self.product_name = product_name
        self.page_url = page_url
        self.baseColor = baseColor
        self.ppid = ppid
        self.subAttributes = subAttributes
        self.live = live

    def __repr__(self):
        # The script prints collections of these records for debugging;
        # without a __repr__ the log shows only opaque instance addresses.
        return ("%s(identifier=%r, rank=%r, category=%r, product_name=%r, "
                "page_url=%r, baseColor=%r, ppid=%r, subAttributes=%r, live=%r)" % (
                    self.__class__.__name__, self.identifier, self.rank,
                    self.category, self.product_name, self.page_url,
                    self.baseColor, self.ppid, self.subAttributes, self.live))
55
 
56
class __SubAttributes:
    """One sub-attribute option of a product variant.

    Created by fetchSupcDetails() from each entry of a variant's
    'subAttributes' list. Arguments are stored unchanged:

      identifier -- SUPC of the sub-attribute option
      color      -- 'value' of the parent variant
      name       -- the entry's 'name' field
      value      -- the entry's 'value' field
    """

    def __init__(self, identifier, color, name, value):
        self.identifier = identifier
        self.color = color
        self.name = name
        self.value = value

    def __repr__(self):
        # These records are nested inside printed rank records; give them a
        # readable form instead of the default instance address.
        return "%s(identifier=%r, color=%r, name=%r, value=%r)" % (
            self.__class__.__name__, self.identifier, self.color,
            self.name, self.value)
13754 kshitij.so 63
 
15829 kshitij.so 64
class __Exception:
    """Report row for a best seller that is missing from MasterData.

    Despite the name this is a plain record, not a raisable exception.
    commitBestSellers() appends instances to exceptionList and sendEmail()
    renders them as table rows. Arguments are stored unchanged:

      identifier   -- SUPC that was not found
      color        -- colour of the variant
      desc         -- "<name> <value>" of a sub-attribute, or "" for a base variant
      pageurl      -- URL of the product page
      rank         -- best-seller position of the product
      category     -- category id (key of categoryMap)
      product_name -- product name read from the product page
    """

    def __init__(self, identifier, color, desc, pageurl, rank, category, product_name):
        self.identifier = identifier
        self.color = color
        self.desc = desc
        self.pageurl = pageurl
        self.rank = rank
        self.category = category
        self.product_name = product_name

    def __repr__(self):
        # exceptionList is printed after every commit; make that output
        # readable instead of a list of instance addresses.
        return ("%s(identifier=%r, color=%r, desc=%r, pageurl=%r, rank=%r, "
                "category=%r, product_name=%r)" % (
                    self.__class__.__name__, self.identifier, self.color,
                    self.desc, self.pageurl, self.rank, self.category,
                    self.product_name))
74
 
75
 
76
 
14257 kshitij.so 77
def get_mongo_connection(host=options.mongoHost, port=27017):
    """Return the process-wide MongoClient, creating it on first use.

    The client is cached in the module-level ``con``; once it exists the
    ``host``/``port`` arguments are ignored. If constructing the client
    raises, the error is printed and None is returned (nothing is cached,
    so the next call tries again).
    """
    global con
    if con is not None:
        return con

    print("Establishing connection %s host and port %d" % (host, port))
    try:
        con = pymongo.MongoClient(host, port)
    except Exception as e:
        print(e)
        return None
    return con
87
 
15829 kshitij.so 88
def getSoupObject(url, max_attempts=9):
    """Download *url* and parse the body with BeautifulSoup, retrying on error.

    Parameters:
      url          -- address to fetch; sent with the module-level ``headers``
      max_attempts -- how many times to try before giving up (default 9)

    Returns the parsed BeautifulSoup object, or None when every attempt
    failed. Errors are printed, never raised, so callers must check for None.
    """
    print("Getting soup object for")
    print(url)
    # A local counter keeps the retry state out of module globals.
    for _attempt in range(max_attempts):
        try:
            request = urllib2.Request(url, headers=headers)
            response = urllib2.urlopen(request)
            try:
                # ungzipResponse is a project helper (presumably it
                # decompresses gzip-encoded bodies -- confirm in dtr.utils).
                response_data = ungzipResponse(response)
            finally:
                # Release the connection even when reading the body fails.
                response.close()
            try:
                page = response_data.decode("utf-8")
                return BeautifulSoup(page, convertEntities=BeautifulSoup.HTML_ENTITIES)
            except Exception:
                # Body is not valid UTF-8 (or parsing the decoded text
                # failed): fall back to letting BeautifulSoup work on the
                # raw bytes.
                return BeautifulSoup(response_data, convertEntities=BeautifulSoup.HTML_ENTITIES)
        except Exception as e:
            print(e)
            print("Retrying")
    return None
112
 
113
def fetchSupcDetails(p_url, rank, category):
    """Scrape one product page and return its variants as rank records.

    Parameters:
      p_url    -- URL of the product page
      rank     -- best-seller position to stamp on every variant
      category -- category id to stamp on every variant

    Returns ``{ppid: [__RankInfo, ...]}`` with a single key. When the page
    lists no variants, the list holds one record built from the page's
    default SUPC, with an empty colour and ``live=True``.

    Raises ValueError when the page cannot be downloaded; lookups of
    mandatory page elements raise whatever the missing element produces
    (AttributeError/TypeError/KeyError), and malformed variant JSON raises
    ValueError.
    """
    soup = getSoupObject(p_url)
    if soup is None:
        # Fail with a message that names the page instead of an
        # AttributeError on None further down.
        raise ValueError("Could not download product page %r" % (p_url,))

    attributes = soup.find('div', {'id': 'attributesJson'}).string
    try:
        ppid = soup.find('input', {'id': 'pppid'})['value']
    except (TypeError, KeyError):
        # No usable 'pppid' input on the page: use the last URL segment.
        ppid = p_url[p_url.rfind('/') + 1:]
    productName = soup.find('input', {'id': 'productNamePDP'})['value']

    variants = json.loads(attributes)
    if not variants:
        # Decide on the parsed value rather than the literal text "[]" so
        # harmless whitespace differences do not lose the product.
        supc = soup.find('div', {'id': 'defaultSupc'}).string
        default_record = __RankInfo(supc, rank, category, productName, p_url, "", ppid, [], True)
        return {ppid: [default_record]}

    records = []
    for product in variants:
        color = product['value']
        record = __RankInfo(product['supc'], rank, category, productName, p_url,
                            color, ppid, [], product['live'])
        # 'subAttributes' may be null or absent; both mean "no options".
        for subAttribute in product.get('subAttributes') or []:
            record.subAttributes.append(
                __SubAttributes(subAttribute['supc'], color,
                                subAttribute['name'], subAttribute['value']))
        records.append(record)
    return {ppid: records}
145
 
146
 
147
 
13754 kshitij.so 148
def _scrapeBestSellers(url_template, category):
    """Rebuild the global ``bestSellers`` list from a paginated listing.

    Parameters:
      url_template -- listing URL containing one ``%d`` for the page offset
      category     -- category id stamped on every scraped product

    Offsets 0, 20, 40, 60 and 80 are requested. Each product link found is
    passed to fetchSupcDetails() and the resulting map is appended to
    ``bestSellers``. The rank starts at 1 and only advances for products
    that were fetched successfully, so skipped products leave no gap.
    """
    global bestSellers
    bestSellers = []
    rank = 1
    for offset in range(0, 100, 20):
        soup = getSoupObject(url_template % (offset))
        if soup is None:
            # The download failed after all retries; skip this page rather
            # than crash the whole run on None.findAll.
            print("Could not fetch listing page at offset %d, skipping" % offset)
            continue
        for product in soup.findAll('a', {'class': 'dp-widget-link'}):
            print(product['href'])
            try:
                supcMap = fetchSupcDetails(product['href'], rank, category)
            except Exception as e:
                # Best effort: one broken product page must not stop the
                # scrape, but say why it was skipped.
                print("Skipping %r: %r" % (product['href'], e))
                continue
            print("supcMap  %s" % (supcMap,))
            bestSellers.append(supcMap)
            rank = rank + 1


def scrapeBestSellerMobiles():
    """Fill ``bestSellers`` with the top mobile listings (category id 3)."""
    _scrapeBestSellers(
        "http://www.snapdeal.com/acors/json/product/get/search/175/%d/20?q=&sort=bstslr&keyword=&clickSrc=&viewType=List&lang=en&snr=false",
        3)


def scrapeBestSellerTablets():
    """Fill ``bestSellers`` with the top tablet listings (category id 5)."""
    _scrapeBestSellers(
        "http://www.snapdeal.com/acors/json/product/get/search/133/%d/20?sort=bstslr&keyword=&clickSrc=&viewType=List&lang=en&snr=false",
        5)
13754 kshitij.so 181
 
182
 
183
def resetRanks(category):
    """Set ``rank`` back to 0 on every ranked source-3 item of *category*.

    Matches MasterData documents with ``rank > 0``, ``source_id == 3`` and
    the given ``category_id``, and stamps them with the run timestamp.
    """
    master_data = get_mongo_connection().Catalog.MasterData
    ranked_in_category = {'rank': {'$gt': 0}, 'source_id': 3, 'category_id': category}
    clear_rank = {'$set': {'rank': 0, 'updatedOn': to_java_date(now)}}
    master_data.update(ranked_in_category, clear_rank, multi=True)
13754 kshitij.so 185
 
14379 kshitij.so 186
def commitBestSellers(category):
    """Write the scraped ranks in ``bestSellers`` to MasterData.

    For every scraped variant, and every sub-attribute of it, the matching
    source-3 MasterData documents get the variant's rank and the run
    timestamp. Identifiers with no MasterData document are recorded in
    ``exceptionList`` (stamped with *category*) for the report mail; a
    variant that has sub-attributes is only reported through those
    sub-attributes.
    """
    master_data = get_mongo_connection().Catalog.MasterData

    def _in_master(identifier):
        # Existence check only; no need to materialise every match.
        return master_data.find_one({'identifier': identifier, 'source_id': 3}) is not None

    def _set_rank(identifier, rank):
        master_data.update({'identifier': identifier, 'source_id': 3},
                           {'$set': {'rank': rank, 'updatedOn': to_java_date(now)}},
                           multi=True)

    for bestSeller in bestSellers:
        for records in bestSeller.values():
            for v in records:
                # Strip once and use the same value for lookup and update,
                # so padded identifiers cannot be found but then not updated.
                identifier = v.identifier.strip()
                found = _in_master(identifier)
                if not found and not v.subAttributes:
                    exceptionList.append(
                        __Exception(identifier, v.baseColor, "", v.page_url,
                                    v.rank, category, v.product_name))
                    continue
                print(identifier)
                print(v.rank)
                if found:
                    _set_rank(identifier, v.rank)
                for subAttr in v.subAttributes:
                    print("Inside subattr")
                    print(vars(subAttr))
                    sub_identifier = subAttr.identifier.strip()
                    if not _in_master(sub_identifier):
                        exceptionList.append(
                            __Exception(sub_identifier, subAttr.color,
                                        subAttr.name + " " + subAttr.value,
                                        v.page_url, v.rank, category, v.product_name))
                    else:
                        # Log the identifier actually being updated.
                        print(sub_identifier)
                        print(v.rank)
                        _set_rank(sub_identifier, v.rank)
    print(exceptionList)
13754 kshitij.so 211
 
14379 kshitij.so 212
def sendEmail():
    """Mail the "best sellers not in master" report built from exceptionList.

    Renders one HTML table row per entry of ``exceptionList``, prints the
    resulting document and sends it through
    ``EmailAttachmentSender.mail_send_grid``. A row that cannot be rendered
    (for example a value that cannot be converted to text) is skipped and
    the reason is printed; the remaining rows are still sent.
    """

    def _escape(text):
        # The cell values are scraped from an external site: neutralise HTML
        # metacharacters so they cannot break or inject markup in the mail.
        return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")

    message = """<html>
            <body>
            <h3>Snapdeal Best Sellers not in master</h3>
            <table border="1" style="width:100%;">
            <thead>
            <tr><th>Identifier</th>
            <th>Category</th>
            <th>Rank</th>
            <th>Product Name</th>
            <th>Color</th>
            <th>Description</th>
            <th>Page Url</th>
            </tr></thead>
            <tbody>"""
    for item in exceptionList:
        try:
            cells = [
                item.identifier,
                # Unknown category ids are shown as-is instead of dropping the row.
                categoryMap.get(item.category, str(item.category)),
                str(item.rank),
                str(item.product_name),
                item.color,
                item.desc,
                item.pageurl,
            ]
            row = "<tr>\n"
            for cell in cells:
                row += '            <td style="text-align:center">' + _escape(cell) + "</td>\n"
            row += "            </tr>"
            message += row
        except Exception as e:
            # Best effort: keep sending the other rows, but leave a trace of
            # what was dropped and why.
            print("Skipping report row for %r: %r" % (item.identifier, e))
    message += """</tbody></table></body></html>"""
    print(message)
    recipients = ['kshitij.sood@saholic.com','ritesh.chauhan@saholic.com','aishwarya.singh@saholic.com']
    # NOTE(security): the mail API credential below is hard-coded in source
    # control. It should be rotated and loaded from configuration or the
    # environment instead.
    EmailAttachmentSender.mail_send_grid("dtr@smartdukaan.com","apikey", "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", recipients, "Snapdeal Best Sellers",message ,[],[],[])
14379 kshitij.so 246
 
247
 
13754 kshitij.so 248
def main():
    """Scrape, commit and report best sellers for mobiles, then tablets.

    For each category the listing is scraped into ``bestSellers``. Existing
    ranks are only reset and rewritten when the scrape produced at least one
    product, so a failed scrape leaves the stored ranks untouched. Finally
    the last scrape result is printed and the report mail is sent.
    """
    for scrape, category in ((scrapeBestSellerMobiles, 3), (scrapeBestSellerTablets, 5)):
        scrape()
        if bestSellers:
            resetRanks(category)
            commitBestSellers(category)
    print(bestSellers)
    sendEmail()


if __name__ == '__main__':
    main()