Subversion Repositories SmartDukaan

Rev

Rev 21136 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
13828 kshitij.so 1
import urllib
2
import urllib2
3
from BeautifulSoup import BeautifulSoup
4
import pymongo
5
import re
6
from dtr.utils.utils import to_java_date
7
from datetime import datetime
14257 kshitij.so 8
import optparse
14379 kshitij.so 9
import smtplib
10
from email.mime.text import MIMEText
11
from email.mime.multipart import MIMEMultipart
21135 kshitij.so 12
from shop2020.utils.EmailAttachmentSender import get_attachment_part
13
from shop2020.utils import EmailAttachmentSender
13828 kshitij.so 14
 
15
# Shared MongoDB client; created on first use by get_mongo_connection().
con = None

# Command-line interface: the only option is the MongoDB host name.
parser = optparse.OptionParser()
parser.add_option(
    "-m", "--m",
    dest="mongoHost",
    default="localhost",
    type="string",
    help="The HOST where the mongo server is running",
    metavar="mongo_host"
)
(options, args) = parser.parse_args()

# Rank records produced by the most recent scrape.
bestSellers = []

# Captured once at start-up so every document written in this run carries
# the same 'updatedOn' value.
now = datetime.now()

# Listing pages that are scraped for the best-seller ordering.
mobUrl = "http://www.saholic.com/mobile-phone/10006"
tabUrl = "http://www.saholic.com/all-tablets/10010"

# Scraped items with no matching MasterData document; reported by sendEmail().
exceptionList = []
30
 
13828 kshitij.so 31
class __RankInfo:
    """Plain record tying a scraped product identifier to its rank.

    Attributes:
        identifier: product identifier taken from the listing URL.
        rank: 1-based position of the product in the scraped listing.
        category: the scrapers pass None; commitBestSellers() assigns a
            category label to items it cannot find in MasterData.
    """

    def __init__(self, identifier, rank, category):
        self.identifier, self.rank, self.category = identifier, rank, category
13828 kshitij.so 37
 
14257 kshitij.so 38
def get_mongo_connection(host=options.mongoHost, port=27017):
    """Return the shared MongoClient, creating it on the first call.

    Args:
        host: Mongo host name. The default is the value of the ``--m``
            command-line option as it was when this function was defined.
        port: Mongo port.

    Returns:
        The cached client, or None when creating the client raised; the
        error is printed and the next call tries again.
    """
    global con
    if con is not None:
        return con

    print("Establishing connection %s host and port %d" % (host, port))
    try:
        con = pymongo.MongoClient(host, port)
    except Exception as err:
        print(err)
        return None
    return con
48
 
49
def getSoupObject(url, data):
    """Fetch *url* with browser-like headers and parse the body with BeautifulSoup.

    The request is attempted up to nine times; each failure is printed and
    followed by another attempt.

    Args:
        url: address to request.
        data: url-encoded request body handed to ``urllib2.Request`` (urllib2
            issues a POST when this is not None).

    Returns:
        The parsed BeautifulSoup document, or None when every attempt failed.
        Callers must be prepared for None.
    """
    # RETRY_COUNT stays a module-level global, exactly as before, in case
    # other code inspects it after a call.
    global RETRY_COUNT
    print("Getting soup object for %s" % url)
    RETRY_COUNT = 1
    while RETRY_COUNT < 10:
        try:
            request = urllib2.Request(url, data)
            request.add_header('Accept','text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8')
            request.add_header('Accept-Language','en-US,en;q=0.8,hi;q=0.6')
            request.add_header('Connection','keep-alive')
            request.add_header('User-Agent','Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.120 Safari/537.36')

            response = urllib2.urlopen(request)
            try:
                response_data = response.read()
            finally:
                # Release the connection even when read() fails part-way.
                response.close()

            try:
                page = response_data.decode("utf-8")
                return BeautifulSoup(page, convertEntities=BeautifulSoup.HTML_ENTITIES)
            except Exception:
                # The body was not valid UTF-8 (or parsing the decoded text
                # failed): hand the raw bytes to BeautifulSoup instead.
                return BeautifulSoup(response_data, convertEntities=BeautifulSoup.HTML_ENTITIES)
        except Exception as e:
            print(e)
            print("Retrying")
            RETRY_COUNT = RETRY_COUNT + 1
    # Every attempt failed.
    return None
77
 
78
def _scrapeBestSellers(listingUrl):
    """Rebuild the global ``bestSellers`` list from pages 1-5 of *listingUrl*.

    Each listing page is requested with the in-stock filter. Every
    ``<div class="title">`` contributes one rank record, numbered
    consecutively from 1 across all pages in the order encountered.
    """
    global bestSellers
    # Start from a clean list so results of an earlier scrape never leak in.
    bestSellers = []
    rank = 1
    for page in range(1, 6):
        query_args = {'fq': 'F_50028:In Stock', 'page': page}
        data = urllib.urlencode(query_args)
        soup = getSoupObject(listingUrl, data)
        for titleTag in soup.findAll('div', {'class': 'title'}):
            anchor = titleTag.find('a')
            print(anchor.text)
            identifier = getSaholicIdentifier(anchor['href'])
            bestSellers.append(__RankInfo(str(identifier), rank, None))
            rank = rank + 1


def scrapeBestSellerMobiles():
    """Populate ``bestSellers`` with the ranked products of the mobile listing."""
    _scrapeBestSellers(mobUrl)


def scrapeBestSellerTablets():
    """Populate ``bestSellers`` with the ranked products of the tablet listing."""
    _scrapeBestSellers(tabUrl)
108
 
109
def getSaholicIdentifier(url):
    """Return the part of *url* after its last '-'.

    When *url* contains no '-', the whole string is returned unchanged.
    """
    _head, _dash, identifier = url.rpartition('-')
    return identifier
111
 
112
def resetRanks(category):
    """Set the rank back to 0 on every ranked source-4 document of *category*.

    Each affected document's ``_id`` is printed, then its ``rank`` is set to
    0 and ``updatedOn`` is set to the run timestamp.
    """
    masterData = get_mongo_connection().Catalog.MasterData
    rankedQuery = {'rank':{'$gt':0},'source_id':4,'category':category}
    for doc in masterData.find(rankedQuery):
        docId = doc['_id']
        print(docId)
        resetFields = {'$set' : {'rank':0,'updatedOn':to_java_date(now)}}
        masterData.update({'_id':docId}, resetFields, multi=True)
117
 
14379 kshitij.so 118
def commitBestSellers(category):
    """Store the scraped ranks in MasterData and collect unknown identifiers.

    For every record in ``bestSellers``:
      * if no MasterData document has that identifier, the record is tagged
        with *category* and appended to ``exceptionList``;
      * otherwise all source-4 documents with that identifier get the
        record's rank and the run timestamp as ``updatedOn``.

    Args:
        category: label stored on records that end up in ``exceptionList``.
    """
    print("Rank \tIdentifier")
    for x in bestSellers:
        print("%s \t%s \tcount sku" % (x.rank, x.identifier))
        masterData = get_mongo_connection().Catalog.MasterData
        # find_one stops at the first match, so existence is tested without
        # pulling every matching document over the wire.
        # NOTE(review): this check ignores source_id while the update below
        # filters on source_id 4 -- confirm that is intended.
        if masterData.find_one({'identifier':x.identifier}) is None:
            x.category = category
            exceptionList.append(x)
        else:
            masterData.update({'identifier':x.identifier,'source_id':4}, {'$set' : {'rank':x.rank,'updatedOn':to_java_date(now)}}, multi=True)
13828 kshitij.so 135
 
14379 kshitij.so 136
def sendEmail():
    """Mail an HTML table of the best sellers that are missing from master data.

    One table row is produced per record in ``exceptionList`` (identifier,
    category, rank). The message is printed and then sent through
    ``EmailAttachmentSender.mail_send_grid``; it is sent even when the list
    is empty.
    """
    import os  # local import: only needed for the credential override below

    header = """<html>
            <body>
            <h3>Saholic Best Sellers not in master</h3>
            <table border="1" style="width:100%;">
            <thead>
            <tr><th>Identifier</th>
            <th>Category</th>
            <th>Rank</th>
            </tr></thead>
            <tbody>"""
    rowTemplate = """<tr>
        <td style="text-align:center">%s</td>
        <td style="text-align:center">%s</td>
        <td style="text-align:center">%s</td>
        </tr>"""
    # Build all rows first and join once instead of growing the string
    # with += inside the loop.
    rows = [rowTemplate % (item.identifier, item.category, str(item.rank))
            for item in exceptionList]
    message = header + "".join(rows) + """</tbody></table></body></html>"""
    print(message)

    recipients = ['kshitij.sood@saholic.com','ritesh.chauhan@saholic.com','aishwarya.singh@saholic.com']
    # SECURITY NOTE(review): this SendGrid key is committed to version
    # control. Rotate it and supply the replacement via SENDGRID_API_KEY so
    # the literal fallback can be deleted.
    apiKey = os.environ.get("SENDGRID_API_KEY", "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")
    EmailAttachmentSender.mail_send_grid("dtr@smartdukaan.com","apikey", apiKey, recipients, "Saholic Best Sellers",message ,[],[],[])
14379 kshitij.so 157
 
21135 kshitij.so 158
 
159
 
160
 
13828 kshitij.so 161
def main():
    """Scrape, reset and store ranks for mobiles then tablets, then send the report.

    For each listing the ranks are only reset and rewritten when the scrape
    produced at least one record.
    """
    # (scraper, category passed to resetRanks, category passed to commitBestSellers)
    # The two category spellings differ in case; both are kept as they were.
    jobs = (
        (scrapeBestSellerMobiles, 'Mobiles', 'MOBILES'),
        (scrapeBestSellerTablets, 'Tablets', 'TABLETS'),
    )
    for scrape, resetCategory, commitCategory in jobs:
        scrape()
        if bestSellers:
            resetRanks(resetCategory)
            commitBestSellers(commitCategory)

    sendEmail()


if __name__ == '__main__':
    main()