Subversion Repositories SmartDukaan

Rev

Rev 15826 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
12256 kshitij.so 1
from elixir import session
12268 kshitij.so 2
from shop2020.config.client.ConfigClient import ConfigClient
12256 kshitij.so 3
from sqlalchemy.sql import asc
4
from sqlalchemy.sql.expression import or_
5
from shop2020.utils.daemon import Daemon
6
import optparse
7
import sys
8
import mechanize
9
import time
10
from shop2020.model.v1.catalog.impl import DataService
12297 kshitij.so 11
from shop2020.model.v1.catalog.impl.DataService import CompetitorPricing, CompetitorPricingRequest
12272 kshitij.so 12
import gc 
15825 kshitij.so 13
from operator import itemgetter
12256 kshitij.so 14
 
12268 kshitij.so 15
# Look up the database host under the 'staging_hostname' config property and
# initialise the catalog data service against it.
config_client = ConfigClient()
host = config_client.get_property('staging_hostname')
DataService.initialize(db_hostname=host)

# HTTP request headers sent with the Snapdeal request (see scrapSnapdeal).
headers = dict([
    ('User-agent', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11'),
    ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
    ('Accept-Language', 'en-US,en;q=0.8'),
    ('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.3'),
])
25
 
12256 kshitij.so 26
class CompetitorScraping(Daemon):
    """Daemon whose run() executes the competitor-pricing polling loop (start())."""

    DEFAULT_LOGFILE = '/var/log/services/competitorScraping.log'
    DEFAULT_PIDFILE = '/var/run/competitor-scraper.pid'

    def __init__(self, logfile=DEFAULT_LOGFILE, pidfile=DEFAULT_PIDFILE):
        """Set up the daemon with stdout and stderr both directed to `logfile`.

        `None` for either argument selects the default path. The command-line
        entry point passes None when -l/-i are omitted, which would otherwise
        override the defaults declared in this signature.
        """
        if logfile is None:
            logfile = self.DEFAULT_LOGFILE
        if pidfile is None:
            pidfile = self.DEFAULT_PIDFILE
        Daemon.__init__(self, pidfile, stdout=logfile, stderr=logfile)

    def run(self):
        """Run the polling loop."""
        start()
32
 
33
def start():
34
    try:
35
        while True:
36
            requests = session.query(CompetitorPricingRequest).filter(or_(CompetitorPricingRequest.isProcessed==False,CompetitorPricingRequest.isProcessed==None)).order_by(asc(CompetitorPricingRequest.requestId)).all()
37
            if requests ==[] or requests is None:
38
                print "No new request to process, sleeeeeeping....."
12281 kshitij.so 39
                close_session()
40
                collected = gc.collect()
12292 kshitij.so 41
                print locals()
42
                print globals()
12281 kshitij.so 43
                print "Garbage collector: collected %d objects." % (collected)
12256 kshitij.so 44
                time.sleep(600)
45
            for request in requests:
46
                fetchDetails(request)
47
                request.isProcessed = True
48
                session.commit()
12314 kshitij.so 49
                sendMail(request)
12256 kshitij.so 50
            close_session()
12279 kshitij.so 51
            collected = gc.collect()
52
            print "Garbage collector: collected %d objects." % (collected)
12292 kshitij.so 53
            print locals()
54
            print globals()
12279 kshitij.so 55
            requests = []
12256 kshitij.so 56
    except Exception as e:
57
        print e
58
        sys.exit(2)
59
 
60
def fetchDetails(request):
12297 kshitij.so 61
    import threading
12256 kshitij.so 62
    items = session.query(CompetitorPricing).filter(CompetitorPricing.competitorPricing_requestId==request.requestId).all()
63
    print items
64
    snapdeal, flipkart, amazon =[],[],[]
65
    for item in items:
66
        if item.snapdealScraping:
67
            snapdeal.append(item)
68
        if item.flipkartScraping:
69
            flipkart.append(item)
70
        if item.amazonScraping:
71
            amazon.append(item)
72
    threads = []
73
    t1 = threading.Thread(target=scrapSnapdeal, args = (snapdeal,))
74
    t1.daemon = True
75
    t1.start()
76
    t2 = threading.Thread(target=scrapFlipkart, args = (flipkart,))
77
    t2.daemon = True
78
    t2.start()
15493 kshitij.so 79
    t3 = threading.Thread(target=scrapAmazon, args = (amazon,))
12278 kshitij.so 80
    t3.daemon = True
81
    t3.start()
12256 kshitij.so 82
    threads.append(t1)
83
    threads.append(t2)
84
    threads.append(t3)
85
    for th in threads:
86
        th.join()
12284 kshitij.so 87
    br,t1,t2,t3 =None,None,None,None
88
    items[:],snapdeal[:],flipkart[:],amazon[:],threads[:]=[],[],[],[],[]
12256 kshitij.so 89
 
90
def scrapSnapdeal(snapdealItems):
12297 kshitij.so 91
    import simplejson as json
92
    import urllib2
93
    from shop2020.model.v1.catalog.impl.DataService import SnapdealItem
12256 kshitij.so 94
    for snapdealItem in snapdealItems:
95
        sdItem = SnapdealItem.get_by(item_id=snapdealItem.item_id)
96
        if sdItem is None:
97
            continue
98
        try:
15825 kshitij.so 99
            url="http://www.snapdeal.com/acors/json/v2/gvbps?supc=%s&catUrl=&bn=&catId=175&start=0&count=10000"%(sdItem.supc)
12256 kshitij.so 100
            print url
101
            time.sleep(1)
15498 kshitij.so 102
            req = urllib2.Request(url,headers=headers)
12256 kshitij.so 103
            response = urllib2.urlopen(req)
15826 kshitij.so 104
            vendorInfo = json.load(response)
15825 kshitij.so 105
            response.close()
15496 kshitij.so 106
            print vendorInfo
12256 kshitij.so 107
            lowestSp, iterator, ourInventory, lowestSellerInventory,ourSp,ourOfferPrice,lowestSp,lowestOfferPrice   = (0,)*8
108
            lowestSellerName = ''
15825 kshitij.so 109
            sortedVendorsData = sorted(vendorInfo['vendors'], key=itemgetter('sellingPrice'))
110
            for vendor in sortedVendorsData:
12256 kshitij.so 111
                if iterator == 0:
15536 kshitij.so 112
                    lowestSellerName = vendor['vendorDisplayName'].encode('utf-8')
12256 kshitij.so 113
                    try:
114
                        lowestSp = vendor['sellingPriceBefIntCashBack']
115
                    except:
116
                        lowestSp = vendor['sellingPrice']
117
                    lowestOfferPrice = vendor['sellingPrice']
118
                    lowestSellerInventory = vendor['buyableInventory']
119
 
120
                if vendor['vendorDisplayName'] == 'MobilesnMore':
121
                    ourInventory = vendor['buyableInventory']
122
                    try:
123
                        ourSp = vendor['sellingPriceBefIntCashBack']
124
                    except:
125
                        ourSp = vendor['sellingPrice']
126
                    ourOfferPrice = vendor['sellingPrice']
127
                iterator+=1
15497 kshitij.so 128
        except Exception as e:
129
            import traceback
130
            print traceback.print_exc()
12256 kshitij.so 131
            continue
12286 kshitij.so 132
        finally:
12287 kshitij.so 133
            sdItem =None
15496 kshitij.so 134
        print ourSp
135
        print ourOfferPrice
136
        print ourInventory
12256 kshitij.so 137
        snapdealItem.ourSnapdealPrice = ourSp
138
        snapdealItem.ourSnapdealOfferPrice = ourOfferPrice
139
        snapdealItem.ourSnapdealInventory = ourInventory
140
        snapdealItem.lowestSnapdealPrice = lowestSp
141
        snapdealItem.lowestSnapdealOfferPrice = lowestOfferPrice
142
        snapdealItem.lowestSnapdealSeller = lowestSellerName 
12314 kshitij.so 143
        snapdealItem.lowestSnapdealSellerInventory = lowestSellerInventory
144
    snapdealItems[:]=[]  
12256 kshitij.so 145
 
146
def scrapFlipkart(flipkartItems):
15520 kshitij.so 147
    from shop2020.model.v1.catalog.script import FlipkartProductPageParser
12297 kshitij.so 148
    from operator import itemgetter
149
    import requests as httpRequest
150
    from shop2020.model.v1.catalog.impl.DataService import FlipkartItem
15520 kshitij.so 151
    scraperFk = FlipkartProductPageParser.FlipkartProductPageScraper()
12256 kshitij.so 152
    for flipkartItem in flipkartItems:
153
        fkItem = FlipkartItem.get_by(item_id=flipkartItem.item_id)
154
        if fkItem is None:
155
            continue
156
        try:
15520 kshitij.so 157
            vendorsData = scraperFk.read(fkItem.flipkartSerialNumber.strip())
15536 kshitij.so 158
            lowestSellerName = vendorsData['cheapestSeller'].encode('utf-8')
15520 kshitij.so 159
            lowestSellerSp = vendorsData['lowestSellingPrice']
160
            ourSp = vendorsData['saholicSellingPrice']
12256 kshitij.so 161
        except:
162
            continue
12286 kshitij.so 163
        finally:
12287 kshitij.so 164
            fkItem=None
12256 kshitij.so 165
        try:
166
            request_url = "https://api.flipkart.net/sellers/skus/%s/listings"%(str(fkItem.flipkartSerialNumber))
167
            r = httpRequest.get(request_url, auth=('m2z93iskuj81qiid', '0c7ab6a5-98c0-4cdc-8be3-72c591e0add4'))
168
            print "Inventory info",r.json()
169
            stock_count = int((r.json()['attributeValues'])['stock_count'])
170
        except:
171
            stock_count = 0
172
        finally:
173
                r={}
174
        flipkartItem.ourFlipkartPrice = ourSp
175
        flipkartItem.ourFlipkartInventory = stock_count
176
        flipkartItem.lowestFlipkartPrice = lowestSellerSp
177
        flipkartItem.lowestFlipkartSeller =  lowestSellerName
12283 kshitij.so 178
    scraperFk = None
12314 kshitij.so 179
    flipkartItems[:] =[] 
12256 kshitij.so 180
 
181
 
182
def close_session():
    """Call remove() on the module's shared `session`."""
    session.remove()
12256 kshitij.so 184
 
15492 kshitij.so 185
def scrapAmazon(amazonItems):
15484 kshitij.so 186
    from shop2020.model.v1.catalog.script import AmazonScraper
187
    from shop2020.model.v1.catalog.impl.DataService import Amazonlisted
12277 kshitij.so 188
    print "Inside amazonitems ",amazonItems
189
    print "len amazon items ",len(amazonItems)
190
    time.sleep(5)
15484 kshitij.so 191
    amScraper = AmazonScraper.AmazonScraper()
12256 kshitij.so 192
    for amazonItem in amazonItems:
15484 kshitij.so 193
        amazon_d_item = Amazonlisted.get_by(itemId=amazonItem.item_id)
15746 kshitij.so 194
        if amazon_d_item is None:
195
            continue
15484 kshitij.so 196
        if len(amazon_d_item.asin)==0:
197
            print "No asin found for ",amazonItem.item_id
12256 kshitij.so 198
            continue
15484 kshitij.so 199
        saholicUrl = "http://www.amazon.in/gp/offer-listing/%s?m=AF6E3O0VE0X4D&s=merchant-items&ie=UTF8&qid=1433584512&sr=1-1"%(amazon_d_item.asin.strip())
15500 kshitij.so 200
        generalUrl = "http://www.amazon.in/gp/offer-listing/%s"%(amazon_d_item.asin.strip())
12256 kshitij.so 201
        try:
15484 kshitij.so 202
            saholicPrice = (amScraper.read(saholicUrl, False))[0]
203
        except:
204
            saholicPrice = 0.0
205
        try:
206
            cheapestSeller = (amScraper.read(generalUrl, True))
207
            cheapestSellerPrice = cheapestSeller[0]
15536 kshitij.so 208
            cheapestSellerName = cheapestSeller[1].encode('utf-8')
15484 kshitij.so 209
        except:
210
            cheapestSellerPrice = 0.0
211
            cheapestSellerName = ""
212
        amazonItem.ourAmazonPrice = saholicPrice
213
        amazonItem.lowestAmazonPrice = cheapestSellerPrice
214
        amazonItem.lowestAmazonSeller = cheapestSellerName 
12314 kshitij.so 215
    amazonItems[:] =[] 
12256 kshitij.so 216
 
217
 
12314 kshitij.so 218
def sendMail(request):
219
    import smtplib
220
    from email.mime.text import MIMEText
221
    from email.mime.multipart import MIMEMultipart
222
    mailServer = smtplib.SMTP("smtp.gmail.com", 587)
223
    mailServer.ehlo()
224
    mailServer.starttls()
225
    mailServer.ehlo()
226
    recipients = []
227
    recipients.append(request.user)
228
    message = "Your Request has been processed.Visit dashboard to check & download report" 
229
    msg = MIMEMultipart()
12329 kshitij.so 230
    msg['Subject'] = "Competition Scraping.Upload Id" + ' - ' + str(request.requestId)
12314 kshitij.so 231
    msg['From'] = ""
232
    msg['To'] = ",".join(recipients)
233
    msg.preamble = "Competition Scraping" + ' - ' + str(request.requestId)
234
    html_msg = MIMEText(message, 'html')
235
    msg.attach(html_msg)
236
    try:
237
        mailServer.login("build@shop2020.in", "cafe@nes")
238
        #mailServer.sendmail("cafe@nes", ['kshitij.sood@saholic.com'], msg.as_string())
239
        mailServer.sendmail("cafe@nes", recipients, msg.as_string())
240
    except Exception as e:
241
        print e
242
        print "Unable to send mail.Lets try with local SMTP."
243
        smtpServer = smtplib.SMTP('localhost')
244
        smtpServer.set_debuglevel(1)
245
        sender = 'build@shop2020.in'
246
        try:
247
            smtpServer.sendmail(sender, recipients, msg.as_string())
248
            print "Successfully sent email"
249
        except:
250
            print "Error: unable to send email."
251
 
12256 kshitij.so 252
 
253
 
254
if __name__ == "__main__":
255
    parser = optparse.OptionParser()
256
    parser.add_option("-l", "--logfile", dest="logfile",
257
                      type="string",
258
                      help="Log all output to LOG_FILE",
259
                      )
260
    parser.add_option("-i", "--pidfile", dest="pidfile",
261
                      type="string",
262
                      help="Write the PID to pidfile")
263
    (options, args) = parser.parse_args()
264
    daemon = CompetitorScraping(options.logfile, options.pidfile)
265
    if len(args) == 0:
266
        daemon.run()
267
    elif len(args) == 1:
268
        if 'start' == args[0]:
269
            daemon.start()
270
        elif 'stop' == args[0]:
271
            daemon.stop()
272
        elif 'restart' == args[0]:
273
            daemon.restart()
274
        else:
275
            print "Unknown command"
276
            sys.exit(2)
277
        sys.exit(0)
278
    else:
279
        print "usage: %s start|stop|restart" % sys.argv[0]
280
        sys.exit(2)