Subversion Repositories SmartDukaan

Rev

Rev 15497 | Rev 15499 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
12256 kshitij.so 1
from elixir import session
12268 kshitij.so 2
from shop2020.config.client.ConfigClient import ConfigClient
12256 kshitij.so 3
from sqlalchemy.sql import asc
4
from sqlalchemy.sql.expression import or_
5
from shop2020.utils.daemon import Daemon
6
import optparse
7
import sys
8
import mechanize
9
import time
10
from shop2020.model.v1.catalog.impl import DataService
12297 kshitij.so 11
from shop2020.model.v1.catalog.impl.DataService import CompetitorPricing, CompetitorPricingRequest
12272 kshitij.so 12
import gc 
12256 kshitij.so 13
 
12268 kshitij.so 14
# Look up the database host in the central configuration service (property
# 'staging_hostname') and initialise the catalog data layer against it.
# This runs at import time, before any scraping function touches the session.
config_client = ConfigClient()
host = config_client.get_property('staging_hostname')
DataService.initialize(db_hostname=host)
12256 kshitij.so 17
 
15498 kshitij.so 18
# Browser-like HTTP request headers. scrapSnapdeal() passes this dict to
# urllib2.Request when it calls the Snapdeal vendor-listing endpoint.
headers = {
    'User-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.8',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
}
24
 
12256 kshitij.so 25
class CompetitorScraping(Daemon):
    """Daemon wrapper around the competitor-pricing scrape loop.

    ``logfile`` receives both stdout and stderr of the daemon; ``pidfile``
    is handed to the ``Daemon`` base class as its first argument.
    """

    def __init__(self,
                 logfile='/var/log/services/competitorScraping.log',
                 pidfile='/var/run/competitor-scraper.pid'):
        Daemon.__init__(self, pidfile, stdout=logfile, stderr=logfile)

    def run(self):
        """Enter the polling loop implemented by the module-level ``start()``."""
        # The bare name resolves to the module function ``start``, not to the
        # ``start`` method that the command-line entry point calls on the daemon.
        start()
31
 
32
def start():
    """Poll for unprocessed pricing requests and handle them, forever.

    Each cycle loads every ``CompetitorPricingRequest`` whose ``isProcessed``
    flag is false or NULL, oldest ``requestId`` first.  For each request the
    marketplaces are scraped (``fetchDetails``), the request is marked
    processed and committed, and the requester is mailed (``sendMail``).
    After every cycle the session is closed and a garbage collection is
    forced; when nothing was pending the loop then sleeps for 600 seconds.

    Any exception escaping a cycle is logged with its traceback and the
    process exits with status 2.
    """
    import traceback
    try:
        while True:
            # ``== False`` / ``== None`` are SQLAlchemy column comparisons and
            # must not be rewritten as ``is`` / ``not``.
            unprocessed = or_(CompetitorPricingRequest.isProcessed == False,
                              CompetitorPricingRequest.isProcessed == None)
            pending = (session.query(CompetitorPricingRequest)
                       .filter(unprocessed)
                       .order_by(asc(CompetitorPricingRequest.requestId))
                       .all())
            idle = not pending
            if idle:
                print("No new request to process, sleeeeeeping.....")
            for request in pending:
                fetchDetails(request)
                request.isProcessed = True
                session.commit()
                sendMail(request)
            # Release the DB session and reclaim memory once per cycle.
            close_session()
            collected = gc.collect()
            print("Garbage collector: collected %d objects." % (collected))
            if idle:
                time.sleep(600)
    except Exception:
        # Keep the full traceback in the log; the message alone rarely
        # identifies what brought the daemon down.
        traceback.print_exc()
        sys.exit(2)
58
 
59
def fetchDetails(request):
    """Scrape all marketplaces flagged on the items of ``request``.

    Loads the ``CompetitorPricing`` rows belonging to ``request.requestId``,
    groups them by their ``snapdealScraping`` / ``flipkartScraping`` /
    ``amazonScraping`` flags and runs one scraper per marketplace in its own
    daemon thread, returning only after all three threads have finished.
    """
    import threading
    items = session.query(CompetitorPricing).filter(
        CompetitorPricing.competitorPricing_requestId == request.requestId).all()
    print(items)

    # An item may be flagged for several marketplaces and then appears in
    # more than one bucket.
    snapdeal = [entry for entry in items if entry.snapdealScraping]
    flipkart = [entry for entry in items if entry.flipkartScraping]
    amazon = [entry for entry in items if entry.amazonScraping]

    workers = []
    for scraper, bucket in ((scrapSnapdeal, snapdeal),
                            (scrapFlipkart, flipkart),
                            (scrapAmazon, amazon)):
        worker = threading.Thread(target=scraper, args=(bucket,))
        worker.daemon = True
        worker.start()
        workers.append(worker)

    for worker in workers:
        worker.join()

    # Empty the working lists in place so nothing keeps the rows alive.
    del items[:], snapdeal[:], flipkart[:], amazon[:], workers[:]
12256 kshitij.so 88
 
89
def summarizeSnapdealVendors(vendorInfo, ourSellerName='MobilesnMore'):
    """Reduce a Snapdeal vendor listing to the lowest seller's and our own figures.

    ``vendorInfo`` is the decoded JSON list of vendor dicts.  The first entry
    is taken as the lowest-priced seller (the listing is requested with
    ``sort=sellingPrice``).  For both the first entry and the entry whose
    ``vendorDisplayName`` equals ``ourSellerName`` the price is
    ``sellingPriceBefIntCashBack`` when present, otherwise ``sellingPrice``;
    the offer price is always ``sellingPrice``.

    Returns a dict with keys ``ourSp``, ``ourOfferPrice``, ``ourInventory``,
    ``lowestSp``, ``lowestOfferPrice``, ``lowestSellerName`` and
    ``lowestSellerInventory``; numeric fields default to 0 and the name to ''.
    Raises ``KeyError`` when a vendor entry lacks a required field.
    """
    def priceBeforeCashBack(vendor):
        try:
            return vendor['sellingPriceBefIntCashBack']
        except KeyError:
            return vendor['sellingPrice']

    summary = {
        'ourSp': 0,
        'ourOfferPrice': 0,
        'ourInventory': 0,
        'lowestSp': 0,
        'lowestOfferPrice': 0,
        'lowestSellerName': '',
        'lowestSellerInventory': 0,
    }
    for position, vendor in enumerate(vendorInfo):
        if position == 0:
            summary['lowestSellerName'] = vendor['vendorDisplayName']
            summary['lowestSp'] = priceBeforeCashBack(vendor)
            summary['lowestOfferPrice'] = vendor['sellingPrice']
            summary['lowestSellerInventory'] = vendor['buyableInventory']
        if vendor['vendorDisplayName'] == ourSellerName:
            summary['ourInventory'] = vendor['buyableInventory']
            summary['ourSp'] = priceBeforeCashBack(vendor)
            summary['ourOfferPrice'] = vendor['sellingPrice']
    return summary


def scrapSnapdeal(snapdealItems):
    """Fetch Snapdeal vendor pricing for each item and store it on the item.

    For every entry of ``snapdealItems`` that has a matching ``SnapdealItem``
    row, the vendor listing for that row's ``supc`` is downloaded and the
    ``ourSnapdeal*`` / ``lowestSnapdeal*`` attributes of the entry are set.
    Items whose download or parsing fails are logged and skipped.  The list
    is emptied in place before returning.
    """
    import contextlib
    import traceback
    import simplejson as json
    import urllib2
    from shop2020.model.v1.catalog.impl.DataService import SnapdealItem
    for snapdealItem in snapdealItems:
        sdItem = SnapdealItem.get_by(item_id=snapdealItem.item_id)
        if sdItem is None:
            continue
        try:
            url = "http://www.snapdeal.com/acors/json/gvbps?supc=%s&catId=175&sort=sellingPrice" % (sdItem.supc)
            print(url)
            # One-second pause between consecutive listing requests.
            time.sleep(1)
            req = urllib2.Request(url, headers=headers)
            # closing() releases the HTTP response even if reading fails.
            with contextlib.closing(urllib2.urlopen(req)) as response:
                vendorInfo = json.loads(response.read())
            print(vendorInfo)
            summary = summarizeSnapdealVendors(vendorInfo)
        except Exception:
            traceback.print_exc()
            continue
        # sdItem must still be referenced here: clearing it in a ``finally``
        # before this point made every successful scrape raise AttributeError.
        print("Item id  %s" % (sdItem.item_id,))
        print(summary['ourSp'])
        print(summary['ourOfferPrice'])
        print(summary['ourInventory'])
        snapdealItem.ourSnapdealPrice = summary['ourSp']
        snapdealItem.ourSnapdealOfferPrice = summary['ourOfferPrice']
        snapdealItem.ourSnapdealInventory = summary['ourInventory']
        snapdealItem.lowestSnapdealPrice = summary['lowestSp']
        snapdealItem.lowestSnapdealOfferPrice = summary['lowestOfferPrice']
        snapdealItem.lowestSnapdealSeller = summary['lowestSellerName']
        snapdealItem.lowestSnapdealSellerInventory = summary['lowestSellerInventory']
    snapdealItems[:] = []
12256 kshitij.so 144
 
145
def summarizeFlipkartVendors(vendorsData, ourSellerName='Saholic'):
    """Return ``(lowestSellerName, lowestSellerSp, ourSp)`` for a seller list.

    ``vendorsData`` is an iterable of dicts with ``sellerName`` and
    ``sellingPrice`` keys.  Entries are ordered by ``sellingPrice``; the
    first one is the lowest seller.  ``ourSp`` is the price of the entry
    whose ``sellerName`` equals ``ourSellerName`` (the highest-priced one if
    several match) or 0 when there is none.  An empty input yields
    ``('', 0, 0)``.
    """
    from operator import itemgetter
    lowestSellerName, lowestSellerSp, ourSp = '', 0, 0
    ordered = sorted(vendorsData, key=itemgetter('sellingPrice'))
    for position, data in enumerate(ordered):
        if position == 0:
            lowestSellerName = data['sellerName']
            lowestSellerSp = data['sellingPrice']
        if data['sellerName'] == ourSellerName:
            ourSp = data['sellingPrice']
    return lowestSellerName, lowestSellerSp, ourSp


def scrapFlipkart(flipkartItems):
    """Fetch Flipkart seller pricing and our stock count for each item.

    For every entry of ``flipkartItems`` that has a matching ``FlipkartItem``
    row, the seller page for its ``flipkartSerialNumber`` is scraped and the
    seller listings API is queried for our ``stock_count``.  Results are
    stored on the entry's ``ourFlipkart*`` / ``lowestFlipkart*`` attributes.
    Items whose seller page cannot be scraped are logged and skipped; a
    failed stock lookup is logged and recorded as 0.  The list is emptied in
    place before returning.
    """
    import traceback
    from shop2020.model.v1.catalog.script import FlipkartScraper
    import requests as httpRequest
    from shop2020.model.v1.catalog.impl.DataService import FlipkartItem
    scraperFk = FlipkartScraper.FlipkartScraper()
    for flipkartItem in flipkartItems:
        fkItem = FlipkartItem.get_by(item_id=flipkartItem.item_id)
        if fkItem is None:
            continue
        # Read the serial number once, up front.  Clearing fkItem before the
        # stock lookup made that lookup fail every time, so inventory was
        # always stored as 0.
        serialNumber = fkItem.flipkartSerialNumber
        try:
            url = "http://www.flipkart.com/ps/%s" % (serialNumber,)
            vendorsData = scraperFk.read(url)
            lowestSellerName, lowestSellerSp, ourSp = summarizeFlipkartVendors(vendorsData)
        except Exception:
            traceback.print_exc()
            continue
        try:
            request_url = "https://api.flipkart.net/sellers/skus/%s/listings" % (str(serialNumber),)
            # NOTE(review): API credentials are hard-coded in source; they
            # should be moved to configuration and rotated.
            r = httpRequest.get(request_url, auth=('m2z93iskuj81qiid', '0c7ab6a5-98c0-4cdc-8be3-72c591e0add4'))
            listing = r.json()
            print("Inventory info %s" % (listing,))
            stock_count = int(listing['attributeValues']['stock_count'])
        except Exception:
            # Best effort: an unavailable stock figure is recorded as 0.
            traceback.print_exc()
            stock_count = 0
        flipkartItem.ourFlipkartPrice = ourSp
        flipkartItem.ourFlipkartInventory = stock_count
        flipkartItem.lowestFlipkartPrice = lowestSellerSp
        flipkartItem.lowestFlipkartSeller = lowestSellerName
    scraperFk = None
    flipkartItems[:] = []
12256 kshitij.so 190
 
191
 
192
def close_session():
    """Close the shared ORM session, but only while it reports itself active."""
    if not session.is_active:
        return
    print("session is active. closing it.")
    session.close()
196
 
15492 kshitij.so 197
def scrapAmazon(amazonItems):
    """Fetch Amazon offer-listing prices for each item and store them.

    For every entry of ``amazonItems`` the ``Amazonlisted`` row is looked up
    by item id.  Entries without a row or without an ASIN are logged and
    skipped.  Otherwise our own offer price and the cheapest seller's price
    and name are read through ``AmazonScraper`` and stored on the entry's
    ``ourAmazonPrice`` / ``lowestAmazonPrice`` / ``lowestAmazonSeller``.
    A failed read is logged and recorded as 0.0 (and '' for the name).  The
    list is emptied in place before returning.
    """
    import traceback
    from shop2020.model.v1.catalog.script import AmazonScraper
    from shop2020.model.v1.catalog.impl.DataService import Amazonlisted
    print("Inside amazonitems  %s" % (amazonItems,))
    print("len amazon items  %d" % (len(amazonItems),))
    # Fixed pause before the first Amazon request (kept from the original;
    # the reason is not visible in this file).
    time.sleep(5)
    amScraper = AmazonScraper.AmazonScraper()
    for amazonItem in amazonItems:
        amazon_d_item = Amazonlisted.get_by(itemId=amazonItem.item_id)
        # A missing row or a NULL/blank ASIN must skip the item rather than
        # raise and kill this worker thread.
        asin = ''
        if amazon_d_item is not None and amazon_d_item.asin:
            asin = amazon_d_item.asin.strip()
        if not asin:
            print("No asin found for  %s" % (amazonItem.item_id,))
            continue
        saholicUrl = "http://www.amazon.in/gp/offer-listing/%s?m=AF6E3O0VE0X4D&s=merchant-items&ie=UTF8&qid=1433584512&sr=1-1" % (asin,)
        # NOTE(review): unlike saholicUrl this URL carries no "http://"
        # scheme; confirm AmazonScraper.read() expects it this way.
        generalUrl = "www.amazon.in/gp/offer-listing/%s" % (asin,)
        try:
            saholicPrice = (amScraper.read(saholicUrl, False))[0]
        except Exception:
            traceback.print_exc()
            saholicPrice = 0.0
        try:
            cheapestSeller = amScraper.read(generalUrl, True)
            cheapestSellerPrice = cheapestSeller[0]
            cheapestSellerName = cheapestSeller[1]
        except Exception:
            traceback.print_exc()
            cheapestSellerPrice = 0.0
            cheapestSellerName = ""
        amazonItem.ourAmazonPrice = saholicPrice
        amazonItem.lowestAmazonPrice = cheapestSellerPrice
        amazonItem.lowestAmazonSeller = cheapestSellerName
    amazonItems[:] = []
12256 kshitij.so 226
 
227
 
12314 kshitij.so 228
def buildProcessedMail(requestId, recipients, sender):
    """Build the "request processed" notification as a MIME multipart message.

    ``recipients`` is a list of address strings joined into the ``To``
    header; ``sender`` becomes the ``From`` header; ``requestId`` is embedded
    in the subject and the preamble.  The single attached part is HTML.
    """
    from email.mime.text import MIMEText
    from email.mime.multipart import MIMEMultipart
    message = "Your Request has been processed.Visit dashboard to check & download report"
    msg = MIMEMultipart()
    msg['Subject'] = "Competition Scraping.Upload Id" + ' - ' + str(requestId)
    msg['From'] = sender
    msg['To'] = ",".join(recipients)
    msg.preamble = "Competition Scraping" + ' - ' + str(requestId)
    msg.attach(MIMEText(message, 'html'))
    return msg


def sendMail(request):
    """Mail ``request.user`` that ``request`` has been processed.

    Delivery is attempted through Gmail's SMTP server first and, if anything
    in that attempt fails, through the SMTP server on localhost.  Failures
    are printed, never raised, so a mail outage cannot stop the caller.
    Nothing is sent when the request has no user address.
    """
    import smtplib

    def closeQuietly(server):
        # Closing an already-broken connection may itself fail; that is not
        # worth reporting.
        if server is None:
            return
        try:
            server.quit()
        except Exception:
            pass

    if not request.user:
        print("Error: unable to send email.")
        return

    sender = "build@shop2020.in"
    recipients = [request.user]
    payload = buildProcessedMail(request.requestId, recipients, sender).as_string()

    mailServer = None
    try:
        mailServer = smtplib.SMTP("smtp.gmail.com", 587)
        mailServer.ehlo()
        mailServer.starttls()
        mailServer.ehlo()
        # NOTE(review): the account password is hard-coded in source; it
        # should be moved to configuration and rotated.
        mailServer.login(sender, "cafe@nes")
        # The envelope sender is the account address (the original passed
        # the password here by mistake).
        mailServer.sendmail(sender, recipients, payload)
        return
    except Exception as e:
        print(e)
        print("Unable to send mail.Lets try with local SMTP.")
    finally:
        closeQuietly(mailServer)

    smtpServer = None
    try:
        smtpServer = smtplib.SMTP('localhost')
        smtpServer.set_debuglevel(1)
        smtpServer.sendmail(sender, recipients, payload)
        print("Successfully sent email")
    except Exception:
        print("Error: unable to send email.")
    finally:
        closeQuietly(smtpServer)
261
 
12256 kshitij.so 262
 
263
 
264
# Command-line entry point.
#   no argument            -> run the scrape loop in the foreground
#   start | stop | restart -> control the daemon, then exit 0
#   anything else          -> print an error/usage line and exit 2
if __name__ == "__main__":
    parser = optparse.OptionParser()
    parser.add_option("-l", "--logfile", dest="logfile",
                      type="string",
                      help="Log all output to LOG_FILE",
                      )
    parser.add_option("-i", "--pidfile", dest="pidfile",
                      type="string",
                      help="Write the PID to pidfile")
    (options, args) = parser.parse_args()

    # optparse reports omitted options as None.  Forward only the ones that
    # were actually given, so the constructor's default log and pid paths
    # apply instead of being overridden with None.
    overrides = {}
    if options.logfile is not None:
        overrides['logfile'] = options.logfile
    if options.pidfile is not None:
        overrides['pidfile'] = options.pidfile
    daemon = CompetitorScraping(**overrides)

    if len(args) == 0:
        daemon.run()
    elif len(args) == 1:
        commands = {
            'start': daemon.start,
            'stop': daemon.stop,
            'restart': daemon.restart,
        }
        action = commands.get(args[0])
        if action is None:
            print("Unknown command")
            sys.exit(2)
        action()
        sys.exit(0)
    else:
        print("usage: %s start|stop|restart" % sys.argv[0])
        sys.exit(2)