Subversion Repositories SmartDukaan

Rev

Rev 15499 | Rev 15520 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
12256 kshitij.so 1
from elixir import session
12268 kshitij.so 2
from shop2020.config.client.ConfigClient import ConfigClient
12256 kshitij.so 3
from sqlalchemy.sql import asc
4
from sqlalchemy.sql.expression import or_
5
from shop2020.utils.daemon import Daemon
6
import optparse
7
import sys
8
import mechanize
9
import time
10
from shop2020.model.v1.catalog.impl import DataService
12297 kshitij.so 11
from shop2020.model.v1.catalog.impl.DataService import CompetitorPricing, CompetitorPricingRequest
12272 kshitij.so 12
import gc 
12256 kshitij.so 13
 
12268 kshitij.so 14
# Module-level setup: read the 'staging_hostname' property from the
# configuration client and initialise DataService against that host. This
# runs at import time, before any query in this module is issued.
config_client = ConfigClient()
host = config_client.get_property('staging_hostname')
DataService.initialize(db_hostname=host)
12256 kshitij.so 17
 
15498 kshitij.so 18
# Browser-like HTTP headers attached to the Snapdeal requests made in
# scrapSnapdeal().
headers = {
    'User-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.8',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
}
24
 
12256 kshitij.so 25
class CompetitorScraping(Daemon):
    """Daemon wrapper around the competitor-pricing scraping loop.

    Both stdout and stderr of the daemon are sent to ``logfile``; the
    process id is written to ``pidfile``.
    """

    DEFAULT_LOGFILE = '/var/log/services/competitorScraping.log'
    DEFAULT_PIDFILE = '/var/run/competitor-scraper.pid'

    def __init__(self, logfile=DEFAULT_LOGFILE, pidfile=DEFAULT_PIDFILE):
        """Create the daemon.

        logfile -- path receiving stdout and stderr; None selects the default.
        pidfile -- path of the pid file; None selects the default.
        """
        # The command-line entry point forwards optparse values, which are
        # None when -l / -i are not given. Passing None explicitly would
        # bypass the signature defaults, so map it back to them here.
        if logfile is None:
            logfile = self.DEFAULT_LOGFILE
        if pidfile is None:
            pidfile = self.DEFAULT_PIDFILE
        Daemon.__init__(self, pidfile, stdout=logfile, stderr=logfile)

    def run(self):
        """Run the scraping loop by delegating to the module-level start()."""
        start()
31
 
32
def start(poll_interval=600):
    """Process pending competitor-pricing requests in an endless loop.

    Every pass loads all CompetitorPricingRequest rows whose isProcessed
    flag is False or NULL, ordered by ascending requestId. Each request is
    scraped via fetchDetails(), marked processed, committed, and the user is
    notified via sendMail(). After the pass the session is closed and a
    garbage collection is forced. When a pass finds nothing to do, the loop
    sleeps for ``poll_interval`` seconds before polling again.

    poll_interval -- seconds to sleep when no request is pending (default 600).

    Any unexpected error is logged with its traceback and terminates the
    process with exit status 2.
    """
    import traceback

    try:
        while True:
            # '== False' / '== None' are SQL expressions here, not Python
            # comparisons, so they must not be rewritten as 'is' / 'not'.
            pending = session.query(CompetitorPricingRequest).filter(
                or_(CompetitorPricingRequest.isProcessed == False,
                    CompetitorPricingRequest.isProcessed == None)
            ).order_by(asc(CompetitorPricingRequest.requestId)).all()

            if not pending:
                print("No new request to process, sleeeeeeping.....")

            for request in pending:
                fetchDetails(request)
                request.isProcessed = True
                session.commit()
                # The request is already committed as processed at this
                # point; a notification failure must not take the daemon down.
                try:
                    sendMail(request)
                except Exception:
                    print("Unable to send notification mail for processed request.")
                    traceback.print_exc()

            # Per-pass cleanup, run exactly once whether or not there was work.
            close_session()
            collected = gc.collect()
            print("Garbage collector: collected %d objects." % (collected))
            # Debugging aids kept from the original: dump the live namespaces.
            print(locals())
            print(globals())

            if not pending:
                time.sleep(poll_interval)
    except Exception as e:
        print(e)
        traceback.print_exc()
        sys.exit(2)
58
 
59
def fetchDetails(request):
    """Scrape marketplace prices for every item belonging to ``request``.

    Loads the CompetitorPricing rows whose competitorPricing_requestId equals
    request.requestId, groups them by the marketplace flags set on each row
    (snapdealScraping, flipkartScraping, amazonScraping; a row may appear in
    several groups), and runs one worker thread per marketplace. The call
    returns once all three workers have finished.

    The workers write their results onto the row objects in place; nothing
    is committed here.

    NOTE(review): the workers call get_by() from their own threads. If the
    elixir session is thread-scoped, those per-thread sessions are never
    closed by this code - confirm and release them if so.
    """
    import threading

    items = session.query(CompetitorPricing).filter(
        CompetitorPricing.competitorPricing_requestId == request.requestId
    ).all()
    print(items)

    snapdeal = [item for item in items if item.snapdealScraping]
    flipkart = [item for item in items if item.flipkartScraping]
    amazon = [item for item in items if item.amazonScraping]

    jobs = (
        (scrapSnapdeal, snapdeal),
        (scrapFlipkart, flipkart),
        (scrapAmazon, amazon),
    )

    workers = []
    for target, batch in jobs:
        worker = threading.Thread(target=target, args=(batch,))
        # Daemon threads so a stuck scraper cannot block interpreter exit.
        worker.daemon = True
        worker.start()
        workers.append(worker)

    for worker in workers:
        worker.join()
12256 kshitij.so 88
 
89
def scrapSnapdeal(snapdealItems):
    """Fill in Snapdeal pricing fields on each item of ``snapdealItems``.

    For every item with a matching SnapdealItem row, the vendor list for its
    supc is fetched as JSON. The first vendor in the response is recorded as
    the lowest seller (the URL requests sort=sellingPrice), and the vendor
    named 'MobilesnMore' is recorded as our own listing. Values that are not
    found stay at 0 / ''. Items without a SnapdealItem row, or whose fetch or
    parsing fails, are skipped and left untouched.

    The list passed in is emptied before returning.
    """
    import simplejson as json
    import urllib2
    import traceback
    from contextlib import closing
    from shop2020.model.v1.catalog.impl.DataService import SnapdealItem

    def priceBeforeCashback(vendor):
        # Prefer the pre-cashback price; fall back to the plain selling
        # price when the vendor entry does not carry that key.
        try:
            return vendor['sellingPriceBefIntCashBack']
        except KeyError:
            return vendor['sellingPrice']

    for snapdealItem in snapdealItems:
        sdItem = SnapdealItem.get_by(item_id=snapdealItem.item_id)
        if sdItem is None:
            continue
        try:
            url = "http://www.snapdeal.com/acors/json/gvbps?supc=%s&catId=175&sort=sellingPrice" % (sdItem.supc,)
            print(url)
            # One-second pause before each request.
            time.sleep(1)
            req = urllib2.Request(url, headers=headers)
            # closing() guarantees the HTTP response is released even when
            # reading fails.
            with closing(urllib2.urlopen(req)) as response:
                json_input = response.read()
            vendorInfo = json.loads(json_input)
            print(vendorInfo)

            ourInventory = ourSp = ourOfferPrice = 0
            lowestSp = lowestOfferPrice = lowestSellerInventory = 0
            lowestSellerName = ''
            for position, vendor in enumerate(vendorInfo):
                if position == 0:
                    lowestSellerName = vendor['vendorDisplayName']
                    lowestSp = priceBeforeCashback(vendor)
                    lowestOfferPrice = vendor['sellingPrice']
                    lowestSellerInventory = vendor['buyableInventory']

                if vendor['vendorDisplayName'] == 'MobilesnMore':
                    ourInventory = vendor['buyableInventory']
                    ourSp = priceBeforeCashback(vendor)
                    ourOfferPrice = vendor['sellingPrice']
        except Exception:
            # Best effort per item: log the failure and move on.
            traceback.print_exc()
            continue

        print(ourSp)
        print(ourOfferPrice)
        print(ourInventory)
        snapdealItem.ourSnapdealPrice = ourSp
        snapdealItem.ourSnapdealOfferPrice = ourOfferPrice
        snapdealItem.ourSnapdealInventory = ourInventory
        snapdealItem.lowestSnapdealPrice = lowestSp
        snapdealItem.lowestSnapdealOfferPrice = lowestOfferPrice
        snapdealItem.lowestSnapdealSeller = lowestSellerName
        snapdealItem.lowestSnapdealSellerInventory = lowestSellerInventory
    snapdealItems[:] = []
12256 kshitij.so 143
 
144
def scrapFlipkart(flipkartItems):
    """Fill in Flipkart pricing fields on each item of ``flipkartItems``.

    For every item with a matching FlipkartItem row, the seller list for its
    flipkartSerialNumber is read through FlipkartScraper and sorted by
    sellingPrice: the cheapest entry is recorded as the lowest seller and the
    entry named 'Saholic' as our own price. Our stock count is then read from
    the Flipkart seller listings API; any failure there yields a stock count
    of 0. Items without a FlipkartItem row, or whose scrape fails, are
    skipped and left untouched.

    The list passed in is emptied before returning.
    """
    from shop2020.model.v1.catalog.script import FlipkartScraper
    from operator import itemgetter
    import requests as httpRequest
    import traceback
    from shop2020.model.v1.catalog.impl.DataService import FlipkartItem

    # NOTE(review): API credentials are hard-coded in source. They are kept
    # here unchanged to preserve behaviour, but should be moved to
    # configuration and rotated.
    apiAuth = ('m2z93iskuj81qiid', '0c7ab6a5-98c0-4cdc-8be3-72c591e0add4')

    scraperFk = FlipkartScraper.FlipkartScraper()
    for flipkartItem in flipkartItems:
        fkItem = FlipkartItem.get_by(item_id=flipkartItem.item_id)
        if fkItem is None:
            continue
        try:
            # Capture the serial number once; the inventory lookup below
            # needs it too. (Previously the row reference was cleared before
            # that lookup, so the lookup always failed and stock was always 0.)
            serialNumber = fkItem.flipkartSerialNumber
            url = "http://www.flipkart.com/ps/%s" % (serialNumber,)
            vendorsData = scraperFk.read(url)
            sortedVendorsData = sorted(vendorsData, key=itemgetter('sellingPrice'))

            lowestSellerName, lowestSellerSp, ourSp = '', 0, 0
            if sortedVendorsData:
                cheapest = sortedVendorsData[0]
                lowestSellerName = cheapest['sellerName']
                lowestSellerSp = cheapest['sellingPrice']
            for data in sortedVendorsData:
                if data['sellerName'] == 'Saholic':
                    ourSp = data['sellingPrice']
        except Exception:
            # Best effort per item: log the failure and move on.
            traceback.print_exc()
            continue

        try:
            request_url = "https://api.flipkart.net/sellers/skus/%s/listings" % (str(serialNumber))
            r = httpRequest.get(request_url, auth=apiAuth)
            listing = r.json()
            print("Inventory info %s" % (listing,))
            stock_count = int(listing['attributeValues']['stock_count'])
        except Exception:
            # Inventory is optional: report zero stock when it cannot be read.
            stock_count = 0

        flipkartItem.ourFlipkartPrice = ourSp
        flipkartItem.ourFlipkartInventory = stock_count
        flipkartItem.lowestFlipkartPrice = lowestSellerSp
        flipkartItem.lowestFlipkartSeller = lowestSellerName
    flipkartItems[:] = []
12256 kshitij.so 189
 
190
 
191
def close_session():
    """Close the shared session, but only while it reports itself active."""
    if not session.is_active:
        return
    print("session is active. closing it.")
    session.close()
195
 
15492 kshitij.so 196
def scrapAmazon(amazonItems):
    """Fill in Amazon pricing fields on each item of ``amazonItems``.

    For every item whose Amazonlisted row carries a non-blank asin, two
    offer-listing pages are read through AmazonScraper: one filtered to our
    merchant id (our price) and the general one (cheapest seller price and
    name). A failed read yields 0.0 / '' for the corresponding fields. Items
    with no Amazonlisted row or no asin are reported and skipped.

    The list passed in is emptied before returning.
    """
    from shop2020.model.v1.catalog.script import AmazonScraper
    from shop2020.model.v1.catalog.impl.DataService import Amazonlisted

    print("Inside amazonitems  %s" % (amazonItems,))
    print("len amazon items  %s" % (len(amazonItems),))
    time.sleep(5)
    amScraper = AmazonScraper.AmazonScraper()
    for amazonItem in amazonItems:
        amazon_d_item = Amazonlisted.get_by(itemId=amazonItem.item_id)
        # get_by() may find no row, and asin may be unset; either case used
        # to raise inside the worker thread and abandon the remaining items.
        asin = (getattr(amazon_d_item, 'asin', None) or '').strip()
        if not asin:
            print("No asin found for  %s" % (amazonItem.item_id,))
            continue

        saholicUrl = "http://www.amazon.in/gp/offer-listing/%s?m=AF6E3O0VE0X4D&s=merchant-items&ie=UTF8&qid=1433584512&sr=1-1" % (asin,)
        generalUrl = "http://www.amazon.in/gp/offer-listing/%s" % (asin,)

        try:
            saholicPrice = amScraper.read(saholicUrl, False)[0]
        except Exception:
            saholicPrice = 0.0

        try:
            cheapestSeller = amScraper.read(generalUrl, True)
            cheapestSellerPrice, cheapestSellerName = cheapestSeller[0], cheapestSeller[1]
        except Exception:
            cheapestSellerPrice = 0.0
            cheapestSellerName = ""

        amazonItem.ourAmazonPrice = saholicPrice
        amazonItem.lowestAmazonPrice = cheapestSellerPrice
        amazonItem.lowestAmazonSeller = cheapestSellerName
    amazonItems[:] = []
12256 kshitij.so 225
 
226
 
12314 kshitij.so 227
def sendMail(request):
    """Notify the requesting user that ``request`` has been processed.

    Builds a short HTML message addressed to request.user and tries to send
    it through smtp.gmail.com (STARTTLS, port 587). If that fails for any
    reason, a second attempt is made through the SMTP server on localhost.
    Failures are printed, never raised. Nothing is sent when request.user is
    empty.

    request -- object exposing ``user`` (recipient address) and ``requestId``.
    """
    import smtplib
    from email.mime.text import MIMEText
    from email.mime.multipart import MIMEMultipart

    # NOTE(review): account credentials are hard-coded in source. They are
    # kept unchanged to preserve behaviour, but should be moved to
    # configuration and rotated.
    sender = "build@shop2020.in"
    password = "cafe@nes"

    def closeQuietly(server):
        # Release the SMTP connection; it may already be gone after a failure.
        if server is None:
            return
        try:
            server.quit()
        except (smtplib.SMTPException, EnvironmentError):
            pass

    if not request.user:
        print("No recipient for request, mail not sent.")
        return
    recipients = [request.user]

    message = "Your Request has been processed.Visit dashboard to check & download report"
    msg = MIMEMultipart()
    msg['Subject'] = "Competition Scraping.Upload Id" + ' - ' + str(request.requestId)
    msg['From'] = sender
    msg['To'] = ",".join(recipients)
    msg.preamble = "Competition Scraping" + ' - ' + str(request.requestId)
    msg.attach(MIMEText(message, 'html'))
    payload = msg.as_string()

    mailServer = None
    try:
        # Connecting is inside the try so a network error falls through to
        # the local relay instead of propagating to the caller.
        mailServer = smtplib.SMTP("smtp.gmail.com", 587)
        mailServer.ehlo()
        mailServer.starttls()
        mailServer.ehlo()
        mailServer.login(sender, password)
        # The envelope sender is the account address (the password literal
        # was previously passed here by mistake).
        mailServer.sendmail(sender, recipients, payload)
        return
    except Exception as e:
        print(e)
        print("Unable to send mail.Lets try with local SMTP.")
    finally:
        closeQuietly(mailServer)

    smtpServer = None
    try:
        smtpServer = smtplib.SMTP('localhost')
        smtpServer.set_debuglevel(1)
        smtpServer.sendmail(sender, recipients, payload)
        print("Successfully sent email")
    except Exception:
        print("Error: unable to send email.")
    finally:
        closeQuietly(smtpServer)
260
 
12256 kshitij.so 261
 
262
 
263
if __name__ == "__main__":
    # Command-line entry point.
    #   no argument            -> call run() directly
    #   start | stop | restart -> call that daemon method, then exit 0
    #   anything else          -> report the problem and exit 2
    parser = optparse.OptionParser()
    parser.add_option("-l", "--logfile", dest="logfile", type="string",
                      help="Log all output to LOG_FILE")
    parser.add_option("-i", "--pidfile", dest="pidfile", type="string",
                      help="Write the PID to pidfile")
    options, args = parser.parse_args()
    daemon = CompetitorScraping(options.logfile, options.pidfile)

    if not args:
        daemon.run()
    elif len(args) == 1:
        command = args[0]
        if command not in ('start', 'stop', 'restart'):
            print("Unknown command")
            sys.exit(2)
        getattr(daemon, command)()
        sys.exit(0)
    else:
        print("usage: %s start|stop|restart" % sys.argv[0])
        sys.exit(2)