Subversion Repositories SmartDukaan

Rev

Rev 15496 | Rev 15498 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
12256 kshitij.so 1
from elixir import session
12268 kshitij.so 2
from shop2020.config.client.ConfigClient import ConfigClient
12256 kshitij.so 3
from sqlalchemy.sql import asc
4
from sqlalchemy.sql.expression import or_
5
from shop2020.utils.daemon import Daemon
6
import optparse
7
import sys
8
import mechanize
9
import time
10
from shop2020.model.v1.catalog.impl import DataService
12297 kshitij.so 11
from shop2020.model.v1.catalog.impl.DataService import CompetitorPricing, CompetitorPricingRequest
12272 kshitij.so 12
import gc 
12256 kshitij.so 13
 
12268 kshitij.so 14
# Module-level setup: bind the catalog DataService to the database host
# returned by ConfigClient.  This runs once at import time, before any of the
# queries issued further down.
# NOTE(review): the host comes from the 'staging_hostname' property -- confirm
# that this is the database the daemon is meant to use.
config_client = ConfigClient()
host = config_client.get_property('staging_hostname')
DataService.initialize(db_hostname=host)
12256 kshitij.so 17
 
18
class CompetitorScraping(Daemon):
    """Daemon wrapper around the competitor-price scraping loop."""

    # Fallback locations, used when the caller passes nothing or None.
    DEFAULT_LOGFILE = '/var/log/services/competitorScraping.log'
    DEFAULT_PIDFILE = '/var/run/competitor-scraper.pid'

    def __init__(self, logfile=DEFAULT_LOGFILE, pidfile=DEFAULT_PIDFILE):
        """Set up the daemon with its log file and PID file.

        logfile -- path that receives both stdout and stderr.
        pidfile -- path of the PID file handed to Daemon.

        None is accepted for either argument and means "use the default".
        The command-line entry point of this script passes the optparse
        values straight through, and optparse yields None for an option that
        was not given, so without this the defaults would never apply there.
        """
        if logfile is None:
            logfile = self.DEFAULT_LOGFILE
        if pidfile is None:
            pidfile = self.DEFAULT_PIDFILE
        Daemon.__init__(self, pidfile, stdout=logfile, stderr=logfile)

    def run(self):
        """Run the polling loop.

        Delegates to the module-level start() function (not Daemon.start).
        Presumably Daemon.start() calls this after daemonizing -- confirm in
        shop2020.utils.daemon.
        """
        start()
24
 
25
def start():
26
    try:
27
        while True:
28
            requests = session.query(CompetitorPricingRequest).filter(or_(CompetitorPricingRequest.isProcessed==False,CompetitorPricingRequest.isProcessed==None)).order_by(asc(CompetitorPricingRequest.requestId)).all()
29
            if requests ==[] or requests is None:
30
                print "No new request to process, sleeeeeeping....."
12281 kshitij.so 31
                close_session()
32
                collected = gc.collect()
12292 kshitij.so 33
                print locals()
34
                print globals()
12281 kshitij.so 35
                print "Garbage collector: collected %d objects." % (collected)
12256 kshitij.so 36
                time.sleep(600)
37
            for request in requests:
38
                fetchDetails(request)
39
                request.isProcessed = True
40
                session.commit()
12314 kshitij.so 41
                sendMail(request)
12256 kshitij.so 42
            close_session()
12279 kshitij.so 43
            collected = gc.collect()
44
            print "Garbage collector: collected %d objects." % (collected)
12292 kshitij.so 45
            print locals()
46
            print globals()
12279 kshitij.so 47
            requests = []
12256 kshitij.so 48
    except Exception as e:
49
        print e
50
        sys.exit(2)
51
 
52
def fetchDetails(request):
12297 kshitij.so 53
    import threading
12256 kshitij.so 54
    items = session.query(CompetitorPricing).filter(CompetitorPricing.competitorPricing_requestId==request.requestId).all()
55
    print items
56
    snapdeal, flipkart, amazon =[],[],[]
57
    for item in items:
58
        if item.snapdealScraping:
59
            snapdeal.append(item)
60
        if item.flipkartScraping:
61
            flipkart.append(item)
62
        if item.amazonScraping:
63
            amazon.append(item)
64
    threads = []
65
    t1 = threading.Thread(target=scrapSnapdeal, args = (snapdeal,))
66
    t1.daemon = True
67
    t1.start()
68
    t2 = threading.Thread(target=scrapFlipkart, args = (flipkart,))
69
    t2.daemon = True
70
    t2.start()
15493 kshitij.so 71
    t3 = threading.Thread(target=scrapAmazon, args = (amazon,))
12278 kshitij.so 72
    t3.daemon = True
73
    t3.start()
12256 kshitij.so 74
    threads.append(t1)
75
    threads.append(t2)
76
    threads.append(t3)
77
    for th in threads:
78
        th.join()
12284 kshitij.so 79
    br,t1,t2,t3 =None,None,None,None
80
    items[:],snapdeal[:],flipkart[:],amazon[:],threads[:]=[],[],[],[],[]
12256 kshitij.so 81
 
82
def scrapSnapdeal(snapdealItems):
12297 kshitij.so 83
    import simplejson as json
84
    import urllib2
85
    from shop2020.model.v1.catalog.impl.DataService import SnapdealItem
12256 kshitij.so 86
    for snapdealItem in snapdealItems:
87
        sdItem = SnapdealItem.get_by(item_id=snapdealItem.item_id)
88
        if sdItem is None:
89
            continue
90
        try:
15484 kshitij.so 91
            url="http://www.snapdeal.com/acors/json/gvbps?supc=%s&catId=175&sort=sellingPrice"%(sdItem.supc)
12256 kshitij.so 92
            print url
93
            time.sleep(1)
94
            req = urllib2.Request(url)
95
            response = urllib2.urlopen(req)
96
            json_input = response.read()
97
            vendorInfo = json.loads(json_input)
15496 kshitij.so 98
            print vendorInfo
12256 kshitij.so 99
            lowestSp, iterator, ourInventory, lowestSellerInventory,ourSp,ourOfferPrice,lowestSp,lowestOfferPrice   = (0,)*8
100
            lowestSellerName = ''
101
            for vendor in vendorInfo:
102
                if iterator == 0:
103
                    lowestSellerName = vendor['vendorDisplayName']
104
                    try:
105
                        lowestSp = vendor['sellingPriceBefIntCashBack']
106
                    except:
107
                        lowestSp = vendor['sellingPrice']
108
                    lowestOfferPrice = vendor['sellingPrice']
109
                    lowestSellerInventory = vendor['buyableInventory']
110
 
111
                if vendor['vendorDisplayName'] == 'MobilesnMore':
112
                    ourInventory = vendor['buyableInventory']
113
                    try:
114
                        ourSp = vendor['sellingPriceBefIntCashBack']
115
                    except:
116
                        ourSp = vendor['sellingPrice']
117
                    ourOfferPrice = vendor['sellingPrice']
118
                iterator+=1
15497 kshitij.so 119
        except Exception as e:
120
            import traceback
121
            print traceback.print_exc()
12256 kshitij.so 122
            continue
12286 kshitij.so 123
        finally:
12287 kshitij.so 124
            sdItem =None
15496 kshitij.so 125
        print  "Item id ",sdItem.item_id
126
        print ourSp
127
        print ourOfferPrice
128
        print ourInventory
12256 kshitij.so 129
        snapdealItem.ourSnapdealPrice = ourSp
130
        snapdealItem.ourSnapdealOfferPrice = ourOfferPrice
131
        snapdealItem.ourSnapdealInventory = ourInventory
132
        snapdealItem.lowestSnapdealPrice = lowestSp
133
        snapdealItem.lowestSnapdealOfferPrice = lowestOfferPrice
134
        snapdealItem.lowestSnapdealSeller = lowestSellerName 
12314 kshitij.so 135
        snapdealItem.lowestSnapdealSellerInventory = lowestSellerInventory
136
    snapdealItems[:]=[]  
12256 kshitij.so 137
 
138
def scrapFlipkart(flipkartItems):
12297 kshitij.so 139
    from shop2020.model.v1.catalog.script import FlipkartScraper
140
    from operator import itemgetter
141
    import requests as httpRequest
142
    from shop2020.model.v1.catalog.impl.DataService import FlipkartItem
12276 kshitij.so 143
    scraperFk = FlipkartScraper.FlipkartScraper()
12256 kshitij.so 144
    for flipkartItem in flipkartItems:
145
        fkItem = FlipkartItem.get_by(item_id=flipkartItem.item_id)
146
        if fkItem is None:
147
            continue
148
        try:
149
            url = "http://www.flipkart.com/ps/%s"%(fkItem.flipkartSerialNumber)
150
            vendorsData = scraperFk.read(url)
151
            sortedVendorsData = []
152
            sortedVendorsData = sorted(vendorsData, key=itemgetter('sellingPrice'))
153
            lowestSellerSp, iterator, ourSp = (0,)*3
154
            lowestSellerName = ''
155
            for data in sortedVendorsData:
156
                if iterator == 0:
157
                    lowestSellerName = data['sellerName']
158
                    lowestSellerSp = data['sellingPrice']
159
 
160
                if data['sellerName'] == 'Saholic':
161
                    ourSp = data['sellingPrice']
162
 
163
                iterator+=1
164
        except:
165
            continue
12286 kshitij.so 166
        finally:
12287 kshitij.so 167
            fkItem=None
12256 kshitij.so 168
        try:
169
            request_url = "https://api.flipkart.net/sellers/skus/%s/listings"%(str(fkItem.flipkartSerialNumber))
170
            r = httpRequest.get(request_url, auth=('m2z93iskuj81qiid', '0c7ab6a5-98c0-4cdc-8be3-72c591e0add4'))
171
            print "Inventory info",r.json()
172
            stock_count = int((r.json()['attributeValues'])['stock_count'])
173
        except:
174
            stock_count = 0
175
        finally:
176
                r={}
177
        flipkartItem.ourFlipkartPrice = ourSp
178
        flipkartItem.ourFlipkartInventory = stock_count
179
        flipkartItem.lowestFlipkartPrice = lowestSellerSp
180
        flipkartItem.lowestFlipkartSeller =  lowestSellerName
12283 kshitij.so 181
    scraperFk = None
12314 kshitij.so 182
    flipkartItems[:] =[] 
12256 kshitij.so 183
 
184
 
185
def close_session():
186
    if session.is_active:
187
        print "session is active. closing it."
188
        session.close()
189
 
15492 kshitij.so 190
def scrapAmazon(amazonItems):
15484 kshitij.so 191
    from shop2020.model.v1.catalog.script import AmazonScraper
192
    from shop2020.model.v1.catalog.impl.DataService import Amazonlisted
12277 kshitij.so 193
    print "Inside amazonitems ",amazonItems
194
    print "len amazon items ",len(amazonItems)
195
    time.sleep(5)
15484 kshitij.so 196
    amScraper = AmazonScraper.AmazonScraper()
12256 kshitij.so 197
    for amazonItem in amazonItems:
15484 kshitij.so 198
        amazon_d_item = Amazonlisted.get_by(itemId=amazonItem.item_id)
199
        if len(amazon_d_item.asin)==0:
200
            print "No asin found for ",amazonItem.item_id
12256 kshitij.so 201
            continue
15484 kshitij.so 202
        saholicUrl = "http://www.amazon.in/gp/offer-listing/%s?m=AF6E3O0VE0X4D&s=merchant-items&ie=UTF8&qid=1433584512&sr=1-1"%(amazon_d_item.asin.strip())
15495 kshitij.so 203
        generalUrl = "www.amazon.in/gp/offer-listing/%s"%(amazon_d_item.asin.strip())
12256 kshitij.so 204
        try:
15484 kshitij.so 205
            saholicPrice = (amScraper.read(saholicUrl, False))[0]
206
        except:
207
            saholicPrice = 0.0
208
        try:
209
            cheapestSeller = (amScraper.read(generalUrl, True))
210
            cheapestSellerPrice = cheapestSeller[0]
211
            cheapestSellerName = cheapestSeller[1]
212
        except:
213
            cheapestSellerPrice = 0.0
214
            cheapestSellerName = ""
215
        amazonItem.ourAmazonPrice = saholicPrice
216
        amazonItem.lowestAmazonPrice = cheapestSellerPrice
217
        amazonItem.lowestAmazonSeller = cheapestSellerName 
12314 kshitij.so 218
    amazonItems[:] =[] 
12256 kshitij.so 219
 
220
 
12314 kshitij.so 221
def sendMail(request):
222
    import smtplib
223
    from email.mime.text import MIMEText
224
    from email.mime.multipart import MIMEMultipart
225
    mailServer = smtplib.SMTP("smtp.gmail.com", 587)
226
    mailServer.ehlo()
227
    mailServer.starttls()
228
    mailServer.ehlo()
229
    recipients = []
230
    recipients.append(request.user)
231
    message = "Your Request has been processed.Visit dashboard to check & download report" 
232
    msg = MIMEMultipart()
12329 kshitij.so 233
    msg['Subject'] = "Competition Scraping.Upload Id" + ' - ' + str(request.requestId)
12314 kshitij.so 234
    msg['From'] = ""
235
    msg['To'] = ",".join(recipients)
236
    msg.preamble = "Competition Scraping" + ' - ' + str(request.requestId)
237
    html_msg = MIMEText(message, 'html')
238
    msg.attach(html_msg)
239
    try:
240
        mailServer.login("build@shop2020.in", "cafe@nes")
241
        #mailServer.sendmail("cafe@nes", ['kshitij.sood@saholic.com'], msg.as_string())
242
        mailServer.sendmail("cafe@nes", recipients, msg.as_string())
243
    except Exception as e:
244
        print e
245
        print "Unable to send mail.Lets try with local SMTP."
246
        smtpServer = smtplib.SMTP('localhost')
247
        smtpServer.set_debuglevel(1)
248
        sender = 'build@shop2020.in'
249
        try:
250
            smtpServer.sendmail(sender, recipients, msg.as_string())
251
            print "Successfully sent email"
252
        except:
253
            print "Error: unable to send email."
254
 
12256 kshitij.so 255
 
256
 
257
if __name__ == "__main__":
258
    parser = optparse.OptionParser()
259
    parser.add_option("-l", "--logfile", dest="logfile",
260
                      type="string",
261
                      help="Log all output to LOG_FILE",
262
                      )
263
    parser.add_option("-i", "--pidfile", dest="pidfile",
264
                      type="string",
265
                      help="Write the PID to pidfile")
266
    (options, args) = parser.parse_args()
267
    daemon = CompetitorScraping(options.logfile, options.pidfile)
268
    if len(args) == 0:
269
        daemon.run()
270
    elif len(args) == 1:
271
        if 'start' == args[0]:
272
            daemon.start()
273
        elif 'stop' == args[0]:
274
            daemon.stop()
275
        elif 'restart' == args[0]:
276
            daemon.restart()
277
        else:
278
            print "Unknown command"
279
            sys.exit(2)
280
        sys.exit(0)
281
    else:
282
        print "usage: %s start|stop|restart" % sys.argv[0]
283
        sys.exit(2)