Subversion Repositories SmartDukaan

Rev

Rev 15495 | Rev 15497 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
12256 kshitij.so 1
from elixir import session
12268 kshitij.so 2
from shop2020.config.client.ConfigClient import ConfigClient
12256 kshitij.so 3
from sqlalchemy.sql import asc
4
from sqlalchemy.sql.expression import or_
5
from shop2020.utils.daemon import Daemon
6
import optparse
7
import sys
8
import mechanize
9
import time
10
from shop2020.model.v1.catalog.impl import DataService
12297 kshitij.so 11
from shop2020.model.v1.catalog.impl.DataService import CompetitorPricing, CompetitorPricingRequest
12272 kshitij.so 12
import gc 
12256 kshitij.so 13
 
12268 kshitij.so 14
# Resolve the catalog database host from the central configuration service
# and bind the catalog data model to it.  This happens at import time, so
# every function below can rely on an initialised DataService.
config_client = ConfigClient()
host = config_client.get_property('staging_hostname')
DataService.initialize(db_hostname=host)
12256 kshitij.so 17
 
18
class CompetitorScraping(Daemon):
    """Daemon wrapper around the competitor-pricing scraping loop.

    Both stdout and stderr of the daemon are redirected to ``logfile``.
    """

    DEFAULT_LOGFILE = '/var/log/services/competitorScraping.log'
    DEFAULT_PIDFILE = '/var/run/competitor-scraper.pid'

    def __init__(self, logfile=DEFAULT_LOGFILE, pidfile=DEFAULT_PIDFILE):
        """Set up the daemon.

        ``logfile`` -- file receiving stdout and stderr.
        ``pidfile`` -- file the daemon PID is written to.

        ``None`` for either argument selects the built-in default.  The
        command-line entry point passes the optparse values straight
        through, and optparse yields ``None`` for any option that was not
        supplied, which would otherwise override the defaults with ``None``.
        """
        if logfile is None:
            logfile = self.DEFAULT_LOGFILE
        if pidfile is None:
            pidfile = self.DEFAULT_PIDFILE
        Daemon.__init__(self, pidfile, stdout=logfile, stderr=logfile)

    def run(self):
        """Run the polling loop implemented by the module-level start()."""
        start()
24
 
25
def start():
    """Poll for unprocessed CompetitorPricingRequest rows and process them.

    Each pass loads every request whose isProcessed flag is False or NULL,
    ordered by requestId.  When none are pending the session is closed and
    the loop sleeps for ten minutes.  Every pending request is scraped,
    flagged as processed, committed, and its owner is mailed.  Any exception
    is printed and terminates the process with exit status 2.
    """
    # NOTE: local variable names are kept stable on purpose; the loop dumps
    # locals() to the log, so renaming them would change the output.
    try:
        while True:
            requests = session.query(CompetitorPricingRequest).filter(
                or_(CompetitorPricingRequest.isProcessed == False,
                    CompetitorPricingRequest.isProcessed == None)
            ).order_by(asc(CompetitorPricingRequest.requestId)).all()

            if requests is None or requests == []:
                # Nothing pending: release resources, then wait.
                print("No new request to process, sleeeeeeping.....")
                close_session()
                collected = gc.collect()
                print(locals())
                print(globals())
                print("Garbage collector: collected %d objects." % (collected))
                time.sleep(600)

            for request in requests:
                fetchDetails(request)
                request.isProcessed = True
                session.commit()
                sendMail(request)

            # End-of-pass cleanup (also runs after an idle sleep).
            close_session()
            collected = gc.collect()
            print("Garbage collector: collected %d objects." % (collected))
            print(locals())
            print(globals())
            requests = []
    except Exception as e:
        print(e)
        sys.exit(2)
51
 
52
def fetchDetails(request):
    """Scrape all marketplaces for the items attached to ``request``.

    The CompetitorPricing rows of the request are split by their
    snapdealScraping / flipkartScraping / amazonScraping flags, and one
    daemon thread per marketplace processes its bucket.  The call returns
    once all three threads have finished; every working list is emptied in
    place before returning.
    """
    import threading

    items = session.query(CompetitorPricing).filter(
        CompetitorPricing.competitorPricing_requestId == request.requestId
    ).all()
    print(items)

    # An item may appear in several buckets when more than one flag is set.
    snapdeal = [entry for entry in items if entry.snapdealScraping]
    flipkart = [entry for entry in items if entry.flipkartScraping]
    amazon = [entry for entry in items if entry.amazonScraping]

    jobs = (
        (scrapSnapdeal, snapdeal),
        (scrapFlipkart, flipkart),
        (scrapAmazon, amazon),
    )
    threads = []
    for scraper, bucket in jobs:
        worker = threading.Thread(target=scraper, args=(bucket,))
        worker.daemon = True
        worker.start()
        threads.append(worker)

    for worker in threads:
        worker.join()

    # Drop every reference held here so the objects can be collected.
    worker = None
    for used in (items, snapdeal, flipkart, amazon, threads):
        del used[:]
12256 kshitij.so 81
 
82
def scrapSnapdeal(snapdealItems):
    """Fill in the Snapdeal pricing fields of each CompetitorPricing row.

    For every entry the SnapdealItem with the same item_id is looked up and
    the Snapdeal vendor JSON for its SUPC is fetched.  The first vendor in
    the response is recorded as the lowest seller (the URL requests
    sort=sellingPrice), and the vendor named 'MobilesnMore' is recorded as
    our own listing.  Values not found in the response stay at 0 / ''.

    Entries without a SnapdealItem row, or whose fetch or parse fails, are
    skipped and left untouched.  ``snapdealItems`` is emptied in place
    before returning.
    """
    import simplejson as json
    import urllib2
    from shop2020.model.v1.catalog.impl.DataService import SnapdealItem

    def priceBeforeCashback(vendor):
        # Prefer the pre-cashback price; fall back to the selling price
        # when the vendor entry does not carry it.
        try:
            return vendor['sellingPriceBefIntCashBack']
        except KeyError:
            return vendor['sellingPrice']

    for snapdealItem in snapdealItems:
        sdItem = SnapdealItem.get_by(item_id=snapdealItem.item_id)
        if sdItem is None:
            continue

        ourSp, ourOfferPrice, ourInventory = 0, 0, 0
        lowestSp, lowestOfferPrice, lowestSellerInventory = 0, 0, 0
        lowestSellerName = ''
        try:
            url = "http://www.snapdeal.com/acors/json/gvbps?supc=%s&catId=175&sort=sellingPrice" % (sdItem.supc,)
            print(url)
            time.sleep(1)  # throttle requests to the remote site
            response = urllib2.urlopen(urllib2.Request(url))
            try:
                json_input = response.read()
            finally:
                response.close()
            vendorInfo = json.loads(json_input)
            print(vendorInfo)

            for position, vendor in enumerate(vendorInfo):
                if position == 0:
                    lowestSellerName = vendor['vendorDisplayName']
                    lowestSp = priceBeforeCashback(vendor)
                    lowestOfferPrice = vendor['sellingPrice']
                    lowestSellerInventory = vendor['buyableInventory']

                if vendor['vendorDisplayName'] == 'MobilesnMore':
                    ourInventory = vendor['buyableInventory']
                    ourSp = priceBeforeCashback(vendor)
                    ourOfferPrice = vendor['sellingPrice']
        except Exception as e:
            # Best effort: report the failure and move on to the next item.
            print(e)
            continue

        # The item id is read from the pricing row itself.  The looked-up
        # SnapdealItem used to be set to None before this point, so the
        # print raised AttributeError and nothing below was ever stored.
        print("Item id  %s" % (snapdealItem.item_id,))
        print(ourSp)
        print(ourOfferPrice)
        print(ourInventory)
        snapdealItem.ourSnapdealPrice = ourSp
        snapdealItem.ourSnapdealOfferPrice = ourOfferPrice
        snapdealItem.ourSnapdealInventory = ourInventory
        snapdealItem.lowestSnapdealPrice = lowestSp
        snapdealItem.lowestSnapdealOfferPrice = lowestOfferPrice
        snapdealItem.lowestSnapdealSeller = lowestSellerName
        snapdealItem.lowestSnapdealSellerInventory = lowestSellerInventory
    snapdealItems[:] = []
12256 kshitij.so 135
 
136
def scrapFlipkart(flipkartItems):
    """Fill in the Flipkart pricing fields of each CompetitorPricing row.

    For every entry the FlipkartItem with the same item_id is looked up and
    the seller list for its serial number is scraped.  The cheapest seller
    is recorded as the lowest seller and the seller named 'Saholic' as our
    own listing.  Our stock count is then read from the Flipkart seller
    API; if that call fails the inventory is stored as 0.

    Entries without a FlipkartItem row, or whose scrape fails, are skipped
    and left untouched.  ``flipkartItems`` is emptied in place before
    returning.
    """
    from shop2020.model.v1.catalog.script import FlipkartScraper
    from operator import itemgetter
    import requests as httpRequest
    from shop2020.model.v1.catalog.impl.DataService import FlipkartItem

    scraperFk = FlipkartScraper.FlipkartScraper()
    for flipkartItem in flipkartItems:
        fkItem = FlipkartItem.get_by(item_id=flipkartItem.item_id)
        if fkItem is None:
            continue
        # Capture the serial number before dropping the row reference.  The
        # inventory lookup used to read it from a row already set to None,
        # so it always failed and the stock count was always 0.
        serialNumber = fkItem.flipkartSerialNumber
        fkItem = None

        lowestSellerSp, ourSp = 0, 0
        lowestSellerName = ''
        try:
            url = "http://www.flipkart.com/ps/%s" % (serialNumber,)
            vendorsData = scraperFk.read(url)
            sortedVendorsData = sorted(vendorsData, key=itemgetter('sellingPrice'))
            for position, data in enumerate(sortedVendorsData):
                if position == 0:
                    lowestSellerName = data['sellerName']
                    lowestSellerSp = data['sellingPrice']

                if data['sellerName'] == 'Saholic':
                    ourSp = data['sellingPrice']
        except Exception as e:
            # Best effort: report the failure and move on to the next item.
            print(e)
            continue

        try:
            request_url = "https://api.flipkart.net/sellers/skus/%s/listings" % (str(serialNumber),)
            # SECURITY(review): API credentials are hard-coded in source;
            # they should be moved to configuration.
            r = httpRequest.get(request_url, auth=('m2z93iskuj81qiid', '0c7ab6a5-98c0-4cdc-8be3-72c591e0add4'))
            listing = r.json()
            print("Inventory info %s" % (listing,))
            stock_count = int(listing['attributeValues']['stock_count'])
        except Exception:
            # An unknown inventory is reported as zero stock.
            stock_count = 0

        flipkartItem.ourFlipkartPrice = ourSp
        flipkartItem.ourFlipkartInventory = stock_count
        flipkartItem.lowestFlipkartPrice = lowestSellerSp
        flipkartItem.lowestFlipkartSeller = lowestSellerName
    scraperFk = None
    flipkartItems[:] = []
12256 kshitij.so 181
 
182
 
183
def close_session():
    """Close the shared elixir session, but only while it reports active."""
    if not session.is_active:
        return
    print("session is active. closing it.")
    session.close()
187
 
15492 kshitij.so 188
def scrapAmazon(amazonItems):
    """Fill in the Amazon pricing fields of each CompetitorPricing row.

    For every entry the Amazonlisted row with the same item id supplies the
    ASIN.  Our own price is read from the offer listing restricted to our
    merchant id, and the cheapest seller (price and name) from the general
    offer listing.  A failed read stores 0.0 / '' for the affected fields.

    Entries with no Amazonlisted row or no ASIN are reported and skipped.
    ``amazonItems`` is emptied in place before returning.
    """
    from shop2020.model.v1.catalog.script import AmazonScraper
    from shop2020.model.v1.catalog.impl.DataService import Amazonlisted

    print("Inside amazonitems  %s" % (amazonItems,))
    print("len amazon items  %s" % (len(amazonItems),))
    time.sleep(5)
    amScraper = AmazonScraper.AmazonScraper()
    for amazonItem in amazonItems:
        amazon_d_item = Amazonlisted.get_by(itemId=amazonItem.item_id)
        # A missing row or a NULL/blank ASIN must not raise: an uncaught
        # exception here ends the worker thread and leaves every remaining
        # item unprocessed.
        asin = ''
        if amazon_d_item is not None and amazon_d_item.asin:
            asin = amazon_d_item.asin.strip()
        if not asin:
            print("No asin found for  %s" % (amazonItem.item_id,))
            continue

        saholicUrl = "http://www.amazon.in/gp/offer-listing/%s?m=AF6E3O0VE0X4D&s=merchant-items&ie=UTF8&qid=1433584512&sr=1-1" % (asin,)
        # NOTE(review): this URL has no scheme, unlike saholicUrl -- confirm
        # that AmazonScraper.read expects it in this form.
        generalUrl = "www.amazon.in/gp/offer-listing/%s" % (asin,)

        try:
            saholicPrice = (amScraper.read(saholicUrl, False))[0]
        except Exception:
            saholicPrice = 0.0

        try:
            cheapestSeller = amScraper.read(generalUrl, True)
            cheapestSellerPrice = cheapestSeller[0]
            cheapestSellerName = cheapestSeller[1]
        except Exception:
            cheapestSellerPrice = 0.0
            cheapestSellerName = ""

        amazonItem.ourAmazonPrice = saholicPrice
        amazonItem.lowestAmazonPrice = cheapestSellerPrice
        amazonItem.lowestAmazonSeller = cheapestSellerName
    amazonItems[:] = []
12256 kshitij.so 217
 
218
 
12314 kshitij.so 219
def sendMail(request):
    """Notify the requesting user that their scraping request is done.

    The mail goes to ``request.user`` through Gmail SMTP first.  If
    anything in that path fails (connect, TLS, login or send), the failure
    is printed and the local SMTP server is tried instead.  A failure of
    the fallback is printed as well; this function never raises for mail
    delivery problems, so a mail outage cannot stop the caller.
    """
    import smtplib
    from email.mime.text import MIMEText
    from email.mime.multipart import MIMEMultipart

    def closeQuietly(server):
        # Closing is best effort; a failing QUIT must not trigger a resend.
        try:
            server.quit()
        except Exception:
            pass

    sender = 'build@shop2020.in'
    recipients = [request.user]
    message = "Your Request has been processed.Visit dashboard to check & download report"

    msg = MIMEMultipart()
    msg['Subject'] = "Competition Scraping.Upload Id" + ' - ' + str(request.requestId)
    # The From header used to be empty; it now names the sending account.
    msg['From'] = sender
    msg['To'] = ",".join(recipients)
    msg.preamble = "Competition Scraping" + ' - ' + str(request.requestId)
    msg.attach(MIMEText(message, 'html'))

    try:
        mailServer = smtplib.SMTP("smtp.gmail.com", 587)
        try:
            mailServer.ehlo()
            mailServer.starttls()
            mailServer.ehlo()
            # SECURITY(review): the account password is hard-coded in
            # source; it should be moved to configuration.
            mailServer.login(sender, "cafe@nes")
            # The envelope sender must be the account address.  The
            # password string used to be passed here, which disclosed it
            # in the SMTP envelope.
            mailServer.sendmail(sender, recipients, msg.as_string())
        finally:
            closeQuietly(mailServer)
    except Exception as e:
        print(e)
        print("Unable to send mail.Lets try with local SMTP.")
        try:
            smtpServer = smtplib.SMTP('localhost')
            try:
                smtpServer.set_debuglevel(1)
                smtpServer.sendmail(sender, recipients, msg.as_string())
                print("Successfully sent email")
            finally:
                closeQuietly(smtpServer)
        except Exception:
            print("Error: unable to send email.")
252
 
12256 kshitij.so 253
 
254
 
255
# Command-line entry point.
#   no positional argument     -> run the scraping loop in the foreground
#   start | stop | restart     -> control the background daemon
if __name__ == "__main__":
    parser = optparse.OptionParser()
    parser.add_option("-l", "--logfile", dest="logfile", type="string",
                      help="Log all output to LOG_FILE")
    parser.add_option("-i", "--pidfile", dest="pidfile", type="string",
                      help="Write the PID to pidfile")
    (options, args) = parser.parse_args()
    daemon = CompetitorScraping(options.logfile, options.pidfile)

    # No extra module-level names are introduced here on purpose: the
    # scraping loop dumps globals() to the log.
    if not args:
        daemon.run()
    elif len(args) > 1:
        print("usage: %s start|stop|restart" % sys.argv[0])
        sys.exit(2)
    else:
        if args[0] not in ('start', 'stop', 'restart'):
            print("Unknown command")
            sys.exit(2)
        # The command name is also the name of the daemon method to call.
        getattr(daemon, args[0])()
        sys.exit(0)