Subversion Repositories SmartDukaan

Rev

Rev 15484 | Rev 15493 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
12256 kshitij.so 1
from elixir import session
12268 kshitij.so 2
from shop2020.config.client.ConfigClient import ConfigClient
12256 kshitij.so 3
from sqlalchemy.sql import asc
4
from sqlalchemy.sql.expression import or_
5
from shop2020.utils.daemon import Daemon
6
import optparse
7
import sys
8
import mechanize
9
import time
10
from shop2020.model.v1.catalog.impl import DataService
12297 kshitij.so 11
from shop2020.model.v1.catalog.impl.DataService import CompetitorPricing, CompetitorPricingRequest
12272 kshitij.so 12
import gc 
12256 kshitij.so 13
 
12268 kshitij.so 14
# Import-time setup: read the 'staging_hostname' property from the config
# client and initialise the catalog DataService against that database host.
# This runs as soon as the module is imported, before any daemon code.
config_client = ConfigClient()
15
host = config_client.get_property('staging_hostname')
16
DataService.initialize(db_hostname=host)
12256 kshitij.so 17
 
18
class CompetitorScraping(Daemon):
    """Daemon wrapper around the competitor-pricing scraping loop.

    ``logfile`` is handed to ``Daemon`` as both the stdout and the stderr
    target; ``pidfile`` is handed through as the pid file path.
    """

    DEFAULT_LOGFILE = '/var/log/services/competitorScraping.log'
    DEFAULT_PIDFILE = '/var/run/competitor-scraper.pid'

    def __init__(self, logfile='/var/log/services/competitorScraping.log', pidfile='/var/run/competitor-scraper.pid'):
        """Create the daemon.

        :param logfile: path receiving stdout/stderr; ``None`` selects the
            default path.
        :param pidfile: pid file path; ``None`` selects the default path.
        """
        # The command-line entry point forwards the raw optparse values,
        # which are None whenever -l / -i are omitted. Without this fallback
        # the defaults in the signature would be overridden by None.
        if logfile is None:
            logfile = self.DEFAULT_LOGFILE
        if pidfile is None:
            pidfile = self.DEFAULT_PIDFILE
        Daemon.__init__(self, pidfile, stdout=logfile, stderr=logfile)

    def run(self):
        """Run the polling loop implemented by the module-level start()."""
        start()
24
 
25
def start():
    """Poll for unprocessed CompetitorPricingRequest rows and process them.

    Each cycle loads every request whose ``isProcessed`` is False or NULL,
    ordered by ascending ``requestId``. For each request the prices are
    fetched, the request is flagged as processed and committed, and a
    notification mail is sent. When nothing is pending the loop sleeps for
    600 seconds before polling again.

    Any exception escaping the loop is printed with its traceback and the
    process exits with status 2.
    """
    import traceback

    try:
        while True:
            # '== False' / '== None' are SQLAlchemy column expressions, not
            # Python comparisons, so they must not be rewritten as 'is'/'not'.
            requests = session.query(CompetitorPricingRequest).filter(
                or_(CompetitorPricingRequest.isProcessed == False,
                    CompetitorPricingRequest.isProcessed == None)
            ).order_by(asc(CompetitorPricingRequest.requestId)).all()

            if not requests:
                print("No new request to process, sleeeeeeping.....")
                close_session()
                collected = gc.collect()
                print("Garbage collector: collected %d objects." % (collected))
                time.sleep(600)
                # Nothing to process: go straight back to polling instead of
                # running the cleanup pass a second time.
                continue

            for request in requests:
                fetchDetails(request)
                request.isProcessed = True
                session.commit()
                sendMail(request)

            # Drop the ORM objects before collecting so they can be freed.
            requests = []
            close_session()
            collected = gc.collect()
            print("Garbage collector: collected %d objects." % (collected))
    except Exception:
        # Keep the full traceback in the log; the message alone is rarely
        # enough to diagnose why the daemon stopped.
        traceback.print_exc(file=sys.stdout)
        sys.exit(2)
51
 
52
def fetchDetails(request):
    """Scrape competitor prices for every item attached to ``request``.

    Loads the CompetitorPricing rows whose ``competitorPricing_requestId``
    matches ``request.requestId``, groups them by the marketplace flags set
    on each row (a row can belong to several groups), and runs one scraper
    thread per marketplace. Returns once all three threads have finished.

    :param request: object exposing ``requestId``.
    """
    import threading

    items = session.query(CompetitorPricing).filter(
        CompetitorPricing.competitorPricing_requestId == request.requestId).all()
    print(items)

    snapdeal, flipkart, amazon = [], [], []
    for item in items:
        if item.snapdealScraping:
            snapdeal.append(item)
        if item.flipkartScraping:
            flipkart.append(item)
        if item.amazonScraping:
            amazon.append(item)

    jobs = (
        (scrapSnapdeal, snapdeal),
        (scrapFlipkart, flipkart),
        (scrapAmazon, amazon),
    )
    threads = []
    for target, batch in jobs:
        # args must be a one-element tuple. Passing the bare list would make
        # Thread unpack it and hand every item over as a separate positional
        # argument.
        worker = threading.Thread(target=target, args=(batch,))
        worker.daemon = True
        worker.start()
        threads.append(worker)

    for worker in threads:
        worker.join()

    # Empty the lists in place so no reference to the ORM rows outlives the
    # call.
    items[:], snapdeal[:], flipkart[:], amazon[:], threads[:] = [], [], [], [], []
12256 kshitij.so 81
 
82
def scrapSnapdeal(snapdealItems):
    """Fill in Snapdeal price/inventory fields on each item of ``snapdealItems``.

    For every item with a matching SnapdealItem row, the vendor list for its
    ``supc`` is fetched from Snapdeal. The first vendor in the response is
    recorded as the lowest seller (the URL asks for sort=sellingPrice) and
    the vendor displayed as 'MobilesnMore' is recorded as our own listing.
    Items without a SnapdealItem row, or whose fetch/parse fails, are left
    untouched.

    :param snapdealItems: list of pricing rows; it is emptied in place before
        returning.
    """
    import simplejson as json
    import urllib2
    from contextlib import closing
    from shop2020.model.v1.catalog.impl.DataService import SnapdealItem

    for snapdealItem in snapdealItems:
        sdItem = SnapdealItem.get_by(item_id=snapdealItem.item_id)
        if sdItem is None:
            continue
        try:
            url = "http://www.snapdeal.com/acors/json/gvbps?supc=%s&catId=175&sort=sellingPrice" % (sdItem.supc)
            print(url)
            # Pause before every request.
            time.sleep(1)
            # closing() guarantees the HTTP response is released even when
            # reading or parsing fails.
            with closing(urllib2.urlopen(urllib2.Request(url))) as response:
                vendorInfo = json.loads(response.read())

            lowestSp = lowestOfferPrice = lowestSellerInventory = 0
            ourSp = ourOfferPrice = ourInventory = 0
            lowestSellerName = ''
            for position, vendor in enumerate(vendorInfo):
                if position == 0:
                    lowestSellerName = vendor['vendorDisplayName']
                    lowestSp, lowestOfferPrice, lowestSellerInventory = _snapdealVendorFigures(vendor)
                if vendor['vendorDisplayName'] == 'MobilesnMore':
                    ourSp, ourOfferPrice, ourInventory = _snapdealVendorFigures(vendor)
        except Exception as e:
            # Best effort: one bad item must not stop the rest of the batch,
            # but the failure is logged instead of being silently dropped.
            print("Snapdeal scraping failed for item %s: %s" % (snapdealItem.item_id, e))
            continue
        finally:
            sdItem = None

        snapdealItem.ourSnapdealPrice = ourSp
        snapdealItem.ourSnapdealOfferPrice = ourOfferPrice
        snapdealItem.ourSnapdealInventory = ourInventory
        snapdealItem.lowestSnapdealPrice = lowestSp
        snapdealItem.lowestSnapdealOfferPrice = lowestOfferPrice
        snapdealItem.lowestSnapdealSeller = lowestSellerName
        snapdealItem.lowestSnapdealSellerInventory = lowestSellerInventory
    snapdealItems[:] = []


def _snapdealVendorFigures(vendor):
    """Return (price, offerPrice, inventory) for one Snapdeal vendor entry.

    ``price`` is 'sellingPriceBefIntCashBack' when the entry has that key and
    falls back to 'sellingPrice' otherwise; ``offerPrice`` is always
    'sellingPrice'.
    """
    offerPrice = vendor['sellingPrice']
    price = vendor.get('sellingPriceBefIntCashBack', offerPrice)
    return price, offerPrice, vendor['buyableInventory']
12256 kshitij.so 130
 
131
def scrapFlipkart(flipkartItems):
    """Fill in Flipkart price/inventory fields on each item of ``flipkartItems``.

    For every item with a matching FlipkartItem row, the seller list is
    scraped from the Flipkart page for its serial number and sorted by
    'sellingPrice'; the first entry is recorded as the lowest seller and the
    seller named 'Saholic' as our own price. Our stock count is then read
    from the Flipkart seller listings API; any failure there yields 0.
    Items without a FlipkartItem row, or whose page scrape fails, are left
    untouched.

    :param flipkartItems: list of pricing rows; it is emptied in place before
        returning.
    """
    from shop2020.model.v1.catalog.script import FlipkartScraper
    from operator import itemgetter
    import requests as httpRequest
    from shop2020.model.v1.catalog.impl.DataService import FlipkartItem

    scraperFk = FlipkartScraper.FlipkartScraper()
    for flipkartItem in flipkartItems:
        fkItem = FlipkartItem.get_by(item_id=flipkartItem.item_id)
        if fkItem is None:
            continue
        try:
            # Copy the serial number out before the row reference is dropped:
            # the inventory lookup further down needs it as well.
            serialNumber = fkItem.flipkartSerialNumber
            url = "http://www.flipkart.com/ps/%s" % (serialNumber,)
            vendorsData = scraperFk.read(url)
            sortedVendorsData = sorted(vendorsData, key=itemgetter('sellingPrice'))
            lowestSellerSp, ourSp = 0, 0
            lowestSellerName = ''
            for position, data in enumerate(sortedVendorsData):
                if position == 0:
                    lowestSellerName = data['sellerName']
                    lowestSellerSp = data['sellingPrice']
                if data['sellerName'] == 'Saholic':
                    ourSp = data['sellingPrice']
        except Exception as e:
            # Best effort: log and move on to the next item.
            print("Flipkart scraping failed for item %s: %s" % (flipkartItem.item_id, e))
            continue
        finally:
            fkItem = None

        try:
            request_url = "https://api.flipkart.net/sellers/skus/%s/listings" % (str(serialNumber))
            # NOTE(review): API credentials are hard-coded in source; they
            # should be moved to configuration and rotated.
            r = httpRequest.get(request_url, auth=('m2z93iskuj81qiid', '0c7ab6a5-98c0-4cdc-8be3-72c591e0add4'))
            listing = r.json()
            print("Inventory info %s" % (listing,))
            stock_count = int((listing['attributeValues'])['stock_count'])
        except Exception:
            # Inventory is optional; fall back to 0 when it cannot be read.
            stock_count = 0
        finally:
            r = {}

        flipkartItem.ourFlipkartPrice = ourSp
        flipkartItem.ourFlipkartInventory = stock_count
        flipkartItem.lowestFlipkartPrice = lowestSellerSp
        flipkartItem.lowestFlipkartSeller = lowestSellerName
    scraperFk = None
    flipkartItems[:] = []
12256 kshitij.so 176
 
177
 
178
def close_session():
    """Close the shared session, but only when it reports itself as active."""
    if not session.is_active:
        return
    print("session is active. closing it.")
    session.close()
182
 
15492 kshitij.so 183
def scrapAmazon(amazonItems):
    """Fill in Amazon price fields on each item of ``amazonItems``.

    For every item with an Amazonlisted row carrying a non-empty ASIN, our
    own price is read from the merchant-filtered offer listing page and the
    cheapest seller (price, name) from the product page. Either lookup
    falls back to 0.0 / "" on failure. Items without a usable ASIN are
    skipped.

    :param amazonItems: list of pricing rows; it is emptied in place before
        returning.
    """
    from shop2020.model.v1.catalog.script import AmazonScraper
    from shop2020.model.v1.catalog.impl.DataService import Amazonlisted

    print("Inside amazonitems  %s" % (amazonItems,))
    print("len amazon items  %s" % (len(amazonItems),))
    time.sleep(5)
    amScraper = AmazonScraper.AmazonScraper()
    for amazonItem in amazonItems:
        amazon_d_item = Amazonlisted.get_by(itemId=amazonItem.item_id)
        # get_by() yields None when there is no row, and asin itself may be
        # unset; both cases mean there is nothing to look up.
        asin = ''
        if amazon_d_item is not None and amazon_d_item.asin:
            asin = amazon_d_item.asin.strip()
        if not asin:
            print("No asin found for  %s" % (amazonItem.item_id,))
            continue

        saholicUrl = "http://www.amazon.in/gp/offer-listing/%s?m=AF6E3O0VE0X4D&s=merchant-items&ie=UTF8&qid=1433584512&sr=1-1" % (asin,)
        # The product page is addressed by ASIN, not by the catalog item id
        # that was used for the lookup above.
        generalUrl = "http://www.amazon.in/dp/%s" % (asin,)

        try:
            saholicPrice = (amScraper.read(saholicUrl, False))[0]
        except Exception:
            saholicPrice = 0.0
        try:
            cheapestSeller = amScraper.read(generalUrl, True)
            cheapestSellerPrice = cheapestSeller[0]
            cheapestSellerName = cheapestSeller[1]
        except Exception:
            cheapestSellerPrice = 0.0
            cheapestSellerName = ""

        amazonItem.ourAmazonPrice = saholicPrice
        amazonItem.lowestAmazonPrice = cheapestSellerPrice
        amazonItem.lowestAmazonSeller = cheapestSellerName
    amazonItems[:] = []
12256 kshitij.so 212
 
213
 
12314 kshitij.so 214
def sendMail(request):
    """Notify ``request.user`` that the scraping request has been processed.

    The mail is first sent through smtp.gmail.com:587 over STARTTLS. If
    anything in that path fails (connect, TLS, login or send), the error is
    printed and the same message is retried through the SMTP server on
    localhost. A failure of the fallback is printed as well; SMTP problems
    are never raised to the caller.

    :param request: object exposing ``user`` (recipient address) and
        ``requestId``.
    """
    import smtplib
    from email.mime.text import MIMEText
    from email.mime.multipart import MIMEMultipart

    sender = 'build@shop2020.in'
    recipients = [request.user]
    message = "Your Request has been processed.Visit dashboard to check & download report"

    msg = MIMEMultipart()
    msg['Subject'] = "Competition Scraping.Upload Id" + ' - ' + str(request.requestId)
    msg['From'] = sender
    msg['To'] = ",".join(recipients)
    msg.preamble = "Competition Scraping" + ' - ' + str(request.requestId)
    msg.attach(MIMEText(message, 'html'))
    payload = msg.as_string()

    try:
        mailServer = smtplib.SMTP("smtp.gmail.com", 587)
        try:
            mailServer.ehlo()
            mailServer.starttls()
            mailServer.ehlo()
            # NOTE(review): the account password is hard-coded in source; it
            # should be moved to configuration and rotated.
            mailServer.login(sender, "cafe@nes")
            # The envelope sender must be the account address, not the
            # password string.
            mailServer.sendmail(sender, recipients, payload)
        finally:
            # close() rather than quit(): it cannot fail after a successful
            # send and thereby trigger a duplicate mail via the fallback.
            mailServer.close()
        return
    except Exception as e:
        print(e)
        print("Unable to send mail.Lets try with local SMTP.")

    try:
        smtpServer = smtplib.SMTP('localhost')
        try:
            smtpServer.set_debuglevel(1)
            smtpServer.sendmail(sender, recipients, payload)
            print("Successfully sent email")
        finally:
            smtpServer.close()
    except Exception:
        print("Error: unable to send email.")
247
 
12256 kshitij.so 248
 
249
 
250
if __name__ == "__main__":
    # Command-line entry point: optional -l/-i paths plus at most one
    # positional command (start | stop | restart).
    cli = optparse.OptionParser()
    cli.add_option("-l", "--logfile", dest="logfile", type="string",
                   help="Log all output to LOG_FILE",
                   )
    cli.add_option("-i", "--pidfile", dest="pidfile", type="string",
                   help="Write the PID to pidfile")
    opts, positional = cli.parse_args()
    daemon = CompetitorScraping(opts.logfile, opts.pidfile)

    if not positional:
        # No command given: call run() directly instead of going through the
        # daemon's start/stop/restart controls.
        daemon.run()
    elif len(positional) > 1:
        print("usage: %s start|stop|restart" % sys.argv[0])
        sys.exit(2)
    else:
        command = positional[0]
        if command not in ('start', 'stop', 'restart'):
            print("Unknown command")
            sys.exit(2)
        # The accepted command names match the daemon's control methods.
        getattr(daemon, command)()
        sys.exit(0)