Subversion Repositories SmartDukaan

Rev

Rev 15746 | Rev 15826 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
12256 kshitij.so 1
from elixir import session
12268 kshitij.so 2
from shop2020.config.client.ConfigClient import ConfigClient
12256 kshitij.so 3
from sqlalchemy.sql import asc
4
from sqlalchemy.sql.expression import or_
5
from shop2020.utils.daemon import Daemon
6
import optparse
7
import sys
8
import mechanize
9
import time
10
from shop2020.model.v1.catalog.impl import DataService
12297 kshitij.so 11
from shop2020.model.v1.catalog.impl.DataService import CompetitorPricing, CompetitorPricingRequest
12272 kshitij.so 12
import gc 
15825 kshitij.so 13
from operator import itemgetter
12256 kshitij.so 14
 
12268 kshitij.so 15
# Import-time setup: look up the 'staging_hostname' property through
# ConfigClient and initialise DataService with it as the database host, so
# the ORM classes used below are bound before any query runs.
config_client = ConfigClient()
host = config_client.get_property('staging_hostname')
DataService.initialize(db_hostname=host)
12256 kshitij.so 18
 
15498 kshitij.so 19
# Browser-like HTTP headers; scrapSnapdeal() attaches them to its
# urllib2.Request when fetching the vendor listing.
headers = {
    'User-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.8',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
}
25
 
12256 kshitij.so 26
class CompetitorScraping(Daemon):
    """Daemon wrapper around the competitor-price scraping loop.

    ``logfile`` is passed to ``Daemon`` as both stdout and stderr, and
    ``pidfile`` as the PID file path.  ``None`` for either argument selects
    the default path: the command-line entry point forwards optparse values
    that are ``None`` when the option was not given, and without this
    fallback the defaults declared in the signature would never apply.
    """

    _DEFAULT_LOGFILE = '/var/log/services/competitorScraping.log'
    _DEFAULT_PIDFILE = '/var/run/competitor-scraper.pid'

    def __init__(self, logfile=_DEFAULT_LOGFILE, pidfile=_DEFAULT_PIDFILE):
        if logfile is None:
            logfile = self._DEFAULT_LOGFILE
        if pidfile is None:
            pidfile = self._DEFAULT_PIDFILE
        Daemon.__init__(self, pidfile, stdout=logfile, stderr=logfile)

    def run(self):
        """Run the scraping loop by delegating to the module-level start()."""
        start()
32
 
33
def start():
    """Poll for unprocessed pricing requests and process them, forever.

    Each cycle queries CompetitorPricingRequest rows whose ``isProcessed`` is
    False or NULL, ordered by ``requestId``.

    * No pending rows: the session is closed, a garbage collection is
      forced, and the loop sleeps for 600 seconds before polling again.
    * Otherwise each request is scraped via fetchDetails(), marked as
      processed, committed, and the requester is notified via sendMail();
      then the session is closed and a garbage collection is forced.

    Any exception escaping the loop is logged with its traceback and the
    process exits with status 2.
    """
    import traceback
    try:
        while True:
            # "== False" / "== None" are SQLAlchemy column expressions and
            # must not be rewritten as "is" / "not".
            pending = or_(CompetitorPricingRequest.isProcessed == False,  # noqa: E712
                          CompetitorPricingRequest.isProcessed == None)  # noqa: E711
            requests = session.query(CompetitorPricingRequest).filter(pending).order_by(asc(CompetitorPricingRequest.requestId)).all()
            if not requests:
                print("No new request to process, sleeeeeeping.....")
                close_session()
                collected = gc.collect()
                print("Garbage collector: collected %d objects." % (collected))
                time.sleep(600)
                # Poll again straight away instead of repeating the cleanup.
                continue
            for request in requests:
                fetchDetails(request)
                request.isProcessed = True
                session.commit()
                sendMail(request)
            close_session()
            collected = gc.collect()
            print("Garbage collector: collected %d objects." % (collected))
    except Exception:
        # Keep the full traceback in the log; the message alone is rarely
        # enough to diagnose why the daemon stopped.
        traceback.print_exc()
        sys.exit(2)
59
 
60
def fetchDetails(request):
    """Scrape competitor prices for every item attached to ``request``.

    Loads the CompetitorPricing rows belonging to the request, groups them by
    the marketplaces flagged on each row (a row may be in several groups),
    and runs one daemon thread per marketplace scraper.  Returns only after
    all three threads have finished; the working lists are emptied on exit.
    """
    import threading
    items = session.query(CompetitorPricing).filter(CompetitorPricing.competitorPricing_requestId==request.requestId).all()
    print(items)

    snapdeal = [entry for entry in items if entry.snapdealScraping]
    flipkart = [entry for entry in items if entry.flipkartScraping]
    amazon = [entry for entry in items if entry.amazonScraping]

    jobs = (
        (scrapSnapdeal, snapdeal),
        (scrapFlipkart, flipkart),
        (scrapAmazon, amazon),
    )
    threads = []
    for scraper, batch in jobs:
        worker = threading.Thread(target=scraper, args=(batch,))
        worker.daemon = True
        worker.start()
        threads.append(worker)

    for worker in threads:
        worker.join()

    # Drop every reference held by the working lists.
    for bucket in (items, snapdeal, flipkart, amazon, threads):
        del bucket[:]
12256 kshitij.so 89
 
90
def scrapSnapdeal(snapdealItems):
    """Fill in Snapdeal pricing fields on each entry of ``snapdealItems``.

    For every entry the matching SnapdealItem is looked up by ``item_id``;
    entries without one are skipped.  The vendor listing for the item's
    ``supc`` is fetched as JSON and sorted by ``sellingPrice``; the first
    vendor provides the "lowest" fields and the vendor displayed as
    'MobilesnMore' provides the "our" fields (all default to 0 / '').

    Any error while fetching or parsing a listing is logged with its
    traceback and that entry is left untouched.  ``snapdealItems`` is emptied
    in place before returning.
    """
    import traceback
    import urllib2
    import simplejson as json
    from shop2020.model.v1.catalog.impl.DataService import SnapdealItem

    def listedPrice(vendor):
        # Prefer the price before cash-back when the listing provides it.
        if 'sellingPriceBefIntCashBack' in vendor:
            return vendor['sellingPriceBefIntCashBack']
        return vendor['sellingPrice']

    for snapdealItem in snapdealItems:
        sdItem = SnapdealItem.get_by(item_id=snapdealItem.item_id)
        if sdItem is None:
            continue

        ourSp = ourOfferPrice = ourInventory = 0
        lowestSp = lowestOfferPrice = lowestSellerInventory = 0
        lowestSellerName = ''
        try:
            url = "http://www.snapdeal.com/acors/json/v2/gvbps?supc=%s&catUrl=&bn=&catId=175&start=0&count=10000" % (sdItem.supc)
            print(url)
            time.sleep(1)
            req = urllib2.Request(url, headers=headers)
            response = urllib2.urlopen(req)
            try:
                # read() yields a string, so it must go through loads();
                # load() expects a file-like object.
                vendorInfo = json.loads(response.read())
            finally:
                response.close()
            print(vendorInfo)

            sortedVendorsData = sorted(vendorInfo['vendors'], key=itemgetter('sellingPrice'))
            if sortedVendorsData:
                cheapest = sortedVendorsData[0]
                lowestSellerName = cheapest['vendorDisplayName'].encode('utf-8')
                lowestSp = listedPrice(cheapest)
                lowestOfferPrice = cheapest['sellingPrice']
                lowestSellerInventory = cheapest['buyableInventory']

            for vendor in sortedVendorsData:
                if vendor['vendorDisplayName'] == 'MobilesnMore':
                    ourInventory = vendor['buyableInventory']
                    ourSp = listedPrice(vendor)
                    ourOfferPrice = vendor['sellingPrice']
        except Exception:
            traceback.print_exc()
            continue

        print(ourSp)
        print(ourOfferPrice)
        print(ourInventory)
        snapdealItem.ourSnapdealPrice = ourSp
        snapdealItem.ourSnapdealOfferPrice = ourOfferPrice
        snapdealItem.ourSnapdealInventory = ourInventory
        snapdealItem.lowestSnapdealPrice = lowestSp
        snapdealItem.lowestSnapdealOfferPrice = lowestOfferPrice
        snapdealItem.lowestSnapdealSeller = lowestSellerName
        snapdealItem.lowestSnapdealSellerInventory = lowestSellerInventory
    snapdealItems[:] = []
12256 kshitij.so 145
 
146
def scrapFlipkart(flipkartItems):
    """Fill in Flipkart pricing and inventory on each entry of ``flipkartItems``.

    For every entry the matching FlipkartItem is looked up by ``item_id``;
    entries without one are skipped.  Prices and the cheapest seller come
    from FlipkartProductPageScraper.read(); if that fails the error is logged
    and the entry is left untouched.  Our stock count comes from the Flipkart
    seller listings API and falls back to 0 when that lookup fails.

    ``flipkartItems`` is emptied in place before returning.
    """
    import traceback
    import requests as httpRequest
    from shop2020.model.v1.catalog.script import FlipkartProductPageParser
    from shop2020.model.v1.catalog.impl.DataService import FlipkartItem

    scraperFk = FlipkartProductPageParser.FlipkartProductPageScraper()
    for flipkartItem in flipkartItems:
        fkItem = FlipkartItem.get_by(item_id=flipkartItem.item_id)
        if fkItem is None:
            continue
        # Read once: needed by both the page scrape and the inventory lookup.
        serialNumber = fkItem.flipkartSerialNumber
        try:
            vendorsData = scraperFk.read(serialNumber.strip())
            lowestSellerName = vendorsData['cheapestSeller'].encode('utf-8')
            lowestSellerSp = vendorsData['lowestSellingPrice']
            ourSp = vendorsData['saholicSellingPrice']
        except Exception:
            traceback.print_exc()
            continue

        try:
            request_url = "https://api.flipkart.net/sellers/skus/%s/listings" % (str(serialNumber))
            # NOTE(security): API credentials are hard-coded here; they
            # should be moved to configuration and rotated.
            r = httpRequest.get(request_url, auth=('m2z93iskuj81qiid', '0c7ab6a5-98c0-4cdc-8be3-72c591e0add4'))
            listing = r.json()
            print("Inventory info %s" % (listing,))
            stock_count = int(listing['attributeValues']['stock_count'])
        except Exception:
            # Best effort: an unavailable inventory is reported as 0.
            traceback.print_exc()
            stock_count = 0

        flipkartItem.ourFlipkartPrice = ourSp
        flipkartItem.ourFlipkartInventory = stock_count
        flipkartItem.lowestFlipkartPrice = lowestSellerSp
        flipkartItem.lowestFlipkartSeller = lowestSellerName
    flipkartItems[:] = []
12256 kshitij.so 180
 
181
 
182
def close_session():
    """Close the shared session when it reports itself as active."""
    if not session.is_active:
        return
    print("session is active. closing it.")
    session.close()
186
 
15492 kshitij.so 187
def scrapAmazon(amazonItems):
    """Fill in Amazon pricing fields on each entry of ``amazonItems``.

    For every entry the matching Amazonlisted row is looked up by ``itemId``;
    entries without a row, or whose row has no ASIN (None or empty), are
    skipped.  Our own price is read from the merchant-filtered offer listing
    and the cheapest seller from the general offer listing; each lookup is
    best effort and falls back to 0.0 / "" when the scraper raises.

    ``amazonItems`` is emptied in place before returning.
    """
    from shop2020.model.v1.catalog.script import AmazonScraper
    from shop2020.model.v1.catalog.impl.DataService import Amazonlisted
    print("Inside amazonitems  %s" % (amazonItems,))
    print("len amazon items  %s" % (len(amazonItems),))
    time.sleep(5)
    amScraper = AmazonScraper.AmazonScraper()
    for amazonItem in amazonItems:
        amazon_d_item = Amazonlisted.get_by(itemId=amazonItem.item_id)
        if amazon_d_item is None:
            continue
        # A None ASIN must be skipped as well; len(None) would raise and
        # abort the remaining items in this worker thread.
        if not amazon_d_item.asin:
            print("No asin found for  %s" % (amazonItem.item_id,))
            continue
        asin = amazon_d_item.asin.strip()
        saholicUrl = "http://www.amazon.in/gp/offer-listing/%s?m=AF6E3O0VE0X4D&s=merchant-items&ie=UTF8&qid=1433584512&sr=1-1" % (asin)
        generalUrl = "http://www.amazon.in/gp/offer-listing/%s" % (asin)
        try:
            saholicPrice = (amScraper.read(saholicUrl, False))[0]
        except Exception:
            # Best effort: a missing or unreadable listing counts as 0.0.
            saholicPrice = 0.0
        try:
            cheapestSeller = amScraper.read(generalUrl, True)
            cheapestSellerPrice = cheapestSeller[0]
            cheapestSellerName = cheapestSeller[1].encode('utf-8')
        except Exception:
            cheapestSellerPrice = 0.0
            cheapestSellerName = ""
        amazonItem.ourAmazonPrice = saholicPrice
        amazonItem.lowestAmazonPrice = cheapestSellerPrice
        amazonItem.lowestAmazonSeller = cheapestSellerName
    amazonItems[:] = []
12256 kshitij.so 218
 
219
 
12314 kshitij.so 220
def sendMail(request):
    """Notify ``request.user`` that the scraping request has been processed.

    Delivery is attempted through smtp.gmail.com:587 with STARTTLS first; if
    any step of that fails (connect, handshake, login or send) the message is
    retried through the SMTP server on localhost.  Failures are printed, never
    raised, so a mail problem cannot stop the caller.  Both connections are
    closed before returning.  Nothing is sent when the request has no user.
    """
    import smtplib
    from email.mime.text import MIMEText
    from email.mime.multipart import MIMEMultipart

    def closeQuietly(server):
        # quit() raises if the connection is already gone; nothing useful
        # can be done about that during cleanup.
        if server is None:
            return
        try:
            server.quit()
        except Exception:
            pass

    if not request.user:
        print("No recipient for request %s, not sending mail." % (request.requestId,))
        return

    sender = 'build@shop2020.in'
    recipients = [request.user]
    message = "Your Request has been processed.Visit dashboard to check & download report"
    msg = MIMEMultipart()
    msg['Subject'] = "Competition Scraping.Upload Id" + ' - ' + str(request.requestId)
    msg['From'] = sender
    msg['To'] = ",".join(recipients)
    msg.preamble = "Competition Scraping" + ' - ' + str(request.requestId)
    msg.attach(MIMEText(message, 'html'))
    payload = msg.as_string()

    mailServer = None
    try:
        mailServer = smtplib.SMTP("smtp.gmail.com", 587)
        mailServer.ehlo()
        mailServer.starttls()
        mailServer.ehlo()
        # NOTE(security): the account password is hard-coded here; it should
        # be moved to configuration and rotated.
        mailServer.login(sender, "cafe@nes")
        # The envelope sender is the account address, not the password.
        mailServer.sendmail(sender, recipients, payload)
        return
    except Exception as e:
        print(e)
        print("Unable to send mail.Lets try with local SMTP.")
    finally:
        closeQuietly(mailServer)

    smtpServer = None
    try:
        smtpServer = smtplib.SMTP('localhost')
        smtpServer.set_debuglevel(1)
        smtpServer.sendmail(sender, recipients, payload)
        print("Successfully sent email")
    except Exception:
        print("Error: unable to send email.")
    finally:
        closeQuietly(smtpServer)
253
 
12256 kshitij.so 254
 
255
 
256
if __name__ == "__main__":
    # Usage: script [-l LOGFILE] [-i PIDFILE] [start|stop|restart]
    # With no command the scraping loop runs directly via daemon.run().
    parser = optparse.OptionParser()
    parser.add_option("-l", "--logfile", dest="logfile", type="string",
                      help="Log all output to LOG_FILE")
    parser.add_option("-i", "--pidfile", dest="pidfile", type="string",
                      help="Write the PID to pidfile")
    (options, args) = parser.parse_args()
    daemon = CompetitorScraping(options.logfile, options.pidfile)

    if len(args) > 1:
        print("usage: %s start|stop|restart" % sys.argv[0])
        sys.exit(2)

    if not args:
        daemon.run()
    else:
        commands = {
            'start': daemon.start,
            'stop': daemon.stop,
            'restart': daemon.restart,
        }
        handler = commands.get(args[0])
        if handler is None:
            print("Unknown command")
            sys.exit(2)
        handler()
        sys.exit(0)