Subversion Repositories SmartDukaan

Rev

Rev 15520 | Rev 15536 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
12256 kshitij.so 1
from elixir import session
12268 kshitij.so 2
from shop2020.config.client.ConfigClient import ConfigClient
12256 kshitij.so 3
from sqlalchemy.sql import asc
4
from sqlalchemy.sql.expression import or_
5
from shop2020.utils.daemon import Daemon
6
import optparse
7
import sys
8
import mechanize
9
import time
10
from shop2020.model.v1.catalog.impl import DataService
12297 kshitij.so 11
from shop2020.model.v1.catalog.impl.DataService import CompetitorPricing, CompetitorPricingRequest
12272 kshitij.so 12
import gc 
12256 kshitij.so 13
 
12268 kshitij.so 14
# Initialise the catalog data layer against the host stored under the
# 'staging_hostname' configuration property.
config_client = ConfigClient()
host = config_client.get_property('staging_hostname')
DataService.initialize(db_hostname=host)

# Browser-like HTTP request headers; scrapSnapdeal passes them to urllib2.Request.
headers = {
    'User-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.8',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
}
24
 
12256 kshitij.so 25
class CompetitorScraping(Daemon):
    """Daemon wrapper whose run() enters the competitor-pricing polling loop."""

    def __init__(self, logfile='/var/log/services/competitorScraping.log', pidfile='/var/run/competitor-scraper.pid'):
        """Initialise the base Daemon.

        *pidfile* is handed to Daemon as its pid file and *logfile* is
        passed as both the stdout and the stderr target.
        """
        Daemon.__init__(self, pidfile, stderr=logfile, stdout=logfile)

    def run(self):
        """Delegate to the module-level start() loop."""
        start()
31
 
32
def start():
    """Poll for unprocessed CompetitorPricingRequest rows and process them forever.

    Each cycle loads every request whose isProcessed flag is False or NULL,
    ordered by requestId. For each one it scrapes the prices (fetchDetails),
    marks the request processed, commits, and notifies the requester
    (sendMail). When nothing is pending it releases the session, runs the
    garbage collector and sleeps for ten minutes before polling again.

    Any exception escaping the loop is printed together with its traceback
    and the process exits with status 2.
    """
    import traceback

    try:
        while True:
            # `== False` / `== None` are intentional: SQLAlchemy turns them
            # into SQL comparisons, so they must not be rewritten as `is`.
            pending = (session.query(CompetitorPricingRequest)
                       .filter(or_(CompetitorPricingRequest.isProcessed == False,
                                   CompetitorPricingRequest.isProcessed == None))
                       .order_by(asc(CompetitorPricingRequest.requestId))
                       .all())

            if not pending:
                print("No new request to process, sleeeeeeping.....")
                close_session()
                collected = gc.collect()
                print("Garbage collector: collected %d objects." % (collected))
                time.sleep(600)
                continue

            for request in pending:
                fetchDetails(request)
                request.isProcessed = True
                session.commit()
                sendMail(request)

            close_session()
            collected = gc.collect()
            print("Garbage collector: collected %d objects." % (collected))
            # Drop the references to the processed rows before the next poll.
            pending = []
    except Exception as e:
        print(e)
        # The message alone rarely identifies the failing call; keep the
        # traceback in the log before the daemon goes down.
        traceback.print_exc()
        sys.exit(2)
58
 
59
def fetchDetails(request):
    """Scrape marketplace prices for every item attached to *request*.

    Loads the CompetitorPricing rows whose competitorPricing_requestId equals
    request.requestId, groups them by their snapdealScraping /
    flipkartScraping / amazonScraping flags, runs one daemon thread per
    marketplace scraper and blocks until all three threads have finished.
    """
    import threading

    items = session.query(CompetitorPricing).filter(
        CompetitorPricing.competitorPricing_requestId == request.requestId).all()
    print(items)

    # An item can be flagged for more than one marketplace.
    snapdeal = [row for row in items if row.snapdealScraping]
    flipkart = [row for row in items if row.flipkartScraping]
    amazon = [row for row in items if row.amazonScraping]

    jobs = (
        (scrapSnapdeal, snapdeal),
        (scrapFlipkart, flipkart),
        (scrapAmazon, amazon),
    )
    threads = []
    for scraper, bucket in jobs:
        worker = threading.Thread(target=scraper, args=(bucket,))
        worker.daemon = True
        worker.start()
        threads.append(worker)

    for worker in threads:
        worker.join()

    # Empty every working list in place so no row or thread stays referenced.
    for used in (items, snapdeal, flipkart, amazon, threads):
        del used[:]
12256 kshitij.so 88
 
89
def scrapSnapdeal(snapdealItems):
    """Fill in the Snapdeal pricing fields on each item of *snapdealItems*.

    For every item that has a SnapdealItem row, the vendor listing for its
    SUPC is fetched as JSON. The first vendor in the response is stored as the
    lowest seller (the URL requests sort=sellingPrice; that the first entry is
    therefore the cheapest is assumed, not checked here), and the vendor named
    'MobilesnMore' is stored as our own listing. Values that are not found in
    the response stay 0 / ''.

    Items without a SnapdealItem row are skipped silently; items whose fetch
    or parsing fails are skipped after the traceback is printed. The list is
    emptied in place before returning.
    """
    import contextlib
    import traceback
    import simplejson as json
    import urllib2
    from shop2020.model.v1.catalog.impl.DataService import SnapdealItem

    for snapdealItem in snapdealItems:
        sdItem = SnapdealItem.get_by(item_id=snapdealItem.item_id)
        if sdItem is None:
            continue

        ourSp = ourOfferPrice = ourInventory = 0
        lowestSp = lowestOfferPrice = lowestSellerInventory = 0
        lowestSellerName = ''
        try:
            url = "http://www.snapdeal.com/acors/json/gvbps?supc=%s&catId=175&sort=sellingPrice" % (sdItem.supc,)
            print(url)
            time.sleep(1)  # pause between consecutive requests
            req = urllib2.Request(url, headers=headers)
            # closing() releases the connection even if read()/parsing fails.
            with contextlib.closing(urllib2.urlopen(req)) as response:
                vendorInfo = json.loads(response.read())
            print(vendorInfo)

            for position, vendor in enumerate(vendorInfo):
                sellerName = vendor['vendorDisplayName']
                if position == 0:
                    # Decode only byte strings: calling .decode() on a value
                    # that is already unicode makes Python 2 ASCII-encode it
                    # first, which fails for non-ASCII seller names.
                    if isinstance(sellerName, bytes):
                        lowestSellerName = sellerName.decode("utf-8")
                    else:
                        lowestSellerName = sellerName
                    lowestOfferPrice = vendor['sellingPrice']
                    # Prefer the pre-cashback price when the vendor has one.
                    lowestSp = vendor.get('sellingPriceBefIntCashBack', lowestOfferPrice)
                    lowestSellerInventory = vendor['buyableInventory']

                if sellerName == 'MobilesnMore':
                    ourInventory = vendor['buyableInventory']
                    ourOfferPrice = vendor['sellingPrice']
                    ourSp = vendor.get('sellingPriceBefIntCashBack', ourOfferPrice)
        except Exception:
            traceback.print_exc()
            continue
        finally:
            sdItem = None

        print(ourSp)
        print(ourOfferPrice)
        print(ourInventory)
        snapdealItem.ourSnapdealPrice = ourSp
        snapdealItem.ourSnapdealOfferPrice = ourOfferPrice
        snapdealItem.ourSnapdealInventory = ourInventory
        snapdealItem.lowestSnapdealPrice = lowestSp
        snapdealItem.lowestSnapdealOfferPrice = lowestOfferPrice
        snapdealItem.lowestSnapdealSeller = lowestSellerName
        snapdealItem.lowestSnapdealSellerInventory = lowestSellerInventory
    snapdealItems[:] = []
12256 kshitij.so 143
 
144
def scrapFlipkart(flipkartItems):
    """Fill in the Flipkart pricing fields on each item of *flipkartItems*.

    For every item that has a FlipkartItem row, the product page scraper is
    asked for the cheapest seller, the lowest selling price and our own
    selling price, and the seller listings API is asked for our stock count.

    Items without a FlipkartItem row are skipped silently; items whose page
    scrape fails are skipped after the traceback is printed. A failed
    inventory lookup is not fatal: the traceback is printed and the stock
    count falls back to 0. The list is emptied in place before returning.
    """
    import traceback
    import requests as httpRequest
    from shop2020.model.v1.catalog.script import FlipkartProductPageParser
    from shop2020.model.v1.catalog.impl.DataService import FlipkartItem

    scraperFk = FlipkartProductPageParser.FlipkartProductPageScraper()
    for flipkartItem in flipkartItems:
        fkItem = FlipkartItem.get_by(item_id=flipkartItem.item_id)
        if fkItem is None:
            continue
        # Copy the serial number before dropping the row reference: both the
        # page scrape and the inventory request below need it.
        serialNumber = fkItem.flipkartSerialNumber
        fkItem = None

        try:
            vendorsData = scraperFk.read(serialNumber.strip())
            lowestSellerName = vendorsData['cheapestSeller']
            # Decode only byte strings: calling .decode() on a value that is
            # already unicode makes Python 2 ASCII-encode it first, which
            # fails for non-ASCII seller names.
            if isinstance(lowestSellerName, bytes):
                lowestSellerName = lowestSellerName.decode("utf-8")
            lowestSellerSp = vendorsData['lowestSellingPrice']
            ourSp = vendorsData['saholicSellingPrice']
        except Exception:
            traceback.print_exc()
            continue

        try:
            request_url = "https://api.flipkart.net/sellers/skus/%s/listings" % (str(serialNumber))
            # NOTE(review): API credentials are hard-coded here; they should
            # be moved to configuration and rotated.
            r = httpRequest.get(request_url, auth=('m2z93iskuj81qiid', '0c7ab6a5-98c0-4cdc-8be3-72c591e0add4'))
            listing = r.json()
            print("Inventory info %s" % (listing,))
            stock_count = int(listing['attributeValues']['stock_count'])
        except Exception:
            # Best effort: report why the lookup failed, then record no stock.
            traceback.print_exc()
            stock_count = 0

        flipkartItem.ourFlipkartPrice = ourSp
        flipkartItem.ourFlipkartInventory = stock_count
        flipkartItem.lowestFlipkartPrice = lowestSellerSp
        flipkartItem.lowestFlipkartSeller = lowestSellerName
    scraperFk = None
    flipkartItems[:] = []
12256 kshitij.so 178
 
179
 
180
def close_session():
    """Close the shared ORM session, but only when it reports itself active."""
    if not session.is_active:
        return
    print("session is active. closing it.")
    session.close()
184
 
15492 kshitij.so 185
def scrapAmazon(amazonItems):
    """Fill in the Amazon pricing fields on each item of *amazonItems*.

    For every item with an Amazonlisted row that carries an ASIN, two
    offer-listing pages are scraped: one restricted to our merchant id (our
    price) and the general one (cheapest seller price and name). A failed
    scrape is not fatal: our price falls back to 0.0 and the cheapest seller
    to 0.0 / ''. Items with no Amazonlisted row or no ASIN are reported and
    skipped. The list is emptied in place before returning.
    """
    from shop2020.model.v1.catalog.script import AmazonScraper
    from shop2020.model.v1.catalog.impl.DataService import Amazonlisted

    print("Inside amazonitems  %s" % (amazonItems,))
    print("len amazon items  %s" % (len(amazonItems),))
    time.sleep(5)
    amScraper = AmazonScraper.AmazonScraper()
    for amazonItem in amazonItems:
        amazon_d_item = Amazonlisted.get_by(itemId=amazonItem.item_id)
        # Guard against a missing row or a NULL asin; without this an
        # exception would end the thread and skip every remaining item.
        asin = amazon_d_item.asin if amazon_d_item is not None else None
        if not asin:
            print("No asin found for  %s" % (amazonItem.item_id,))
            continue
        asin = asin.strip()

        saholicUrl = "http://www.amazon.in/gp/offer-listing/%s?m=AF6E3O0VE0X4D&s=merchant-items&ie=UTF8&qid=1433584512&sr=1-1" % (asin)
        generalUrl = "http://www.amazon.in/gp/offer-listing/%s" % (asin)

        try:
            saholicPrice = (amScraper.read(saholicUrl, False))[0]
        except Exception:
            saholicPrice = 0.0

        try:
            cheapestSeller = amScraper.read(generalUrl, True)
            cheapestSellerPrice = cheapestSeller[0]
            cheapestSellerName = cheapestSeller[1]
            # Decode only byte strings: calling .decode() on a value that is
            # already unicode makes Python 2 ASCII-encode it first, which
            # fails for non-ASCII seller names.
            if isinstance(cheapestSellerName, bytes):
                cheapestSellerName = cheapestSellerName.decode("utf-8")
        except Exception:
            cheapestSellerPrice = 0.0
            cheapestSellerName = ""

        amazonItem.ourAmazonPrice = saholicPrice
        amazonItem.lowestAmazonPrice = cheapestSellerPrice
        amazonItem.lowestAmazonSeller = cheapestSellerName
    amazonItems[:] = []
12256 kshitij.so 214
 
215
 
12314 kshitij.so 216
def sendMail(request):
    """Notify request.user by e-mail that *request* has been processed.

    Delivery is attempted through smtp.gmail.com:587 with STARTTLS first. If
    connecting, logging in or sending fails, the error is printed and the
    message is handed to the SMTP server on localhost instead. A failure of
    the fallback is printed as well; this function does not raise for
    delivery problems, so a mail outage cannot stop the caller.
    """
    import smtplib
    from email.mime.text import MIMEText
    from email.mime.multipart import MIMEMultipart

    def _disconnect(server):
        # Best-effort shutdown of an SMTP connection that may already be dead.
        if server is None:
            return
        try:
            server.quit()
        except (smtplib.SMTPException, IOError, OSError):
            server.close()

    sender = 'build@shop2020.in'
    recipients = [request.user]
    message = "Your Request has been processed.Visit dashboard to check & download report"

    msg = MIMEMultipart()
    msg['Subject'] = "Competition Scraping.Upload Id" + ' - ' + str(request.requestId)
    msg['From'] = sender
    msg['To'] = ",".join(recipients)
    msg.preamble = "Competition Scraping" + ' - ' + str(request.requestId)
    msg.attach(MIMEText(message, 'html'))
    payload = msg.as_string()

    mailServer = None
    try:
        # Connection set-up is inside the try so that an unreachable Gmail
        # also triggers the local fallback instead of raising to the caller.
        mailServer = smtplib.SMTP("smtp.gmail.com", 587)
        mailServer.ehlo()
        mailServer.starttls()
        mailServer.ehlo()
        # NOTE(review): the account password is hard-coded here; it should be
        # moved to configuration and rotated.
        mailServer.login(sender, "cafe@nes")
        mailServer.sendmail(sender, recipients, payload)
        return
    except Exception as e:
        print(e)
        print("Unable to send mail.Lets try with local SMTP.")
    finally:
        _disconnect(mailServer)

    smtpServer = None
    try:
        smtpServer = smtplib.SMTP('localhost')
        smtpServer.set_debuglevel(1)
        smtpServer.sendmail(sender, recipients, payload)
        print("Successfully sent email")
    except Exception as e:
        print(e)
        print("Error: unable to send email.")
    finally:
        _disconnect(smtpServer)
249
 
12256 kshitij.so 250
 
251
 
252
if __name__ == "__main__":
    # Command-line entry point.
    #   no argument            -> run the scraping loop in the foreground
    #   start | stop | restart -> control the background daemon
    parser = optparse.OptionParser()
    parser.add_option("-l", "--logfile", dest="logfile",
                      type="string",
                      help="Log all output to LOG_FILE",
                      )
    parser.add_option("-i", "--pidfile", dest="pidfile",
                      type="string",
                      help="Write the PID to pidfile")
    (options, args) = parser.parse_args()

    # Forward only the options that were actually given. optparse reports an
    # omitted option as None, and passing that None through would override
    # the default log/pid paths declared on CompetitorScraping.__init__.
    daemon_kwargs = {}
    if options.logfile is not None:
        daemon_kwargs['logfile'] = options.logfile
    if options.pidfile is not None:
        daemon_kwargs['pidfile'] = options.pidfile
    daemon = CompetitorScraping(**daemon_kwargs)

    if len(args) == 0:
        daemon.run()
    elif len(args) == 1:
        commands = {
            'start': daemon.start,
            'stop': daemon.stop,
            'restart': daemon.restart,
        }
        command = commands.get(args[0])
        if command is None:
            print("Unknown command")
            sys.exit(2)
        command()
        sys.exit(0)
    else:
        print("usage: %s start|stop|restart" % sys.argv[0])
        sys.exit(2)