| 12256 |
kshitij.so |
1 |
from elixir import session
|
| 12268 |
kshitij.so |
2 |
from shop2020.config.client.ConfigClient import ConfigClient
|
| 12256 |
kshitij.so |
3 |
from sqlalchemy.sql import asc
|
|
|
4 |
from sqlalchemy.sql.expression import or_
|
|
|
5 |
from shop2020.utils.daemon import Daemon
|
|
|
6 |
import optparse
|
|
|
7 |
import sys
|
|
|
8 |
import mechanize
|
|
|
9 |
import time
|
|
|
10 |
from shop2020.model.v1.catalog.impl import DataService
|
| 12297 |
kshitij.so |
11 |
from shop2020.model.v1.catalog.impl.DataService import CompetitorPricing, CompetitorPricingRequest
|
| 12272 |
kshitij.so |
12 |
import gc
|
| 12256 |
kshitij.so |
13 |
|
| 12268 |
kshitij.so |
14 |
# Module-level bootstrap: executed once, at import time.
# Look up the database host under the 'staging_hostname' configuration key
# and initialise the DataService layer against it before any query is issued.
config_client = ConfigClient()
host = config_client.get_property('staging_hostname')
DataService.initialize(db_hostname=host)
|
| 12256 |
kshitij.so |
17 |
|
|
|
18 |
class CompetitorScraping(Daemon):
    """Daemon wrapper around the competitor-pricing scraping loop."""

    # Used whenever the caller omits a path or passes None for it.
    DEFAULT_LOGFILE = '/var/log/services/competitorScraping.log'
    DEFAULT_PIDFILE = '/var/run/competitor-scraper.pid'

    def __init__(self, logfile=DEFAULT_LOGFILE, pidfile=DEFAULT_PIDFILE):
        """Configure the daemon's log file and pid file.

        logfile -- path that receives both stdout and stderr of the daemon.
        pidfile -- path of the pid file handed to Daemon.

        None is accepted for either argument and means "use the default".
        The command-line entry point forwards optparse values verbatim, and
        those are None when the corresponding option is not given.
        """
        if logfile is None:
            logfile = self.DEFAULT_LOGFILE
        if pidfile is None:
            pidfile = self.DEFAULT_PIDFILE
        Daemon.__init__(self, pidfile, stdout=logfile, stderr=logfile)

    def run(self):
        """Run the module-level polling loop (see start())."""
        start()
|
|
|
24 |
|
|
|
25 |
def start():
    """Poll for unprocessed competitor-pricing requests and process them.

    Loops forever:
      * loads every CompetitorPricingRequest whose isProcessed flag is False
        or NULL, ordered by requestId;
      * when there is none, releases the session, runs the garbage collector
        and sleeps for 600 seconds before polling again;
      * otherwise, for each request: scrapes its items (fetchDetails), marks
        it processed, commits, and notifies the requester (sendMail).

    Any exception escaping the loop is logged with its traceback and the
    process exits with status 2.
    """
    import traceback

    try:
        while True:
            # "== False" / "== None" are SQLAlchemy column expressions here,
            # not Python comparisons; they must not be rewritten as "is".
            requests = session.query(CompetitorPricingRequest).filter(
                or_(CompetitorPricingRequest.isProcessed == False,  # noqa: E712
                    CompetitorPricingRequest.isProcessed == None)   # noqa: E711
            ).order_by(asc(CompetitorPricingRequest.requestId)).all()

            if not requests:
                print("No new request to process, sleeeeeeping.....")
                close_session()
                collected = gc.collect()
                # Namespace dumps are a debugging aid kept from the original.
                print(locals())
                print(globals())
                print("Garbage collector: collected %d objects." % (collected))
                time.sleep(600)
                # Poll again straight away; without this the cleanup below
                # would run a second time for the same idle cycle.
                continue

            for request in requests:
                fetchDetails(request)
                request.isProcessed = True
                session.commit()
                sendMail(request)

            close_session()
            collected = gc.collect()
            print("Garbage collector: collected %d objects." % (collected))
            print(locals())
            print(globals())
            requests = []
    except Exception as e:
        print(e)
        # The message alone rarely identifies the failing line.
        traceback.print_exc()
        sys.exit(2)
|
|
|
51 |
|
|
|
52 |
def fetchDetails(request):
    """Scrape competitor prices for every item belonging to *request*.

    Loads the CompetitorPricing rows whose competitorPricing_requestId equals
    request.requestId, groups them by the marketplaces flagged on each row
    (a row may be in several groups), and runs the three scrapers in
    parallel daemon threads. Returns only after all three threads finished.
    The scrapers write their results onto the row objects; nothing is
    committed here.
    """
    import threading

    items = session.query(CompetitorPricing).filter(
        CompetitorPricing.competitorPricing_requestId == request.requestId
    ).all()
    print(items)

    snapdeal = [item for item in items if item.snapdealScraping]
    flipkart = [item for item in items if item.flipkartScraping]
    amazon = [item for item in items if item.amazonScraping]

    # One worker per marketplace, started in the same order as before.
    jobs = (
        (scrapSnapdeal, snapdeal),
        (scrapFlipkart, flipkart),
        (scrapAmazon, amazon),
    )
    threads = []
    for scraper, batch in jobs:
        worker = threading.Thread(target=scraper, args=(batch,))
        worker.daemon = True
        worker.start()
        threads.append(worker)

    for worker in threads:
        worker.join()

    # Empty every list in place so no reference to the rows or threads
    # lingers in this frame.
    for leftover in (items, snapdeal, flipkart, amazon, threads):
        del leftover[:]
|
| 12256 |
kshitij.so |
81 |
|
|
|
82 |
def _snapdealPrices(vendor):
    """Return (sellingPrice, offerPrice) for one Snapdeal vendor entry.

    The selling price is 'sellingPriceBefIntCashBack' when the entry has that
    key and 'sellingPrice' otherwise; the offer price is always
    'sellingPrice'.
    """
    offerPrice = vendor['sellingPrice']
    return vendor.get('sellingPriceBefIntCashBack', offerPrice), offerPrice


def _summarizeSnapdealVendors(vendorInfo, ourSellerName='MobilesnMore'):
    """Reduce a Snapdeal vendor listing to our figures and the first seller's.

    vendorInfo is the decoded JSON listing; the first entry is treated as the
    lowest-priced seller (the listing is requested with
    sort=sellingPrice). Every figure defaults to 0 (name to '') when the
    listing is empty or our seller is not in it.
    """
    summary = {
        'ourSp': 0, 'ourOfferPrice': 0, 'ourInventory': 0,
        'lowestSp': 0, 'lowestOfferPrice': 0,
        'lowestSellerName': '', 'lowestSellerInventory': 0,
    }
    for position, vendor in enumerate(vendorInfo):
        if position == 0:
            summary['lowestSellerName'] = vendor['vendorDisplayName']
            summary['lowestSp'], summary['lowestOfferPrice'] = _snapdealPrices(vendor)
            summary['lowestSellerInventory'] = vendor['buyableInventory']
        if vendor['vendorDisplayName'] == ourSellerName:
            summary['ourInventory'] = vendor['buyableInventory']
            summary['ourSp'], summary['ourOfferPrice'] = _snapdealPrices(vendor)
    return summary


def scrapSnapdeal(snapdealItems):
    """Fill in Snapdeal pricing fields on each row of *snapdealItems*.

    For every row that has a matching SnapdealItem, fetches the vendor
    listing for its supc and stores our price / offer price / inventory and
    the lowest seller's price / offer price / name / inventory on the row.
    Rows without a SnapdealItem, and rows whose fetch or parse fails, are
    skipped (failures are logged with a traceback). The list is emptied in
    place before returning.
    """
    import contextlib
    import traceback
    import simplejson as json
    import urllib2
    from shop2020.model.v1.catalog.impl.DataService import SnapdealItem

    for snapdealItem in snapdealItems:
        sdItem = SnapdealItem.get_by(item_id=snapdealItem.item_id)
        if sdItem is None:
            continue
        try:
            url = "http://www.snapdeal.com/acors/json/gvbps?supc=%s&catId=175&sort=sellingPrice" % (sdItem.supc)
            print(url)
            time.sleep(1)
            # closing() guarantees the HTTP response is released even when
            # reading or decoding fails.
            with contextlib.closing(urllib2.urlopen(urllib2.Request(url))) as response:
                vendorInfo = json.loads(response.read())
            print(vendorInfo)
            summary = _summarizeSnapdealVendors(vendorInfo)
        except Exception:
            traceback.print_exc()
            continue
        finally:
            # Drop the lookup row as soon as it is no longer needed.
            sdItem = None

        # sdItem is already cleared here; the row itself carries the same id
        # that was used for the lookup.
        print("Item id  %s" % (snapdealItem.item_id,))
        print(summary['ourSp'])
        print(summary['ourOfferPrice'])
        print(summary['ourInventory'])
        snapdealItem.ourSnapdealPrice = summary['ourSp']
        snapdealItem.ourSnapdealOfferPrice = summary['ourOfferPrice']
        snapdealItem.ourSnapdealInventory = summary['ourInventory']
        snapdealItem.lowestSnapdealPrice = summary['lowestSp']
        snapdealItem.lowestSnapdealOfferPrice = summary['lowestOfferPrice']
        snapdealItem.lowestSnapdealSeller = summary['lowestSellerName']
        snapdealItem.lowestSnapdealSellerInventory = summary['lowestSellerInventory']
    snapdealItems[:] = []
|
| 12256 |
kshitij.so |
137 |
|
|
|
138 |
def scrapFlipkart(flipkartItems):
    """Fill in Flipkart pricing fields on each row of *flipkartItems*.

    For every row that has a matching FlipkartItem:
      * scrapes the seller listing page and records the lowest seller's name
        and price plus the price of the seller named 'Saholic';
      * asks the Flipkart seller API for our stock count (0 when the call
        fails).
    Rows without a FlipkartItem, and rows whose page scrape fails, are
    skipped. The list is emptied in place before returning.
    """
    import traceback
    from shop2020.model.v1.catalog.script import FlipkartScraper
    from operator import itemgetter
    import requests as httpRequest
    from shop2020.model.v1.catalog.impl.DataService import FlipkartItem

    scraperFk = FlipkartScraper.FlipkartScraper()
    for flipkartItem in flipkartItems:
        fkItem = FlipkartItem.get_by(item_id=flipkartItem.item_id)
        if fkItem is None:
            continue
        # Capture the serial number BEFORE releasing the row: both the page
        # scrape and the stock lookup below need it.
        serialNumber = fkItem.flipkartSerialNumber
        fkItem = None

        try:
            url = "http://www.flipkart.com/ps/%s" % (serialNumber)
            vendorsData = scraperFk.read(url)
            sortedVendorsData = sorted(vendorsData, key=itemgetter('sellingPrice'))
            lowestSellerSp, ourSp = 0, 0
            lowestSellerName = ''
            if sortedVendorsData:
                lowestSellerName = sortedVendorsData[0]['sellerName']
                lowestSellerSp = sortedVendorsData[0]['sellingPrice']
            for data in sortedVendorsData:
                if data['sellerName'] == 'Saholic':
                    ourSp = data['sellingPrice']
        except Exception:
            traceback.print_exc()
            continue

        try:
            request_url = "https://api.flipkart.net/sellers/skus/%s/listings" % (str(serialNumber))
            # NOTE(review): API credentials are hard-coded in source; they
            # should be moved to configuration and rotated.
            r = httpRequest.get(
                request_url,
                auth=('m2z93iskuj81qiid', '0c7ab6a5-98c0-4cdc-8be3-72c591e0add4'),
                timeout=30,  # never let a stalled API call hang the worker
            )
            listing = r.json()
            print("Inventory info %s" % (listing,))
            stock_count = int(listing['attributeValues']['stock_count'])
        except Exception:
            # Best effort: report no stock when the API cannot be read.
            traceback.print_exc()
            stock_count = 0

        flipkartItem.ourFlipkartPrice = ourSp
        flipkartItem.ourFlipkartInventory = stock_count
        flipkartItem.lowestFlipkartPrice = lowestSellerSp
        flipkartItem.lowestFlipkartSeller = lowestSellerName

    # Released only after the loop: the scraper is needed for every row.
    scraperFk = None
    flipkartItems[:] = []
|
| 12256 |
kshitij.so |
183 |
|
|
|
184 |
|
|
|
185 |
def close_session():
    """Close the shared Elixir session when it reports itself as active."""
    if not session.is_active:
        return
    print("session is active. closing it.")
    session.close()
|
|
|
189 |
|
| 15492 |
kshitij.so |
190 |
def scrapAmazon(amazonItems):
    """Fill in Amazon pricing fields on each row of *amazonItems*.

    For every row with an Amazonlisted entry that has a non-blank asin,
    stores our own price and the cheapest seller's price and name on the
    row. Either figure falls back to 0.0 / "" when its page cannot be read.
    Rows with no Amazonlisted entry or no asin are skipped. The list is
    emptied in place before returning.
    """
    from shop2020.model.v1.catalog.script import AmazonScraper
    from shop2020.model.v1.catalog.impl.DataService import Amazonlisted

    print("Inside amazonitems  %s" % (amazonItems,))
    print("len amazon items  %s" % (len(amazonItems),))
    time.sleep(5)
    amScraper = AmazonScraper.AmazonScraper()

    for amazonItem in amazonItems:
        amazon_d_item = Amazonlisted.get_by(itemId=amazonItem.item_id)
        # A missing row or a NULL/blank asin must skip the item rather than
        # raise and kill the worker thread.
        asin = ''
        if amazon_d_item is not None and amazon_d_item.asin:
            asin = amazon_d_item.asin.strip()
        if not asin:
            print("No asin found for  %s" % (amazonItem.item_id,))
            continue

        saholicUrl = "http://www.amazon.in/gp/offer-listing/%s?m=AF6E3O0VE0X4D&s=merchant-items&ie=UTF8&qid=1433584512&sr=1-1" % (asin)
        # NOTE(review): unlike saholicUrl this URL carries no "http://"
        # scheme; left as-is - confirm AmazonScraper.read expects that.
        generalUrl = "www.amazon.in/gp/offer-listing/%s" % (asin)

        try:
            saholicPrice = (amScraper.read(saholicUrl, False))[0]
        except Exception as e:
            print("Unable to read our Amazon price for %s: %s" % (asin, e))
            saholicPrice = 0.0

        try:
            cheapestSeller = amScraper.read(generalUrl, True)
            cheapestSellerPrice = cheapestSeller[0]
            cheapestSellerName = cheapestSeller[1]
        except Exception as e:
            print("Unable to read cheapest Amazon seller for %s: %s" % (asin, e))
            cheapestSellerPrice = 0.0
            cheapestSellerName = ""

        amazonItem.ourAmazonPrice = saholicPrice
        amazonItem.lowestAmazonPrice = cheapestSellerPrice
        amazonItem.lowestAmazonSeller = cheapestSellerName
    amazonItems[:] = []
|
| 12256 |
kshitij.so |
219 |
|
|
|
220 |
|
| 12314 |
kshitij.so |
221 |
def _quitSmtp(server):
    """Close an SMTP connection without letting shutdown errors escape."""
    if server is None:
        return
    try:
        server.quit()
    except Exception:
        # The connection may already be gone; just drop the socket.
        server.close()


def sendMail(request):
    """Tell the requester that *request* has been processed.

    Sends an HTML mail to request.user, first through smtp.gmail.com:587
    (STARTTLS + login) and, when that fails for any reason, through the SMTP
    server on localhost. Delivery is best effort: failures are printed and
    never raised, so a mail outage cannot stop the caller.
    """
    import smtplib
    from email.mime.text import MIMEText
    from email.mime.multipart import MIMEMultipart

    if not request.user:
        print("Error: unable to send email.")
        return

    # NOTE(review): account credentials are hard-coded in source; they should
    # be moved to configuration and rotated.
    sender = 'build@shop2020.in'
    password = "cafe@nes"
    recipients = [request.user]

    message = "Your Request has been processed.Visit dashboard to check & download report"
    msg = MIMEMultipart()
    msg['Subject'] = "Competition Scraping.Upload Id" + ' - ' + str(request.requestId)
    msg['From'] = sender
    msg['To'] = ",".join(recipients)
    msg.preamble = "Competition Scraping" + ' - ' + str(request.requestId)
    msg.attach(MIMEText(message, 'html'))
    payload = msg.as_string()

    mailServer = None
    try:
        # Connecting is inside the try so a network failure also falls back.
        mailServer = smtplib.SMTP("smtp.gmail.com", 587)
        mailServer.ehlo()
        mailServer.starttls()
        mailServer.ehlo()
        mailServer.login(sender, password)
        # Envelope sender is the account address (it used to be the password).
        mailServer.sendmail(sender, recipients, payload)
        return
    except Exception as e:
        print(e)
        print("Unable to send mail.Lets try with local SMTP.")
    finally:
        _quitSmtp(mailServer)

    smtpServer = None
    try:
        smtpServer = smtplib.SMTP('localhost')
        smtpServer.set_debuglevel(1)
        smtpServer.sendmail(sender, recipients, payload)
        print("Successfully sent email")
    except Exception:
        print("Error: unable to send email.")
    finally:
        _quitSmtp(smtpServer)
|
|
|
254 |
|
| 12256 |
kshitij.so |
255 |
|
|
|
256 |
|
|
|
257 |
if __name__ == "__main__":
    # Command line: [-l LOGFILE] [-i PIDFILE] [start|stop|restart]
    # With no positional argument the scraping loop runs in the foreground.
    parser = optparse.OptionParser()
    parser.add_option(
        "-l", "--logfile",
        dest="logfile",
        type="string",
        help="Log all output to LOG_FILE",
    )
    parser.add_option(
        "-i", "--pidfile",
        dest="pidfile",
        type="string",
        help="Write the PID to pidfile",
    )
    (options, args) = parser.parse_args()
    daemon = CompetitorScraping(options.logfile, options.pidfile)

    if not args:
        daemon.run()
    elif len(args) > 1:
        print("usage: %s start|stop|restart" % sys.argv[0])
        sys.exit(2)
    else:
        # Exactly one positional argument: dispatch it to the daemon.
        actions = {
            'start': daemon.start,
            'stop': daemon.stop,
            'restart': daemon.restart,
        }
        action = actions.get(args[0])
        if action is None:
            print("Unknown command")
            sys.exit(2)
        action()
        sys.exit(0)
|