| 12256 |
kshitij.so |
1 |
from elixir import session
|
| 12268 |
kshitij.so |
2 |
from shop2020.config.client.ConfigClient import ConfigClient
|
| 12256 |
kshitij.so |
3 |
from sqlalchemy.sql import asc
|
|
|
4 |
from sqlalchemy.sql.expression import or_
|
|
|
5 |
from shop2020.utils.daemon import Daemon
|
|
|
6 |
import optparse
|
|
|
7 |
import sys
|
|
|
8 |
import mechanize
|
|
|
9 |
import time
|
|
|
10 |
from shop2020.model.v1.catalog.impl import DataService
|
| 12297 |
kshitij.so |
11 |
from shop2020.model.v1.catalog.impl.DataService import CompetitorPricing, CompetitorPricingRequest
|
| 12272 |
kshitij.so |
12 |
import gc
|
| 12256 |
kshitij.so |
13 |
|
| 12268 |
kshitij.so |
14 |
# Module-level setup, executed at import time: look up the database host in
# the central configuration and initialise the catalog data layer against it.
config_client = ConfigClient()
host = config_client.get_property('staging_hostname')
DataService.initialize(db_hostname=host)
|
| 12256 |
kshitij.so |
17 |
|
| 15498 |
kshitij.so |
18 |
# HTTP request headers sent with the Snapdeal JSON request (see the
# urllib2.Request call that passes ``headers=headers``).  They present the
# scraper as a desktop Chrome browser.
headers = {
    'User-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.8',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
}
|
|
|
24 |
|
| 12256 |
kshitij.so |
25 |
class CompetitorScraping(Daemon):
    """Daemon wrapper whose ``run`` executes the module-level scraping loop.

    Both stdout and stderr of the daemon are directed to ``logfile``.
    """

    DEFAULT_LOGFILE = '/var/log/services/competitorScraping.log'
    DEFAULT_PIDFILE = '/var/run/competitor-scraper.pid'

    def __init__(self, logfile=DEFAULT_LOGFILE, pidfile=DEFAULT_PIDFILE):
        """Create the daemon.

        ``logfile`` / ``pidfile`` may be passed as ``None`` to request the
        defaults.  The command-line entry point forwards optparse values
        verbatim, and those are ``None`` whenever ``-l`` / ``-i`` are omitted;
        without this fallback the defaults would be silently replaced by
        ``None``.
        """
        if logfile is None:
            logfile = self.DEFAULT_LOGFILE
        if pidfile is None:
            pidfile = self.DEFAULT_PIDFILE
        Daemon.__init__(self, pidfile, stdout=logfile, stderr=logfile)

    def run(self):
        """Run the polling loop (the module-level ``start`` function)."""
        # NOTE: this is the module-level function start(), not the start()
        # method that the command-line entry point calls on this object.
        start()
|
|
|
31 |
|
|
|
32 |
def start():
    """Poll forever for unprocessed pricing requests and handle each one.

    Each pass loads every CompetitorPricingRequest whose ``isProcessed`` is
    False or NULL, ordered by ``requestId``.  When nothing is pending the loop
    sleeps for 600 seconds.  Each pending request is scraped, flagged as
    processed, committed, and then the requester is notified by mail.

    Any exception is printed and terminates the process with exit status 2.
    """
    # Local variable names are kept stable on purpose: the diagnostic
    # ``print(locals())`` calls below expose them in the log output.
    try:
        while True:
            requests = (
                session.query(CompetitorPricingRequest)
                .filter(or_(CompetitorPricingRequest.isProcessed == False,
                            CompetitorPricingRequest.isProcessed == None))
                .order_by(asc(CompetitorPricingRequest.requestId))
                .all()
            )

            if requests is None or requests == []:
                print("No new request to process, sleeeeeeping.....")
                close_session()
                collected = gc.collect()
                print(locals())
                print(globals())
                print("Garbage collector: collected %d objects." % (collected))
                time.sleep(600)

            for request in requests:
                fetchDetails(request)
                request.isProcessed = True
                session.commit()
                sendMail(request)

            # End-of-pass housekeeping; this also runs after the idle sleep.
            close_session()
            collected = gc.collect()
            print("Garbage collector: collected %d objects." % (collected))
            print(locals())
            print(globals())
            requests = []
    except Exception as e:
        print(e)
        sys.exit(2)
|
|
|
58 |
|
|
|
59 |
def fetchDetails(request):
    """Scrape every marketplace selected for the items of ``request``.

    Loads the CompetitorPricing rows whose ``competitorPricing_requestId``
    equals ``request.requestId``, groups them by their per-marketplace flags,
    and runs the three scraper functions in parallel daemon threads, one list
    each.  Returns only after all three threads have finished.
    """
    import threading

    items = session.query(CompetitorPricing).filter(
        CompetitorPricing.competitorPricing_requestId == request.requestId
    ).all()
    print(items)

    # An item may be flagged for several marketplaces at once.
    snapdeal = [item for item in items if item.snapdealScraping]
    flipkart = [item for item in items if item.flipkartScraping]
    amazon = [item for item in items if item.amazonScraping]

    jobs = (
        (scrapSnapdeal, snapdeal),
        (scrapFlipkart, flipkart),
        (scrapAmazon, amazon),
    )
    threads = []
    for scraper, bucket in jobs:
        worker = threading.Thread(target=scraper, args=(bucket,))
        worker.daemon = True
        worker.start()
        threads.append(worker)

    for worker in threads:
        worker.join()

    # Empty the working lists in place so nothing here keeps the rows alive.
    del items[:]
    del snapdeal[:]
    del flipkart[:]
    del amazon[:]
    del threads[:]
|
| 12256 |
kshitij.so |
88 |
|
|
|
89 |
def scrapSnapdeal(snapdealItems):
    """Fill in Snapdeal pricing fields on each item of ``snapdealItems``.

    For every item that has a matching SnapdealItem row, the vendor list for
    its ``supc`` is fetched as JSON.  The first vendor in the response is
    recorded as the lowest seller (the request asks for
    ``sort=sellingPrice``), and the vendor displayed as 'MobilesnMore' is
    recorded as our own listing.  Values that are not found stay 0 / ''.

    Items without a SnapdealItem row, and items whose fetch or parse fails,
    are skipped; failures are reported with a traceback.  The list is emptied
    in place before returning.
    """
    import simplejson as json
    import traceback
    import urllib2
    from shop2020.model.v1.catalog.impl.DataService import SnapdealItem

    for snapdealItem in snapdealItems:
        sdItem = SnapdealItem.get_by(item_id=snapdealItem.item_id)
        if sdItem is None:
            continue
        try:
            url = "http://www.snapdeal.com/acors/json/gvbps?supc=%s&catId=175&sort=sellingPrice" % (sdItem.supc)
            print(url)
            time.sleep(1)  # pause before each request
            response = urllib2.urlopen(urllib2.Request(url, headers=headers))
            try:
                vendorInfo = json.loads(response.read())
            finally:
                response.close()
            print(vendorInfo)

            lowestSp = lowestOfferPrice = lowestSellerInventory = 0
            ourSp = ourOfferPrice = ourInventory = 0
            lowestSellerName = ''
            for position, vendor in enumerate(vendorInfo):
                if position == 0:
                    lowestSellerName = vendor['vendorDisplayName']
                    lowestSp = _snapdealSellingPrice(vendor)
                    lowestOfferPrice = vendor['sellingPrice']
                    lowestSellerInventory = vendor['buyableInventory']
                if vendor['vendorDisplayName'] == 'MobilesnMore':
                    ourInventory = vendor['buyableInventory']
                    ourSp = _snapdealSellingPrice(vendor)
                    ourOfferPrice = vendor['sellingPrice']
        except Exception:
            # print_exc() writes the traceback itself and returns None.
            traceback.print_exc()
            continue
        finally:
            sdItem = None

        # sdItem has been released above; the id is read from the pricing row,
        # which is the same id the SnapdealItem lookup used.
        print("Item id  %s" % (snapdealItem.item_id,))
        print(ourSp)
        print(ourOfferPrice)
        print(ourInventory)
        snapdealItem.ourSnapdealPrice = ourSp
        snapdealItem.ourSnapdealOfferPrice = ourOfferPrice
        snapdealItem.ourSnapdealInventory = ourInventory
        snapdealItem.lowestSnapdealPrice = lowestSp
        snapdealItem.lowestSnapdealOfferPrice = lowestOfferPrice
        snapdealItem.lowestSnapdealSeller = lowestSellerName
        snapdealItem.lowestSnapdealSellerInventory = lowestSellerInventory
    snapdealItems[:] = []


def _snapdealSellingPrice(vendor):
    """Return vendor['sellingPriceBefIntCashBack'], else vendor['sellingPrice']."""
    try:
        return vendor['sellingPriceBefIntCashBack']
    except KeyError:
        return vendor['sellingPrice']
|
| 12256 |
kshitij.so |
144 |
|
|
|
145 |
def scrapFlipkart(flipkartItems):
    """Fill in Flipkart pricing and inventory fields on each item.

    For every item that has a matching FlipkartItem row, the seller list for
    its ``flipkartSerialNumber`` is scraped and sorted by ``sellingPrice``.
    The cheapest seller is recorded, and the seller named 'Saholic' is
    recorded as our own price (0 when absent).  Our stock count is then read
    from the Flipkart seller API; if that call fails the stock count is
    stored as 0.

    Items without a FlipkartItem row, and items whose price scrape fails, are
    skipped.  The list is emptied in place before returning.
    """
    from shop2020.model.v1.catalog.script import FlipkartScraper
    from operator import itemgetter
    import traceback
    import requests as httpRequest
    from shop2020.model.v1.catalog.impl.DataService import FlipkartItem

    scraperFk = FlipkartScraper.FlipkartScraper()
    for flipkartItem in flipkartItems:
        fkItem = FlipkartItem.get_by(item_id=flipkartItem.item_id)
        if fkItem is None:
            continue
        # Copy the serial number before releasing the row.  The inventory
        # lookup below needs it, and reading it from the released reference
        # is what previously forced stock_count to 0 on every item.
        serialNumber = fkItem.flipkartSerialNumber
        fkItem = None

        try:
            url = "http://www.flipkart.com/ps/%s" % (serialNumber,)
            sortedVendorsData = sorted(scraperFk.read(url),
                                       key=itemgetter('sellingPrice'))
            lowestSellerSp, ourSp = 0, 0
            lowestSellerName = ''
            for position, data in enumerate(sortedVendorsData):
                if position == 0:
                    lowestSellerName = data['sellerName']
                    lowestSellerSp = data['sellingPrice']
                if data['sellerName'] == 'Saholic':
                    ourSp = data['sellingPrice']
        except Exception:
            traceback.print_exc()
            continue

        try:
            request_url = "https://api.flipkart.net/sellers/skus/%s/listings" % (str(serialNumber))
            # NOTE(review): API credentials are hard-coded in source; they
            # should be moved to configuration and rotated.
            r = httpRequest.get(request_url, auth=('m2z93iskuj81qiid', '0c7ab6a5-98c0-4cdc-8be3-72c591e0add4'))
            listing = r.json()
            print("Inventory info %s" % (listing,))
            stock_count = int(listing['attributeValues']['stock_count'])
        except Exception:
            # Best effort: a failed inventory lookup is stored as zero stock.
            traceback.print_exc()
            stock_count = 0
        finally:
            r = {}

        flipkartItem.ourFlipkartPrice = ourSp
        flipkartItem.ourFlipkartInventory = stock_count
        flipkartItem.lowestFlipkartPrice = lowestSellerSp
        flipkartItem.lowestFlipkartSeller = lowestSellerName
    scraperFk = None
    flipkartItems[:] = []
|
| 12256 |
kshitij.so |
190 |
|
|
|
191 |
|
|
|
192 |
def close_session():
    """Close the shared ``session`` if it reports itself as active."""
    if not session.is_active:
        return
    print("session is active. closing it.")
    session.close()
|
|
|
196 |
|
| 15492 |
kshitij.so |
197 |
def scrapAmazon(amazonItems):
    """Fill in Amazon pricing fields on each item of ``amazonItems``.

    For every item with a usable ASIN in its Amazonlisted row, two
    offer-listing pages are read: one filtered to our merchant id (our own
    price) and the general one (cheapest seller price and name).  A failed
    read is stored as price 0.0 and, for the cheapest seller, name ''.

    Items with no Amazonlisted row, or with a missing or blank ASIN, are
    skipped.  The list is emptied in place before returning.
    """
    from shop2020.model.v1.catalog.script import AmazonScraper
    from shop2020.model.v1.catalog.impl.DataService import Amazonlisted

    print("Inside amazonitems  %s" % (amazonItems,))
    print("len amazon items  %d" % len(amazonItems))
    time.sleep(5)  # NOTE(review): reason for this fixed delay is not visible here
    amScraper = AmazonScraper.AmazonScraper()

    for amazonItem in amazonItems:
        amazon_d_item = Amazonlisted.get_by(itemId=amazonItem.item_id)
        # get_by may find no row, and asin may be unset; both cases are
        # treated exactly like an empty ASIN instead of crashing the thread.
        asin = ''
        if amazon_d_item is not None and amazon_d_item.asin:
            asin = amazon_d_item.asin.strip()
        if not asin:
            print("No asin found for  %s" % (amazonItem.item_id,))
            continue

        saholicUrl = "http://www.amazon.in/gp/offer-listing/%s?m=AF6E3O0VE0X4D&s=merchant-items&ie=UTF8&qid=1433584512&sr=1-1" % (asin,)
        # NOTE(review): no URL scheme here, unlike saholicUrl -- presumably
        # AmazonScraper.read handles that for the general listing; confirm.
        generalUrl = "www.amazon.in/gp/offer-listing/%s" % (asin,)

        try:
            saholicPrice = (amScraper.read(saholicUrl, False))[0]
        except Exception:
            saholicPrice = 0.0

        try:
            cheapestSeller = amScraper.read(generalUrl, True)
            cheapestSellerPrice = cheapestSeller[0]
            cheapestSellerName = cheapestSeller[1]
        except Exception:
            cheapestSellerPrice = 0.0
            cheapestSellerName = ""

        amazonItem.ourAmazonPrice = saholicPrice
        amazonItem.lowestAmazonPrice = cheapestSellerPrice
        amazonItem.lowestAmazonSeller = cheapestSellerName
    amazonItems[:] = []
|
| 12256 |
kshitij.so |
226 |
|
|
|
227 |
|
| 12314 |
kshitij.so |
228 |
def sendMail(request):
    """Notify ``request.user`` by e-mail that the request has been processed.

    Delivery is first attempted through Gmail's SMTP server over STARTTLS.
    If any step of that attempt fails (connect, handshake, login or send),
    the error is printed and delivery is retried through the SMTP server on
    localhost.  A failure of the fallback is printed as well; SMTP errors are
    never raised to the caller, so a mail outage cannot stop the polling
    loop.  Both connections are shut down after use.
    """
    import smtplib
    from email.mime.text import MIMEText
    from email.mime.multipart import MIMEMultipart

    def shutdown(server):
        # quit() raises if the connection is already dead; close() does not.
        if server is None:
            return
        try:
            server.quit()
        except Exception:
            server.close()

    sender = 'build@shop2020.in'
    recipients = [request.user]
    message = "Your Request has been processed.Visit dashboard to check & download report"

    msg = MIMEMultipart()
    msg['Subject'] = "Competition Scraping.Upload Id" + ' - ' + str(request.requestId)
    msg['From'] = ""
    msg['To'] = ",".join(recipients)
    msg.preamble = "Competition Scraping" + ' - ' + str(request.requestId)
    msg.attach(MIMEText(message, 'html'))
    payload = msg.as_string()

    mailServer = None
    try:
        mailServer = smtplib.SMTP("smtp.gmail.com", 587)
        mailServer.ehlo()
        mailServer.starttls()
        mailServer.ehlo()
        # NOTE(review): credentials are hard-coded in source; they should be
        # moved to configuration.
        mailServer.login("build@shop2020.in", "cafe@nes")
        # The envelope sender is the account address; the password string was
        # previously passed here by mistake.
        mailServer.sendmail(sender, recipients, payload)
        return
    except Exception as e:
        print(e)
        print("Unable to send mail.Lets try with local SMTP.")
    finally:
        shutdown(mailServer)

    smtpServer = None
    try:
        smtpServer = smtplib.SMTP('localhost')
        smtpServer.set_debuglevel(1)
        smtpServer.sendmail(sender, recipients, payload)
        print("Successfully sent email")
    except Exception:
        print("Error: unable to send email.")
    finally:
        shutdown(smtpServer)
|
|
|
261 |
|
| 12256 |
kshitij.so |
262 |
|
|
|
263 |
|
|
|
264 |
# Command-line entry point.
#   no argument            -> run the scraping loop in the foreground
#   start | stop | restart -> control the background daemon
if __name__ == "__main__":
    parser = optparse.OptionParser()
    parser.add_option("-l", "--logfile", dest="logfile",
                      type="string",
                      help="Log all output to LOG_FILE",
                      )
    parser.add_option("-i", "--pidfile", dest="pidfile",
                      type="string",
                      help="Write the PID to pidfile")
    (options, args) = parser.parse_args()

    # Forward only the options that were actually supplied.  optparse yields
    # None for omitted ones, and passing None positionally would replace the
    # constructor's default log and pid file paths.
    daemon_kwargs = {}
    if options.logfile is not None:
        daemon_kwargs['logfile'] = options.logfile
    if options.pidfile is not None:
        daemon_kwargs['pidfile'] = options.pidfile
    daemon = CompetitorScraping(**daemon_kwargs)

    if len(args) == 0:
        daemon.run()
    elif len(args) == 1:
        commands = {
            'start': daemon.start,
            'stop': daemon.stop,
            'restart': daemon.restart,
        }
        action = commands.get(args[0])
        if action is None:
            print("Unknown command")
            sys.exit(2)
        action()
        sys.exit(0)
    else:
        print("usage: %s start|stop|restart" % sys.argv[0])
        sys.exit(2)
|