| 12256 |
kshitij.so |
1 |
from elixir import session
|
| 12268 |
kshitij.so |
2 |
from shop2020.config.client.ConfigClient import ConfigClient
|
| 12256 |
kshitij.so |
3 |
from sqlalchemy.sql import asc
|
|
|
4 |
from sqlalchemy.sql.expression import or_
|
|
|
5 |
from shop2020.utils.daemon import Daemon
|
|
|
6 |
import optparse
|
|
|
7 |
import sys
|
|
|
8 |
import mechanize
|
|
|
9 |
import time
|
|
|
10 |
from shop2020.model.v1.catalog.impl import DataService
|
| 12297 |
kshitij.so |
11 |
from shop2020.model.v1.catalog.impl.DataService import CompetitorPricing, CompetitorPricingRequest
|
| 12272 |
kshitij.so |
12 |
import gc
|
| 12256 |
kshitij.so |
13 |
|
| 12268 |
kshitij.so |
14 |
# Module-level bootstrap: look up the database host in the shared configuration
# and bind the DataService models to it. This runs at import time, before any
# of the functions below touch the session.
# NOTE(review): the property read is 'staging_hostname' -- confirm this is the
# intended database host for every environment this daemon is deployed to.
config_client = ConfigClient()
host = config_client.get_property('staging_hostname')
DataService.initialize(db_hostname=host)
|
| 12256 |
kshitij.so |
17 |
|
|
|
18 |
class CompetitorScraping(Daemon):
    """Daemon subclass whose ``run`` executes the competitor scraping loop."""

    def __init__(self, logfile='/var/log/services/competitorScraping.log', pidfile='/var/run/competitor-scraper.pid'):
        """Configure the daemon.

        logfile -- path handed to Daemon as both stdout and stderr.
        pidfile -- path handed to Daemon as its pid file.
        """
        Daemon.__init__(self, pidfile, stderr=logfile, stdout=logfile)

    def run(self):
        """Enter the polling loop."""
        # This is the module-level start() function defined in this file,
        # not the start() method of the daemon object.
        start()
|
|
|
24 |
|
|
|
25 |
def start():
|
|
|
26 |
try:
|
|
|
27 |
while True:
|
|
|
28 |
requests = session.query(CompetitorPricingRequest).filter(or_(CompetitorPricingRequest.isProcessed==False,CompetitorPricingRequest.isProcessed==None)).order_by(asc(CompetitorPricingRequest.requestId)).all()
|
|
|
29 |
if requests ==[] or requests is None:
|
|
|
30 |
print "No new request to process, sleeeeeeping....."
|
| 12281 |
kshitij.so |
31 |
close_session()
|
|
|
32 |
collected = gc.collect()
|
| 12292 |
kshitij.so |
33 |
print locals()
|
|
|
34 |
print globals()
|
| 12281 |
kshitij.so |
35 |
print "Garbage collector: collected %d objects." % (collected)
|
| 12256 |
kshitij.so |
36 |
time.sleep(600)
|
|
|
37 |
for request in requests:
|
|
|
38 |
fetchDetails(request)
|
|
|
39 |
request.isProcessed = True
|
|
|
40 |
session.commit()
|
| 12314 |
kshitij.so |
41 |
sendMail(request)
|
| 12256 |
kshitij.so |
42 |
close_session()
|
| 12279 |
kshitij.so |
43 |
collected = gc.collect()
|
|
|
44 |
print "Garbage collector: collected %d objects." % (collected)
|
| 12292 |
kshitij.so |
45 |
print locals()
|
|
|
46 |
print globals()
|
| 12279 |
kshitij.so |
47 |
requests = []
|
| 12256 |
kshitij.so |
48 |
except Exception as e:
|
|
|
49 |
print e
|
|
|
50 |
sys.exit(2)
|
|
|
51 |
|
|
|
52 |
def fetchDetails(request):
|
| 12297 |
kshitij.so |
53 |
import threading
|
| 12256 |
kshitij.so |
54 |
items = session.query(CompetitorPricing).filter(CompetitorPricing.competitorPricing_requestId==request.requestId).all()
|
|
|
55 |
print items
|
|
|
56 |
snapdeal, flipkart, amazon =[],[],[]
|
|
|
57 |
for item in items:
|
|
|
58 |
if item.snapdealScraping:
|
|
|
59 |
snapdeal.append(item)
|
|
|
60 |
if item.flipkartScraping:
|
|
|
61 |
flipkart.append(item)
|
|
|
62 |
if item.amazonScraping:
|
|
|
63 |
amazon.append(item)
|
|
|
64 |
threads = []
|
|
|
65 |
t1 = threading.Thread(target=scrapSnapdeal, args = (snapdeal,))
|
|
|
66 |
t1.daemon = True
|
|
|
67 |
t1.start()
|
|
|
68 |
t2 = threading.Thread(target=scrapFlipkart, args = (flipkart,))
|
|
|
69 |
t2.daemon = True
|
|
|
70 |
t2.start()
|
| 15484 |
kshitij.so |
71 |
t3 = threading.Thread(target=scrapAmazon, args = (amazon))
|
| 12278 |
kshitij.so |
72 |
t3.daemon = True
|
|
|
73 |
t3.start()
|
| 12256 |
kshitij.so |
74 |
threads.append(t1)
|
|
|
75 |
threads.append(t2)
|
|
|
76 |
threads.append(t3)
|
|
|
77 |
for th in threads:
|
|
|
78 |
th.join()
|
| 12284 |
kshitij.so |
79 |
br,t1,t2,t3 =None,None,None,None
|
|
|
80 |
items[:],snapdeal[:],flipkart[:],amazon[:],threads[:]=[],[],[],[],[]
|
| 12256 |
kshitij.so |
81 |
|
|
|
82 |
def scrapSnapdeal(snapdealItems):
|
| 12297 |
kshitij.so |
83 |
import simplejson as json
|
|
|
84 |
import urllib2
|
|
|
85 |
from shop2020.model.v1.catalog.impl.DataService import SnapdealItem
|
| 12256 |
kshitij.so |
86 |
for snapdealItem in snapdealItems:
|
|
|
87 |
sdItem = SnapdealItem.get_by(item_id=snapdealItem.item_id)
|
|
|
88 |
if sdItem is None:
|
|
|
89 |
continue
|
|
|
90 |
try:
|
| 15484 |
kshitij.so |
91 |
url="http://www.snapdeal.com/acors/json/gvbps?supc=%s&catId=175&sort=sellingPrice"%(sdItem.supc)
|
| 12256 |
kshitij.so |
92 |
print url
|
|
|
93 |
time.sleep(1)
|
|
|
94 |
req = urllib2.Request(url)
|
|
|
95 |
response = urllib2.urlopen(req)
|
|
|
96 |
json_input = response.read()
|
|
|
97 |
vendorInfo = json.loads(json_input)
|
|
|
98 |
lowestSp, iterator, ourInventory, lowestSellerInventory,ourSp,ourOfferPrice,lowestSp,lowestOfferPrice = (0,)*8
|
|
|
99 |
lowestSellerName = ''
|
|
|
100 |
for vendor in vendorInfo:
|
|
|
101 |
if iterator == 0:
|
|
|
102 |
lowestSellerName = vendor['vendorDisplayName']
|
|
|
103 |
try:
|
|
|
104 |
lowestSp = vendor['sellingPriceBefIntCashBack']
|
|
|
105 |
except:
|
|
|
106 |
lowestSp = vendor['sellingPrice']
|
|
|
107 |
lowestOfferPrice = vendor['sellingPrice']
|
|
|
108 |
lowestSellerInventory = vendor['buyableInventory']
|
|
|
109 |
|
|
|
110 |
if vendor['vendorDisplayName'] == 'MobilesnMore':
|
|
|
111 |
ourInventory = vendor['buyableInventory']
|
|
|
112 |
try:
|
|
|
113 |
ourSp = vendor['sellingPriceBefIntCashBack']
|
|
|
114 |
except:
|
|
|
115 |
ourSp = vendor['sellingPrice']
|
|
|
116 |
ourOfferPrice = vendor['sellingPrice']
|
|
|
117 |
iterator+=1
|
|
|
118 |
except:
|
|
|
119 |
continue
|
| 12286 |
kshitij.so |
120 |
finally:
|
| 12287 |
kshitij.so |
121 |
sdItem =None
|
| 12256 |
kshitij.so |
122 |
snapdealItem.ourSnapdealPrice = ourSp
|
|
|
123 |
snapdealItem.ourSnapdealOfferPrice = ourOfferPrice
|
|
|
124 |
snapdealItem.ourSnapdealInventory = ourInventory
|
|
|
125 |
snapdealItem.lowestSnapdealPrice = lowestSp
|
|
|
126 |
snapdealItem.lowestSnapdealOfferPrice = lowestOfferPrice
|
|
|
127 |
snapdealItem.lowestSnapdealSeller = lowestSellerName
|
| 12314 |
kshitij.so |
128 |
snapdealItem.lowestSnapdealSellerInventory = lowestSellerInventory
|
|
|
129 |
snapdealItems[:]=[]
|
| 12256 |
kshitij.so |
130 |
|
|
|
131 |
def scrapFlipkart(flipkartItems):
|
| 12297 |
kshitij.so |
132 |
from shop2020.model.v1.catalog.script import FlipkartScraper
|
|
|
133 |
from operator import itemgetter
|
|
|
134 |
import requests as httpRequest
|
|
|
135 |
from shop2020.model.v1.catalog.impl.DataService import FlipkartItem
|
| 12276 |
kshitij.so |
136 |
scraperFk = FlipkartScraper.FlipkartScraper()
|
| 12256 |
kshitij.so |
137 |
for flipkartItem in flipkartItems:
|
|
|
138 |
fkItem = FlipkartItem.get_by(item_id=flipkartItem.item_id)
|
|
|
139 |
if fkItem is None:
|
|
|
140 |
continue
|
|
|
141 |
try:
|
|
|
142 |
url = "http://www.flipkart.com/ps/%s"%(fkItem.flipkartSerialNumber)
|
|
|
143 |
vendorsData = scraperFk.read(url)
|
|
|
144 |
sortedVendorsData = []
|
|
|
145 |
sortedVendorsData = sorted(vendorsData, key=itemgetter('sellingPrice'))
|
|
|
146 |
lowestSellerSp, iterator, ourSp = (0,)*3
|
|
|
147 |
lowestSellerName = ''
|
|
|
148 |
for data in sortedVendorsData:
|
|
|
149 |
if iterator == 0:
|
|
|
150 |
lowestSellerName = data['sellerName']
|
|
|
151 |
lowestSellerSp = data['sellingPrice']
|
|
|
152 |
|
|
|
153 |
if data['sellerName'] == 'Saholic':
|
|
|
154 |
ourSp = data['sellingPrice']
|
|
|
155 |
|
|
|
156 |
iterator+=1
|
|
|
157 |
except:
|
|
|
158 |
continue
|
| 12286 |
kshitij.so |
159 |
finally:
|
| 12287 |
kshitij.so |
160 |
fkItem=None
|
| 12256 |
kshitij.so |
161 |
try:
|
|
|
162 |
request_url = "https://api.flipkart.net/sellers/skus/%s/listings"%(str(fkItem.flipkartSerialNumber))
|
|
|
163 |
r = httpRequest.get(request_url, auth=('m2z93iskuj81qiid', '0c7ab6a5-98c0-4cdc-8be3-72c591e0add4'))
|
|
|
164 |
print "Inventory info",r.json()
|
|
|
165 |
stock_count = int((r.json()['attributeValues'])['stock_count'])
|
|
|
166 |
except:
|
|
|
167 |
stock_count = 0
|
|
|
168 |
finally:
|
|
|
169 |
r={}
|
|
|
170 |
flipkartItem.ourFlipkartPrice = ourSp
|
|
|
171 |
flipkartItem.ourFlipkartInventory = stock_count
|
|
|
172 |
flipkartItem.lowestFlipkartPrice = lowestSellerSp
|
|
|
173 |
flipkartItem.lowestFlipkartSeller = lowestSellerName
|
| 12283 |
kshitij.so |
174 |
scraperFk = None
|
| 12314 |
kshitij.so |
175 |
flipkartItems[:] =[]
|
| 12256 |
kshitij.so |
176 |
|
|
|
177 |
|
|
|
178 |
def close_session():
|
|
|
179 |
if session.is_active:
|
|
|
180 |
print "session is active. closing it."
|
|
|
181 |
session.close()
|
|
|
182 |
|
| 15492 |
kshitij.so |
183 |
def scrapAmazon(amazonItems):
|
| 15484 |
kshitij.so |
184 |
from shop2020.model.v1.catalog.script import AmazonScraper
|
|
|
185 |
from shop2020.model.v1.catalog.impl.DataService import Amazonlisted
|
| 12277 |
kshitij.so |
186 |
print "Inside amazonitems ",amazonItems
|
|
|
187 |
print "len amazon items ",len(amazonItems)
|
|
|
188 |
time.sleep(5)
|
| 15484 |
kshitij.so |
189 |
amScraper = AmazonScraper.AmazonScraper()
|
| 12256 |
kshitij.so |
190 |
for amazonItem in amazonItems:
|
| 15484 |
kshitij.so |
191 |
amazon_d_item = Amazonlisted.get_by(itemId=amazonItem.item_id)
|
|
|
192 |
if len(amazon_d_item.asin)==0:
|
|
|
193 |
print "No asin found for ",amazonItem.item_id
|
| 12256 |
kshitij.so |
194 |
continue
|
| 15484 |
kshitij.so |
195 |
saholicUrl = "http://www.amazon.in/gp/offer-listing/%s?m=AF6E3O0VE0X4D&s=merchant-items&ie=UTF8&qid=1433584512&sr=1-1"%(amazon_d_item.asin.strip())
|
|
|
196 |
generalUrl = "http://www.amazon.in/dp/%s"%(amazonItem.item_id.strip())
|
| 12256 |
kshitij.so |
197 |
try:
|
| 15484 |
kshitij.so |
198 |
saholicPrice = (amScraper.read(saholicUrl, False))[0]
|
|
|
199 |
except:
|
|
|
200 |
saholicPrice = 0.0
|
|
|
201 |
try:
|
|
|
202 |
cheapestSeller = (amScraper.read(generalUrl, True))
|
|
|
203 |
cheapestSellerPrice = cheapestSeller[0]
|
|
|
204 |
cheapestSellerName = cheapestSeller[1]
|
|
|
205 |
except:
|
|
|
206 |
cheapestSellerPrice = 0.0
|
|
|
207 |
cheapestSellerName = ""
|
|
|
208 |
amazonItem.ourAmazonPrice = saholicPrice
|
|
|
209 |
amazonItem.lowestAmazonPrice = cheapestSellerPrice
|
|
|
210 |
amazonItem.lowestAmazonSeller = cheapestSellerName
|
| 12314 |
kshitij.so |
211 |
amazonItems[:] =[]
|
| 12256 |
kshitij.so |
212 |
|
|
|
213 |
|
| 12314 |
kshitij.so |
214 |
def sendMail(request):
|
|
|
215 |
import smtplib
|
|
|
216 |
from email.mime.text import MIMEText
|
|
|
217 |
from email.mime.multipart import MIMEMultipart
|
|
|
218 |
mailServer = smtplib.SMTP("smtp.gmail.com", 587)
|
|
|
219 |
mailServer.ehlo()
|
|
|
220 |
mailServer.starttls()
|
|
|
221 |
mailServer.ehlo()
|
|
|
222 |
recipients = []
|
|
|
223 |
recipients.append(request.user)
|
|
|
224 |
message = "Your Request has been processed.Visit dashboard to check & download report"
|
|
|
225 |
msg = MIMEMultipart()
|
| 12329 |
kshitij.so |
226 |
msg['Subject'] = "Competition Scraping.Upload Id" + ' - ' + str(request.requestId)
|
| 12314 |
kshitij.so |
227 |
msg['From'] = ""
|
|
|
228 |
msg['To'] = ",".join(recipients)
|
|
|
229 |
msg.preamble = "Competition Scraping" + ' - ' + str(request.requestId)
|
|
|
230 |
html_msg = MIMEText(message, 'html')
|
|
|
231 |
msg.attach(html_msg)
|
|
|
232 |
try:
|
|
|
233 |
mailServer.login("build@shop2020.in", "cafe@nes")
|
|
|
234 |
#mailServer.sendmail("cafe@nes", ['kshitij.sood@saholic.com'], msg.as_string())
|
|
|
235 |
mailServer.sendmail("cafe@nes", recipients, msg.as_string())
|
|
|
236 |
except Exception as e:
|
|
|
237 |
print e
|
|
|
238 |
print "Unable to send mail.Lets try with local SMTP."
|
|
|
239 |
smtpServer = smtplib.SMTP('localhost')
|
|
|
240 |
smtpServer.set_debuglevel(1)
|
|
|
241 |
sender = 'build@shop2020.in'
|
|
|
242 |
try:
|
|
|
243 |
smtpServer.sendmail(sender, recipients, msg.as_string())
|
|
|
244 |
print "Successfully sent email"
|
|
|
245 |
except:
|
|
|
246 |
print "Error: unable to send email."
|
|
|
247 |
|
| 12256 |
kshitij.so |
248 |
|
|
|
249 |
|
|
|
250 |
if __name__ == "__main__":
|
|
|
251 |
parser = optparse.OptionParser()
|
|
|
252 |
parser.add_option("-l", "--logfile", dest="logfile",
|
|
|
253 |
type="string",
|
|
|
254 |
help="Log all output to LOG_FILE",
|
|
|
255 |
)
|
|
|
256 |
parser.add_option("-i", "--pidfile", dest="pidfile",
|
|
|
257 |
type="string",
|
|
|
258 |
help="Write the PID to pidfile")
|
|
|
259 |
(options, args) = parser.parse_args()
|
|
|
260 |
daemon = CompetitorScraping(options.logfile, options.pidfile)
|
|
|
261 |
if len(args) == 0:
|
|
|
262 |
daemon.run()
|
|
|
263 |
elif len(args) == 1:
|
|
|
264 |
if 'start' == args[0]:
|
|
|
265 |
daemon.start()
|
|
|
266 |
elif 'stop' == args[0]:
|
|
|
267 |
daemon.stop()
|
|
|
268 |
elif 'restart' == args[0]:
|
|
|
269 |
daemon.restart()
|
|
|
270 |
else:
|
|
|
271 |
print "Unknown command"
|
|
|
272 |
sys.exit(2)
|
|
|
273 |
sys.exit(0)
|
|
|
274 |
else:
|
|
|
275 |
print "usage: %s start|stop|restart" % sys.argv[0]
|
|
|
276 |
sys.exit(2)
|