| 12256 |
kshitij.so |
1 |
from elixir import session
|
| 12268 |
kshitij.so |
2 |
from shop2020.config.client.ConfigClient import ConfigClient
|
| 12256 |
kshitij.so |
3 |
from sqlalchemy.sql import asc
|
|
|
4 |
from sqlalchemy.sql.expression import or_
|
|
|
5 |
from shop2020.utils.daemon import Daemon
|
|
|
6 |
import optparse
|
|
|
7 |
import sys
|
|
|
8 |
import mechanize
|
|
|
9 |
import time
|
|
|
10 |
from shop2020.model.v1.catalog.impl import DataService
|
| 12297 |
kshitij.so |
11 |
from shop2020.model.v1.catalog.impl.DataService import CompetitorPricing, CompetitorPricingRequest
|
| 12272 |
kshitij.so |
12 |
import gc
|
| 15825 |
kshitij.so |
13 |
from operator import itemgetter
|
| 12256 |
kshitij.so |
14 |
|
| 12268 |
kshitij.so |
15 |
# Read the database host name from the configuration client and initialise
# the catalog data layer against it; the queries issued further down in this
# module go through the session that this call prepares.
config_client = ConfigClient()
host = config_client.get_property('staging_hostname')
DataService.initialize(db_hostname=host)
|
| 12256 |
kshitij.so |
18 |
|
| 15498 |
kshitij.so |
19 |
# HTTP request headers sent with outgoing scraping requests (passed to
# urllib2.Request in scrapSnapdeal).
headers = dict([
    ('User-agent',
     'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11'),
    ('Accept',
     'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
    ('Accept-Language', 'en-US,en;q=0.8'),
    ('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.3'),
])
|
|
|
25 |
|
| 12256 |
kshitij.so |
26 |
class CompetitorScraping(Daemon):
    """Daemon whose run() executes the competitor-pricing polling loop."""

    def __init__(self,
                 logfile='/var/log/services/competitorScraping.log',
                 pidfile='/var/run/competitor-scraper.pid'):
        """Configure the underlying Daemon.

        logfile -- path passed to Daemon as both its stdout and stderr target.
        pidfile -- path passed to Daemon as its pid file.
        """
        Daemon.__init__(self, pidfile, stdout=logfile, stderr=logfile)

    def run(self):
        """Delegate to the module-level start() function."""
        start()
|
|
|
32 |
|
|
|
33 |
def start():
    """Poll for unprocessed CompetitorPricingRequest rows and process them.

    Runs forever. Each cycle loads every request whose isProcessed flag is
    False or NULL, ordered by requestId. For each one it calls
    fetchDetails(), marks the request processed, commits, and calls
    sendMail(). After every cycle the session is released and a garbage
    collection pass is run; when no request was pending the loop then sleeps
    for 600 seconds before polling again.

    Any exception escaping the loop is printed together with its traceback
    and the process exits with status 2.
    """
    import traceback

    try:
        while True:
            # '== False' / '== None' are intentional: SQLAlchemy overloads
            # '==' on columns to build the SQL predicate.
            unprocessed = or_(CompetitorPricingRequest.isProcessed == False,  # noqa: E712
                              CompetitorPricingRequest.isProcessed == None)   # noqa: E711
            pending = (session.query(CompetitorPricingRequest)
                       .filter(unprocessed)
                       .order_by(asc(CompetitorPricingRequest.requestId))
                       .all())

            idle = not pending
            if idle:
                print("No new request to process, sleeeeeeping.....")
            else:
                for request in pending:
                    fetchDetails(request)
                    request.isProcessed = True
                    session.commit()
                    sendMail(request)

            # Drop our references before collecting so the ORM objects can
            # actually be reclaimed.
            pending = []
            close_session()
            collected = gc.collect()
            print("Garbage collector: collected %d objects." % (collected))

            if idle:
                time.sleep(600)
    except Exception as e:
        print(e)
        # Keep the stack trace in the log; the message alone rarely says
        # which step failed.
        traceback.print_exc()
        sys.exit(2)
|
|
|
59 |
|
|
|
60 |
def fetchDetails(request):
    """Scrape competitor data for every CompetitorPricing row of a request.

    Loads the rows whose competitorPricing_requestId matches
    request.requestId, groups them by their snapdealScraping /
    flipkartScraping / amazonScraping flags (a row may land in several
    groups), and runs scrapSnapdeal, scrapFlipkart and scrapAmazon on their
    group in three daemon threads. Returns once all three threads have
    finished; the local lists are emptied before returning.
    """
    import threading

    items = session.query(CompetitorPricing).filter(
        CompetitorPricing.competitorPricing_requestId == request.requestId).all()
    print(items)

    snapdeal, flipkart, amazon = [], [], []
    flaggedBuckets = (
        ('snapdealScraping', snapdeal),
        ('flipkartScraping', flipkart),
        ('amazonScraping', amazon),
    )
    for item in items:
        for flagName, bucket in flaggedBuckets:
            if getattr(item, flagName):
                bucket.append(item)

    # One worker per marketplace, started in the order Snapdeal, Flipkart,
    # Amazon.
    threads = []
    jobs = (
        (scrapSnapdeal, snapdeal),
        (scrapFlipkart, flipkart),
        (scrapAmazon, amazon),
    )
    for scraper, batch in jobs:
        worker = threading.Thread(target=scraper, args=(batch,))
        worker.daemon = True
        worker.start()
        threads.append(worker)

    for worker in threads:
        worker.join()
    worker = None

    # Empty every list in place so no row or thread object stays referenced.
    for used in (items, snapdeal, flipkart, amazon, threads):
        del used[:]
|
| 12256 |
kshitij.so |
89 |
|
|
|
90 |
def scrapSnapdeal(snapdealItems):
    """Record Snapdeal prices and inventory on each given pricing row.

    For every row, looks up the SnapdealItem with the same item_id (rows
    without one are skipped), downloads the vendor list for its supc and
    stores on the row:

    * our listing (vendor named 'MobilesnMore'): ourSnapdealPrice,
      ourSnapdealOfferPrice, ourSnapdealInventory;
    * the cheapest vendor by 'sellingPrice': lowestSnapdealPrice,
      lowestSnapdealOfferPrice, lowestSnapdealSeller,
      lowestSnapdealSellerInventory.

    A "price" is 'sellingPriceBefIntCashBack' when the vendor entry has it,
    otherwise 'sellingPrice'; an "offer price" is always 'sellingPrice'.
    Values default to 0 (seller name to '') when the vendor is absent.

    If fetching or parsing fails for a row, the traceback is logged and that
    row is left untouched. The passed-in list is emptied in place at the end.

    snapdealItems -- list of rows exposing item_id and the attributes above.
    """
    import simplejson as json
    import traceback
    import urllib2
    from shop2020.model.v1.catalog.impl.DataService import SnapdealItem

    # Socket timeout in seconds. Without one a stalled connection blocks this
    # thread, and the join() in the caller, forever.
    requestTimeout = 60

    for snapdealItem in snapdealItems:
        sdItem = SnapdealItem.get_by(item_id=snapdealItem.item_id)
        if sdItem is None:
            continue
        try:
            url = "http://www.snapdeal.com/acors/json/v2/gvbps?supc=%s&catUrl=&bn=&catId=175&start=0&count=10000" % (sdItem.supc,)
            print(url)
            time.sleep(1)  # one-second pause before each request
            req = urllib2.Request(url, headers=headers)
            response = urllib2.urlopen(req, timeout=requestTimeout)
            try:
                vendorInfo = json.load(response)
            finally:
                # Close even when the body is not valid JSON.
                response.close()
            print(vendorInfo)

            ourSp = ourOfferPrice = ourInventory = 0
            lowestSp = lowestOfferPrice = lowestSellerInventory = 0
            lowestSellerName = ''

            sortedVendorsData = sorted(vendorInfo['vendors'],
                                       key=itemgetter('sellingPrice'))
            if sortedVendorsData:
                cheapest = sortedVendorsData[0]
                lowestSellerName = cheapest['vendorDisplayName'].encode('utf-8')
                lowestOfferPrice = cheapest['sellingPrice']
                lowestSp = cheapest.get('sellingPriceBefIntCashBack',
                                        lowestOfferPrice)
                lowestSellerInventory = cheapest['buyableInventory']

            for vendor in sortedVendorsData:
                if vendor['vendorDisplayName'] == 'MobilesnMore':
                    ourInventory = vendor['buyableInventory']
                    ourOfferPrice = vendor['sellingPrice']
                    ourSp = vendor.get('sellingPriceBefIntCashBack',
                                       ourOfferPrice)
        except Exception:
            # print_exc() writes the traceback itself and returns None, so it
            # must not be wrapped in print.
            traceback.print_exc()
            continue
        finally:
            sdItem = None

        print(ourSp)
        print(ourOfferPrice)
        print(ourInventory)
        snapdealItem.ourSnapdealPrice = ourSp
        snapdealItem.ourSnapdealOfferPrice = ourOfferPrice
        snapdealItem.ourSnapdealInventory = ourInventory
        snapdealItem.lowestSnapdealPrice = lowestSp
        snapdealItem.lowestSnapdealOfferPrice = lowestOfferPrice
        snapdealItem.lowestSnapdealSeller = lowestSellerName
        snapdealItem.lowestSnapdealSellerInventory = lowestSellerInventory

    del snapdealItems[:]
|
| 12256 |
kshitij.so |
145 |
|
|
|
146 |
def scrapFlipkart(flipkartItems):
    """Record Flipkart prices and our stock count on each given pricing row.

    For every row, looks up the FlipkartItem with the same item_id (rows
    without one are skipped) and:

    1. scrapes the product page for its flipkartSerialNumber to obtain the
       cheapest seller, the lowest selling price and our selling price; if
       this step fails the row is skipped and left untouched;
    2. asks the Flipkart seller listings API for our stock count; if this
       step fails the stock count is recorded as 0.

    Results are stored in ourFlipkartPrice, ourFlipkartInventory,
    lowestFlipkartPrice and lowestFlipkartSeller. The passed-in list is
    emptied in place at the end.

    flipkartItems -- list of rows exposing item_id and the attributes above.
    """
    import traceback
    import requests as httpRequest
    from shop2020.model.v1.catalog.impl.DataService import FlipkartItem
    from shop2020.model.v1.catalog.script import FlipkartProductPageParser

    # Timeout in seconds so a stalled API call cannot block this thread, and
    # the join() in the caller, forever.
    requestTimeout = 60

    scraperFk = FlipkartProductPageParser.FlipkartProductPageScraper()
    for flipkartItem in flipkartItems:
        fkItem = FlipkartItem.get_by(item_id=flipkartItem.item_id)
        if fkItem is None:
            continue
        try:
            # Copy the serial number out now: fkItem is released in the
            # finally clause below, before the inventory lookup needs it.
            serialNumber = fkItem.flipkartSerialNumber.strip()
            vendorsData = scraperFk.read(serialNumber)
            lowestSellerName = vendorsData['cheapestSeller'].encode('utf-8')
            lowestSellerSp = vendorsData['lowestSellingPrice']
            ourSp = vendorsData['saholicSellingPrice']
        except Exception:
            traceback.print_exc()
            continue
        finally:
            fkItem = None

        try:
            request_url = "https://api.flipkart.net/sellers/skus/%s/listings" % (serialNumber,)
            # NOTE(review): API credentials are hard-coded in source; they
            # should be moved to configuration and rotated.
            r = httpRequest.get(request_url,
                                auth=('m2z93iskuj81qiid', '0c7ab6a5-98c0-4cdc-8be3-72c591e0add4'),
                                timeout=requestTimeout)
            listing = r.json()
            print("Inventory info %s" % (listing,))
            stock_count = int(listing['attributeValues']['stock_count'])
        except Exception:
            # Best effort: the price data is still recorded, with stock 0.
            traceback.print_exc()
            stock_count = 0

        flipkartItem.ourFlipkartPrice = ourSp
        flipkartItem.ourFlipkartInventory = stock_count
        flipkartItem.lowestFlipkartPrice = lowestSellerSp
        flipkartItem.lowestFlipkartSeller = lowestSellerName

    scraperFk = None
    del flipkartItems[:]
|
| 12256 |
kshitij.so |
180 |
|
|
|
181 |
|
|
|
182 |
def close_session():
    """Release the current elixir session by calling session.remove()."""
    session.remove()
|
| 12256 |
kshitij.so |
184 |
|
| 15492 |
kshitij.so |
185 |
def scrapAmazon(amazonItems):
    """Record Amazon prices on each given pricing row.

    For every row, looks up the Amazonlisted entry whose itemId equals the
    row's item_id. Rows without an entry, or whose entry has no usable asin
    (NULL, empty or whitespace only), are skipped. Otherwise two
    offer-listing pages are scraped:

    * the page filtered to merchant AF6E3O0VE0X4D gives ourAmazonPrice
      (0.0 if it cannot be read);
    * the unfiltered page gives lowestAmazonPrice and lowestAmazonSeller
      (0.0 and "" if it cannot be read).

    The passed-in list is emptied in place at the end.

    amazonItems -- list of rows exposing item_id and the attributes above.
    """
    from shop2020.model.v1.catalog.script import AmazonScraper
    from shop2020.model.v1.catalog.impl.DataService import Amazonlisted

    print("Inside amazonitems  %s" % (amazonItems,))
    print("len amazon items  %s" % (len(amazonItems),))
    time.sleep(5)

    amScraper = AmazonScraper.AmazonScraper()
    for amazonItem in amazonItems:
        amazon_d_item = Amazonlisted.get_by(itemId=amazonItem.item_id)
        if amazon_d_item is None:
            continue

        # A NULL asin must not raise: an uncaught error here would end the
        # thread and leave every remaining row unscraped.
        asin = (amazon_d_item.asin or '').strip()
        if not asin:
            print("No asin found for  %s" % (amazonItem.item_id,))
            continue

        saholicUrl = "http://www.amazon.in/gp/offer-listing/%s?m=AF6E3O0VE0X4D&s=merchant-items&ie=UTF8&qid=1433584512&sr=1-1" % (asin,)
        generalUrl = "http://www.amazon.in/gp/offer-listing/%s" % (asin,)

        # Both lookups are best effort: a page that cannot be scraped yields
        # the zero/empty defaults instead of aborting the row.
        try:
            saholicPrice = amScraper.read(saholicUrl, False)[0]
        except Exception:
            saholicPrice = 0.0

        try:
            cheapestSeller = amScraper.read(generalUrl, True)
            cheapestSellerPrice = cheapestSeller[0]
            cheapestSellerName = cheapestSeller[1].encode('utf-8')
        except Exception:
            cheapestSellerPrice = 0.0
            cheapestSellerName = ""

        amazonItem.ourAmazonPrice = saholicPrice
        amazonItem.lowestAmazonPrice = cheapestSellerPrice
        amazonItem.lowestAmazonSeller = cheapestSellerName

    del amazonItems[:]
|
| 12256 |
kshitij.so |
216 |
|
|
|
217 |
|
| 12314 |
kshitij.so |
218 |
def _closeSmtpQuietly(server):
    """Close an smtplib.SMTP connection, ignoring errors; None is allowed."""
    if server is None:
        return
    try:
        server.quit()
    except Exception:
        # The connection is already broken; just drop the socket.
        server.close()


def sendMail(request):
    """Notify request.user by e-mail that the scraping request is done.

    Tries Gmail's SMTP server (STARTTLS on port 587) first. If connecting,
    logging in or sending fails, the error is printed and the message is
    sent through the SMTP server on localhost instead. A failure of that
    fallback is printed as well; this function never raises for a delivery
    failure, so a mail outage cannot stop the caller.

    Nothing is sent when request.user is empty.

    request -- object exposing user (recipient address) and requestId.
    """
    import smtplib
    from email.mime.multipart import MIMEMultipart
    from email.mime.text import MIMEText

    sender = 'build@shop2020.in'

    if not request.user:
        print("Unable to send mail: request %s has no user address." % (request.requestId,))
        return
    recipients = [request.user]

    message = "Your Request has been processed.Visit dashboard to check & download report"
    msg = MIMEMultipart()
    msg['Subject'] = "Competition Scraping.Upload Id" + ' - ' + str(request.requestId)
    # The From header and the envelope sender both use the real sender
    # address; the envelope sender must never be the account password.
    msg['From'] = sender
    msg['To'] = ",".join(recipients)
    msg.preamble = "Competition Scraping" + ' - ' + str(request.requestId)
    msg.attach(MIMEText(message, 'html'))
    payload = msg.as_string()

    mailServer = None
    try:
        mailServer = smtplib.SMTP("smtp.gmail.com", 587)
        mailServer.ehlo()
        mailServer.starttls()
        mailServer.ehlo()
        # NOTE(review): the account password is hard-coded in source; it
        # should be moved to configuration and rotated.
        mailServer.login(sender, "cafe@nes")
        mailServer.sendmail(sender, recipients, payload)
        return
    except Exception as e:
        print(e)
        print("Unable to send mail.Lets try with local SMTP.")
    finally:
        _closeSmtpQuietly(mailServer)

    smtpServer = None
    try:
        smtpServer = smtplib.SMTP('localhost')
        smtpServer.set_debuglevel(1)
        smtpServer.sendmail(sender, recipients, payload)
        print("Successfully sent email")
    except Exception as e:
        print(e)
        print("Error: unable to send email.")
    finally:
        _closeSmtpQuietly(smtpServer)
|
|
|
251 |
|
| 12256 |
kshitij.so |
252 |
|
|
|
253 |
|
|
|
254 |
if __name__ == "__main__":
    # Command line: [-l LOGFILE] [-i PIDFILE] [start|stop|restart]
    # With no command the scraping loop runs in the foreground.
    parser = optparse.OptionParser()
    parser.add_option("-l", "--logfile", dest="logfile",
                      type="string",
                      help="Log all output to LOG_FILE",
                      )
    parser.add_option("-i", "--pidfile", dest="pidfile",
                      type="string",
                      help="Write the PID to pidfile")
    (options, args) = parser.parse_args()

    # Forward only the paths that were actually given. An omitted option is
    # None, and passing None would override CompetitorScraping's defaults.
    daemonArgs = {}
    if options.logfile is not None:
        daemonArgs['logfile'] = options.logfile
    if options.pidfile is not None:
        daemonArgs['pidfile'] = options.pidfile
    daemon = CompetitorScraping(**daemonArgs)

    if len(args) == 0:
        daemon.run()
    elif len(args) == 1:
        commands = {
            'start': daemon.start,
            'stop': daemon.stop,
            'restart': daemon.restart,
        }
        action = commands.get(args[0])
        if action is None:
            print("Unknown command")
            sys.exit(2)
        action()
        sys.exit(0)
    else:
        print("usage: %s start|stop|restart" % sys.argv[0])
        sys.exit(2)
|