# Source-viewer residue: Rev 18284 | Rev 19647 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed
import urllib2from BeautifulSoup import BeautifulSoupimport pymongoimport refrom dtr.utils.utils import to_java_date, fetchResponseUsingProxyfrom datetime import datetimeimport optparseimport smtplibfrom email.mime.text import MIMETextfrom email.mime.multipart import MIMEMultipartfrom dtr.utils.utils import fetchResponseUsingProxycon = Noneparser = optparse.OptionParser()parser.add_option("-m", "--m", dest="mongoHost",default="localhost",type="string", help="The HOST where the mongo server is running",metavar="mongo_host")(options, args) = parser.parse_args()bestSellers = []now = datetime.now()exceptionList = []class __RankInfo:def __init__(self, identifier, rank, category):self.identifier = identifierself.rank = rankself.category = categorydef get_mongo_connection(host=options.mongoHost, port=27017):global conif con is None:print "Establishing connection %s host and port %d" %(host,port)try:con = pymongo.MongoClient(host, port)except Exception, e:print ereturn Nonereturn condef getSoupObject(url):print "Getting soup object for"print urlsoup = Noneresponse_data = fetchResponseUsingProxy(url,proxy=False)print response_datatry:page=response_data.decode("utf-8")soup = BeautifulSoup(page,convertEntities=BeautifulSoup.HTML_ENTITIES)except:soup = BeautifulSoup(response_data,convertEntities=BeautifulSoup.HTML_ENTITIES)return soupdef scrapeBestSellerMobiles():global bestSellersrank = 0for i in [1, 21, 41, 61, 81]:url = "http://www.flipkart.com/lc/pr/pv1/spotList1/spot1/productList?sid=tyy,4io&filterNone=true&start=%d&ajax=true" %(i)soup = getSoupObject(url)product_divs = soup.findAll('div',{'class':re.compile('.*browse-product')})for x in product_divs:rank = rank +1print rank,print '\t',print x['data-pid']r_info = __RankInfo(x['data-pid'].strip(),rank, None)bestSellers.append(r_info)def scrapeBestSellerTablets():global bestSellersbestSellers = []rank = 0for i in [1, 21, 41, 61, 81]:url = 
"http://www.flipkart.com/lc/pr/pv1/spotList1/spot1/productList?sid=tyy,hry&filterNone=true&start=%d&ajax=true" %(i)soup = getSoupObject(url)product_divs = soup.findAll('div',{'class':re.compile('.*browse-product')})for x in product_divs:rank = rank +1print rank,print '\t',print x['data-pid']r_info = __RankInfo(x['data-pid'].strip(),rank, None)bestSellers.append(r_info)def commitBestSellers(category):global exceptionListfor x in bestSellers:print x.rank,print '\t',print x.identifier,col = get_mongo_connection().Catalog.MasterData.find({'identifier':x.identifier.strip()})print "count sku",print '\t',if len(list(col)) == 0:x.category = categoryexceptionList.append(x)else:get_mongo_connection().Catalog.MasterData.update({'identifier':x.identifier.strip()}, {'$set' : {'rank':x.rank,'updatedOn':to_java_date(now)}}, multi=True)def resetRanks(category):oldRankedItems = get_mongo_connection().Catalog.MasterData.find({'rank':{'$gt':0},'source_id':2,'category':category})for item in oldRankedItems:get_mongo_connection().Catalog.MasterData.update({'_id':item['_id']}, {'$set' : {'rank':0,'updatedOn':to_java_date(now)}}, multi=True)def sendEmail():message="""<html><body><h3>Flipkart Best Sellers not in master</h3><table border="1" style="width:100%;"><thead><tr><th>Identifier</th><th>Category</th><th>Rank</th></tr></thead><tbody>"""for item in exceptionList:message+="""<tr><td style="text-align:center">"""+(item.identifier)+"""</td><td style="text-align:center">"""+(item.category)+"""</td><td style="text-align:center">"""+str(item.rank)+"""</td></tr>"""message+="""</tbody></table></body></html>"""print messagerecipients = ['kshitij.sood@saholic.com']#recipients = ['rajneesh.arora@saholic.com','kshitij.sood@saholic.com','chaitnaya.vats@saholic.com','manoj.kumar@saholic.com']msg = MIMEMultipart()msg['Subject'] = "Flipkart Best Sellers" + ' - ' + str(datetime.now())msg['From'] = ""msg['To'] = ",".join(recipients)msg.preamble = "Flipkart Best Sellers" + ' - ' + 
str(datetime.now())html_msg = MIMEText(message, 'html')msg.attach(html_msg)smtpServer = smtplib.SMTP('localhost')smtpServer.set_debuglevel(1)sender = 'dtr@shop2020.in'try:smtpServer.sendmail(sender, recipients, msg.as_string())print "Successfully sent email"except:print "Error: unable to send email."def main():scrapeBestSellerMobiles()if len(bestSellers) > 0:resetRanks('Mobiles')commitBestSellers('MOBILES')scrapeBestSellerTablets()if len(bestSellers) > 0:resetRanks('Tablets')commitBestSellers('TABLETS')sendEmail()if __name__=='__main__':main()