# Rev 22737 | Blame | Compare with Previous | Last modification | View Log | RSS feed
"""Scrape Flipkart best sellers and store their ranks in Catalog.MasterData.

The script pages through the Flipkart mobile search API for the mobile and
tablet stores, writes the resulting rank onto matching MasterData documents
and emails a table of the scraped products that have no MasterData entry.
"""
from dtr.utils.utils import to_java_date, fetchResponseUsingProxy, get_mongo_connection, CATEGORY_MAP
from collections import OrderedDict
from datetime import datetime
import optparse
import os
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
import json
import chardet
from shop2020.utils.EmailAttachmentSender import get_attachment_part
from shop2020.utils import EmailAttachmentSender
import re

headers = {
    'Browser-Name': 'Chrome',
    'User-Agent': 'Mozilla/5.0 (Linux; Android 5.1.1; A0001 Build/LMY48B; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/43.0.2357.121 Mobile Safari/537.36 FKUA/Retail/550900/Android/Mobile (OnePlus/A0001)',
    'Host': 'mobileapi.flipkart.net',
}

con = None

parser = optparse.OptionParser()
parser.add_option("-m", "--m", dest="mongoHost",
                  default="localhost",
                  type="string", help="The HOST where the mongo server is running",
                  metavar="mongo_host")
(options, args) = parser.parse_args()

# Shared state filled by the scrape functions and read by commit/email.
bestSellers = []
now = datetime.now()
exceptionList = []

# Category ids used when scraping and when resetting ranks.
MOBILE_CATEGORY = 3
TABLET_CATEGORY = 5

_SEARCH_URL = "http://mobileapi.flipkart.net/3/discover/getSearch?store=%s&start=%d&count=10"
_TABLE_CELL = '<td style="text-align:center">%s</td>'

# SECURITY(review): this SendGrid API key is committed to source control. It is
# kept as the fallback so existing deployments keep working, but it should be
# rotated and supplied only through the SENDGRID_API_KEY environment variable.
_SENDGRID_API_KEY = os.environ.get(
    "SENDGRID_API_KEY",
    "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")


def xstr(s):
    """Return s unchanged when it is truthy, otherwise an empty string."""
    return s or ""


class __RankInfo(object):
    """One scraped best seller: identifier, rank, category id, title and URL."""

    def __init__(self, identifier, rank, category, title, url):
        self.identifier = identifier
        self.rank = rank
        self.category = category
        self.title = title
        self.url = url


def _scrapeBestSellers(store, category, verbose=False):
    """Append the first ten result pages of a store to bestSellers.

    store is the value of the API's `store` query parameter, category is the
    category id recorded on every scraped item, and verbose additionally
    prints the product URL and identifier of each item.
    """
    rank = 1
    for start in range(0, 100, 10):
        url = _SEARCH_URL % (store, start)
        print(url)
        response_data = fetchResponseUsingProxy(url, headers, livePricing=None, proxy=False, flipkart=False)
        # A plain dict loses the order of the JSON object, which made the rank
        # inside each page arbitrary. OrderedDict keeps the response order.
        # NOTE(review): assumes the API lists products in rank order - confirm.
        input_json = json.loads(response_data, object_pairs_hook=OrderedDict)
        for identifier, data in input_json['RESPONSE']['product'].items():
            titles = data['value']['titles']
            title = (xstr(titles['title']) + " " + xstr(titles['subtitle'])).strip()
            print(title)
            productUrl = data['value']['smartUrl'].replace("http://dl.flipkart.com/dl", "http://www.flipkart.com")
            if verbose:
                print(productUrl)
                print(identifier)
                print("==================")
            bestSellers.append(__RankInfo(identifier, rank, category, title, productUrl))
            rank += 1


def scrapeBestSellerMobiles():
    """Scrape the mobile store (tyy/4io) into bestSellers as category 3."""
    _scrapeBestSellers("tyy/4io", MOBILE_CATEGORY)


def scrapeBestSellerTablets():
    """Scrape the tablet store (tyy/hry) into bestSellers as category 5."""
    _scrapeBestSellers("tyy/hry", TABLET_CATEGORY, verbose=True)


def commitBestSellers():
    """Write each scraped rank to MasterData; collect unknown items.

    Items whose stripped identifier matches no MasterData document are
    appended to exceptionList instead of being written.
    """
    # One collection handle for the whole run instead of two lookups per item.
    masterData = get_mongo_connection(host=options.mongoHost).Catalog.MasterData
    for x in bestSellers:
        query = {'identifier': x.identifier.strip()}
        # find_one avoids materialising a cursor just to test for emptiness.
        if masterData.find_one(query) is None:
            exceptionList.append(x)
        else:
            masterData.update(query, {'$set': {'rank': x.rank, 'updatedOn': to_java_date(now)}}, multi=True)


def resetRanks(category):
    """Set rank to 0 on every MasterData document of the given category."""
    get_mongo_connection(host=options.mongoHost).Catalog.MasterData.update(
        {'category_id': category},
        {'$set': {'rank': 0, 'updatedOn': to_java_date(now)}},
        multi=True)


def _escapeHtml(text):
    """Return text (None becomes "") with HTML special characters escaped."""
    return (xstr(text)
            .replace("&", "&amp;")
            .replace("<", "&lt;")
            .replace(">", "&gt;")
            .replace('"', "&quot;"))


def sendEmail():
    """Email an HTML table of the items collected in exceptionList."""
    parts = [
        '<html><body><h3>Flipkart Best Sellers not in master</h3>'
        '<table border="1" style="width:100%;"><thead><tr>'
        '<th>Identifier</th><th>Title</th><th>Category</th><th>Rank</th><th>URL</th>'
        '</tr></thead><tbody>'
    ]
    for item in exceptionList:
        # CATEGORY_MAP.get may return None for an unmapped category;
        # _escapeHtml turns that into an empty cell instead of a TypeError.
        cells = [item.identifier, item.title, CATEGORY_MAP.get(item.category), str(item.rank), item.url]
        parts.append("<tr>" + "".join(_TABLE_CELL % _escapeHtml(cell) for cell in cells) + "</tr>")
    parts.append("</tbody></table></body></html>")
    message = sanitizeUnicode("".join(parts))
    print(message)
    recipients = ['ritesh.chauhan@saholic.com']
    EmailAttachmentSender.mail_send_grid("dtr@smartdukaan.com", "apikey", _SENDGRID_API_KEY, recipients, "Flipkart Best Sellers", message, [], [], [])


def sanitizeUnicode(unicodeText):
    """Drop non-ASCII characters, collapse whitespace runs and UTF-8 encode."""
    # Remove every run of characters outside the 7-bit ASCII range.
    unicodeText = re.sub(r'[^\x00-\x7F]+', '', unicodeText)
    # Collapse each whitespace run into a single space.
    unicodeText = re.sub(r'\s+', ' ', unicodeText)
    return unicodeText.strip().encode('utf-8', 'ignore')


def main():
    """Scrape both stores, then reset, rewrite and report the ranks."""
    # Scrape everything before touching the database, so that a failed scrape
    # cannot leave a category's ranks reset to 0 with nothing written back.
    scrapeBestSellerMobiles()
    scrapeBestSellerTablets()
    resetRanks(MOBILE_CATEGORY)
    resetRanks(TABLET_CATEGORY)
    commitBestSellers()
    sendEmail()


if __name__ == '__main__':
    main()