Subversion Repositories SmartDukaan

Rev

Rev 13828 | Rev 14257 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 13828 Rev 13913
Line 104... Line 104...
104
        below_soup = getSoupObject(belowFoldUrl)
104
        below_soup = getSoupObject(belowFoldUrl)
105
        for x in above_soup.findAll('div',{'class':'zg_itemImmersion'}):
105
        for x in above_soup.findAll('div',{'class':'zg_itemImmersion'}):
106
            am_url =  x.find('div',{'class':'zg_title'}).find('a')['href']
106
            am_url =  x.find('div',{'class':'zg_title'}).find('a')['href']
107
            identifier =  (re.search(asin_regex, am_url)).group(1)
107
            identifier =  (re.search(asin_regex, am_url)).group(1)
108
            rank = rank + 1
108
            rank = rank + 1
-
 
109
            print identifier,
-
 
110
            print '\t',
-
 
111
            print rank
109
            r_info = __RankInfo(identifier,rank)
112
            r_info = __RankInfo(identifier,rank)
110
            bestSellers.append(r_info)
113
            bestSellers.append(r_info)
111
        for x in below_soup.findAll('div',{'class':'zg_itemImmersion'}):
114
        for x in below_soup.findAll('div',{'class':'zg_itemImmersion'}):
112
            am_url =  x.find('div',{'class':'zg_title'}).find('a')['href']
115
            am_url =  x.find('div',{'class':'zg_title'}).find('a')['href']
113
            identifier =  (re.search(asin_regex, am_url)).group(1)
116
            identifier =  (re.search(asin_regex, am_url)).group(1)
114
            rank = rank + 1
117
            rank = rank + 1
-
 
118
            print identifier,
-
 
119
            print '\t',
-
 
120
            print rank
115
            r_info = __RankInfo(identifier,rank)
121
            r_info = __RankInfo(identifier,rank)
116
            bestSellers.append(r_info)
122
            bestSellers.append(r_info)
117
 
123
 
118
def resetRanks(category):
124
def resetRanks(category_id):
119
    oldRankedItems = get_mongo_connection().Catalog.MasterData.find({'rank':{'$gt':0},'source_id':1,'category':category})
125
    oldRankedItems = get_mongo_connection().Catalog.MasterData.find({'rank':{'$gt':0},'source_id':1,'category_id':category_id})
120
    for item in oldRankedItems:
126
    for item in oldRankedItems:
121
        get_mongo_connection().Catalog.MasterData.update({'_id':item['_id']}, {'$set' : {'rank':0,'updatedOn':to_java_date(now)}}, multi=True)
127
        get_mongo_connection().Catalog.MasterData.update({'_id':item['_id']}, {'$set' : {'rank':0,'updatedOn':to_java_date(now)}}, multi=True)
122
            
128
            
123
def main():
129
def main():
124
    scrapeBestSellerMobiles()
130
    scrapeBestSellerMobiles()
125
    if len(bestSellers) > 0:
131
    if len(bestSellers) > 0:
126
        resetRanks('Mobiles')
132
        resetRanks(3)
127
        commitBestSellers()
133
        commitBestSellers()
128
    scrapeBestSellerTablets()
134
    scrapeBestSellerTablets()
129
    if len(bestSellers) > 0:
135
    if len(bestSellers) > 0:
130
        resetRanks('Tablets')
136
        resetRanks(5)
131
        commitBestSellers()
137
        commitBestSellers()
132
        
138
        
133
if __name__=='__main__':
139
if __name__=='__main__':
134
    main()
140
    main()
135
141