# Subversion Repositories SmartDukaan
#
# Rev
#
# Rev 19572 | Blame | Compare with Previous | Last modification | View Log | RSS feed

from elasticsearch import Elasticsearch
from dtr.utils.utils import get_mongo_connection
import optparse
from pymongo import DESCENDING

# Command-line options: where to find the Mongo server and the Elasticsearch node.
parser = optparse.OptionParser()
parser.add_option("-m", "--m", dest="mongoHost",
                      default="localhost",
                      type="string", help="The HOST where the mongo server is running",
                      metavar="HOST")
parser.add_option("-e", "--e", dest="elastic_search_host",
                      default="localhost",
                      type="string", help="The HOST where the elastic server is running",
                      metavar="HOST")
# The placeholder shown in --help for this option is a port, not a host.
parser.add_option("-p", "--p", dest="elastic_search_port",
                      default="9200",
                      type="string", help="The PORT where the elastic server is running",
                      metavar="PORT")


# NOTE: arguments are parsed at import time, so importing this module consumes sys.argv.
(options, args) = parser.parse_args()


# subCategoryId values whose MasterData rows main() pushes into the search index.
subCategoryList = [19, 20, 27, 29, 33, 28]

# Shared Elasticsearch client used by main() and validateListings().
es = Elasticsearch([{'host': options.elastic_search_host, 'port': options.elastic_search_port}])


def xstr(s):
    """Return *s* unchanged when it is truthy, otherwise an empty string."""
    return s or ""

class __SkuInfo:
    """Plain record describing one SKU bundle.

    main() passes the instance ``__dict__`` to ``es.index`` as the document
    body, so the attribute names set here are the field names of the
    indexed document.
    """

    def __init__(self, id, title, category_id, subCategoryId, subCategory, dealRankPoints):
        # Targets are bound left to right, i.e. in parameter order.
        (self.id,
         self.title,
         self.category_id,
         self.subCategoryId,
         self.subCategory,
         self.dealRankPoints) = (id,
                                 title,
                                 category_id,
                                 subCategoryId,
                                 subCategory,
                                 dealRankPoints)

def main():
    """Index one document per SKU bundle into ``my_index`` / ``my_type``.

    Reads every ``Catalog.MasterData`` row whose ``subCategoryId`` is in
    ``subCategoryList`` and indexes the first row seen for each distinct
    ``skuBundleId``. The document's ``dealRankPoints`` is taken from the
    ``Catalog.Deals`` entry with the highest ``internalRank`` among deals
    that have ``dealRankPoints`` > 0, or 0 when there is no such deal.
    """
    # One connection for the whole run. Both collections are read from the
    # host given with -m, so the Deals lookup honours the option too.
    catalog = get_mongo_connection(host=options.mongoHost).Catalog
    items = list(catalog.MasterData.find({'subCategoryId': {"$in": subCategoryList}}))

    # Bundle ids already indexed, stored as int so the membership test and
    # the stored value always agree.
    added = set()
    for item in items:
        sku_bundle_id = int(item['skuBundleId'])
        if sku_bundle_id in added:
            continue

        # Only the top-ranked deal is used, so fetch at most one document.
        top_deal = list(
            catalog.Deals.find({'skuBundleId': item['skuBundleId'], 'dealRankPoints': {"$gt": 0}})
            .sort([('internalRank', DESCENDING)])
            .limit(1)
        )
        dealRankPoints = top_deal[0]['dealRankPoints'] if top_deal else 0

        title = xstr(item['brand']) + " " + xstr(item['model_name'])
        s_info = __SkuInfo(sku_bundle_id, title, int(item['category_id']),
                           int(item['subCategoryId']), item['subCategory'], dealRankPoints)
        es.index(index='my_index', doc_type='my_type', id=s_info.id, body=s_info.__dict__)
        added.add(sku_bundle_id)

def validateListings():
    """Delete indexed documents that no longer agree with ``Catalog.MasterData``.

    Pages through every document in ``my_index`` / ``my_type`` (100 per
    request) and queues a document for deletion when either:

    * no MasterData row has its ``skuBundleId``, or
    * any MasterData row with that ``skuBundleId`` has a different
      ``subCategoryId`` or ``category_id`` than the indexed document.

    Deletions are issued only after the scan finishes, so the ``from``
    offsets used for paging are not shifted by documents disappearing
    mid-scan.
    """
    offset, limit = 0, 100
    body = {"query": {"match_all": {}}}

    # Acquire the collection once instead of once per search hit.
    master_data = get_mongo_connection(host=options.mongoHost).Catalog.MasterData

    toDelete = []     # ids in the order they were discovered
    queued = set()    # same ids, for O(1) duplicate checks

    # NOTE(review): from/size paging may hit the server's result-window
    # limit on large indices -- confirm against the deployed ES version.
    while True:
        result = es.search("my_index", "my_type", body, from_=offset, size=limit)
        print(result)
        hits = result['hits']['hits']
        if not hits:
            break

        for hit in hits:
            source = hit['_source']
            skuBundleId = source['id']
            subCategoryId = source['subCategoryId']
            category_id = source['category_id']
            if skuBundleId in queued:
                # Already scheduled; deleting the same id twice would fail.
                continue

            exist = list(master_data.find({'skuBundleId': skuBundleId}))
            if not exist:
                queued.add(skuBundleId)
                toDelete.append(skuBundleId)
                continue

            mismatch = any(
                item['subCategoryId'] != subCategoryId or item['category_id'] != category_id
                for item in exist
            )
            if mismatch:
                # Same text the Python 2 statement `print "Deleting item ", x` emits.
                print("Deleting item  %s" % (skuBundleId,))
                queued.add(skuBundleId)
                toDelete.append(skuBundleId)

        offset += limit

    for doc_id in toDelete:
        print("Deleting id  %s" % (doc_id,))
        es.delete(index='my_index', doc_type='my_type', id=doc_id)

        
if __name__ == "__main__":
    # Rebuild the index first, then prune documents that no longer match Mongo.
    for step in (main, validateListings):
        step()