# Rev 15887 (repository web-viewer header: Blame | Compare with Previous | Last modification | View Log | RSS feed)
# Maintenance script for shopclues.com products stored in the Mongo
# collection Catalog.MasterData: re-scrapes each product to refresh its
# price/stock/COD fields and derives a tracking-free marketPlaceUrl.
#
# NOTE: print is always called with a single parenthesised argument so the
# script produces the same output under Python 2 (which the original syntax
# targeted) and Python 3.
import pymongo
from dtr.utils.utils import to_java_date
import optparse
from datetime import datetime
from dtr.utils import ShopCluesScraper
import traceback

# Lazily created, process-wide MongoClient (see get_mongo_connection).
con = None

parser = optparse.OptionParser()
parser.add_option("-m", "--m", dest="mongoHost",
                  default="localhost",
                  type="string", help="The HOST where the mongo server is running",
                  metavar="mongo_host")
(options, args) = parser.parse_args()

# _ids of products whose scrape failed during getAllProductsToSync().
exceptionList = []
now = datetime.now()
sc = ShopCluesScraper.ShopCluesScraper()


def get_mongo_connection(host=options.mongoHost, port=27017):
    """Return the shared MongoClient, creating it on first use.

    Args:
        host: Mongo host name; defaults to the ``-m`` command-line option.
        port: Mongo port.

    Returns:
        The cached ``pymongo.MongoClient``, or ``None`` when creating the
        client raised (the exception is printed, not propagated).
    """
    global con
    if con is None:
        print("Establishing connection %s host and port %d" % (host, port))
        try:
            con = pymongo.MongoClient(host, port)
        except Exception as e:
            print(e)
            return None
    return con


def getAllProductsToSync():
    """Re-scrape every shopclues.com product and store the fresh values.

    For each document with ``source == "shopclues.com"`` the product URL is
    read through the scraper and the fields ``codAvailable``, ``coupon``,
    ``in_stock``, ``available_price`` and ``identifier`` are updated from the
    result. A product whose scrape raises is skipped; its traceback is
    printed and its ``_id`` is appended to ``exceptionList``.
    """
    global exceptionList
    all_products = get_mongo_connection().Catalog.MasterData.find({'source': "shopclues.com"})
    for product in all_products:
        print(product['_id'])
        try:
            result = sc.read(product['url'])
            print(result)
        except Exception:
            # Best effort per product: record the failure and move on.
            # (Exception rather than a bare except, so Ctrl-C still stops
            # the run.)
            traceback.print_exc()
            exceptionList.append(product['_id'])
            continue
        get_mongo_connection().Catalog.MasterData.update(
            {'_id': product['_id']},
            {'$set': {'codAvailable': result['isCod'],
                      'coupon': result['coupon'],
                      'in_stock': result['inStock'],
                      'available_price': result['price'],
                      'identifier': result['scin']}},
            multi=False, upsert=False)


def correctUrls():
    """Store each product's URL, cut at ``?utm_source``, as ``marketPlaceUrl``.

    A URL that does not contain ``?utm_source`` is stored unchanged instead
    of raising ``ValueError`` and aborting the whole pass.
    """
    all_products = get_mongo_connection().Catalog.MasterData.find({'source': "shopclues.com"})
    for product in all_products:
        print(product['url'])
        # partition() yields the full string when the marker is absent,
        # whereas index() would raise.
        url = product['url'].partition('?utm_source')[0]
        print("=======================")
        get_mongo_connection().Catalog.MasterData.update(
            {'_id': product['_id']},
            {'$set': {'marketPlaceUrl': url}})


def correctScin():
    """Fill in ``identifier`` for products where it is currently empty.

    Products with a non-empty ``identifier`` are skipped, as are products
    whose scrape raises.
    """
    all_products = get_mongo_connection().Catalog.MasterData.find({'source': "shopclues.com"})
    for product in all_products:
        if len(product['identifier']) != 0:
            continue
        print(product['_id'])
        try:
            result = sc.read(product['marketPlaceUrl'])
        except Exception:
            continue
        print(result)
        # NOTE(review): this stores the whole scraper result as the
        # identifier, while getAllProductsToSync stores result['scin'].
        # Looks unintended -- confirm before re-enabling this function.
        get_mongo_connection().Catalog.MasterData.update(
            {'_id': product['_id']},
            {'$set': {'identifier': result}})


def main():
    """Run the sync pass, then the marketPlaceUrl correction pass."""
    getAllProductsToSync()
    #print("Exception list")
    #for e in exceptionList:
    #    print(e)
    correctUrls()
    # correctScin() is deliberately left disabled, as in the original.
    #correctScin()


if __name__ == '__main__':
    main()