# Rev 12259 (repository-viewer header retained as a comment)
from elixir import *from shop2020.config.client.ConfigClient import ConfigClientfrom shop2020.clients.CatalogClient import CatalogClientfrom shop2020.model.v1.catalog.impl import DataServicefrom shop2020.model.v1.catalog.impl.DataService import PrivateDealsPriceComparison, PrivateDeals, Amazonlisted, \SnapdealItem, FlipkartItemfrom shop2020.model.v1.catalog.script import FlipkartScraper, AmazonScraperfrom operator import itemgetterimport urllib2import timeimport simplejson as jsonfrom datetime import datetimeconfig_client = ConfigClient()host = config_client.get_property('staging_hostname')DataService.initialize(db_hostname=host)scrapedInfo = []scraperFk = FlipkartScraper.FlipkartScraper()scraperAmazon = AmazonScraper.AmazonScraper()class __ScrapingInfo:def __init__(self,itemId, dealPrice, saholicPrice, sdPrice, fkPrice, amazonPrice, supc, fsn, asin):self.itemId = itemIdself.dealPrice = dealPriceself.saholicPrice = saholicPriceself.sdPrice = sdPriceself.fkPrice = fkPriceself.amazonPrice = amazonPriceself.supc = supcself.fsn = fsnself.asin = asindef getAllActivePDFromMaster():catalog_client = CatalogClient().get_client()allActivePrivateDeals = catalog_client.getAllActivePrivateDeals(None, 0)return allActivePrivateDealsdef scrapeSnapdeal():for data in scrapedInfo:if data.supc is None or len(data.supc)==0:continuetry:url="http://www.snapdeal.com/acors/json/gvbps?supc=%s&catId=175&sort=sellingPrice"%(data.supc)print urltime.sleep(1)req = urllib2.Request(url)response = urllib2.urlopen(req)json_input = response.read()vendorInfo = json.loads(json_input)lowestOfferPrice, iterator = (0,)*2for vendor in vendorInfo:if iterator == 0:lowestOfferPrice = vendor['sellingPrice']breakdata.sdPrice = lowestOfferPriceexcept:continuedef scrapeFlipkart():for data in scrapedInfo:if data.fsn is None or len(data.fsn)==0:continuetry:url = "http://www.flipkart.com/ps/%s"%(data.fsn)vendorsData = scraperFk.read(url)sortedVendorsData = []sortedVendorsData = sorted(vendorsData, 
key=itemgetter('sellingPrice'))lowestSp, iterator = (0,)*2for vData in sortedVendorsData:if iterator == 0:lowestSp = vData['sellingPrice']data.fkPrice = lowestSpbreakexcept:continuedef scrapeAmazon():for data in scrapedInfo:if data.asin is None or len(data.asin)==0:continuetry:url = "http://www.amazon.in/gp/offer-listing/%s/ref=olp_sort_ps"%(data.asin)scraperAmazon.read(url,False)lowestSp,store = scraperAmazon.createData()data.amazonPrice = lowestSpexcept:continuedef populateScrapingInfo():for data in scrapedInfo:amazon = Amazonlisted.get_by(itemId=data.itemId)snapdeal = SnapdealItem.get_by(item_id=data.itemId)flipkart = FlipkartItem.get_by(item_id=data.itemId)if amazon is not None:data.asin = amazon.asinif snapdeal is not None:data.supc = snapdeal.supcif flipkart is not None:data.fsn = flipkart.flipkartSerialNumberdef populateOurPrices():catalog_client = CatalogClient().get_client()for data in scrapedInfo:cat_item = catalog_client.getItem(data.itemId)data.saholicPrice = cat_item.sellingPricedef commitData():PrivateDealsPriceComparison.query.delete()session.commit()for data in scrapedInfo:pdComp = PrivateDealsPriceComparison()pdComp.item_id = data.itemIdpdComp.dealPrice = data.dealPricepdComp.saholicPrice = data.saholicPricepdComp.sdPrice = data.sdPricepdComp.fkPrice = data.fkPricepdComp.amazonPrice = data.amazonPricepdComp.asin = data.asinpdComp.fsn = data.fsnpdComp.supc = data.supcpdComp.lastProcessedTimestamp = datetime.now()session.commit()def main():privateDeals = getAllActivePDFromMaster()global scrapedInfofor itemId in privateDeals.iterkeys():temp = __ScrapingInfo(itemId,privateDeals.get(itemId).dealPrice,None,None,None,None,None,None,None )scrapedInfo.append(temp)privateDeals = {}populateScrapingInfo()scrapeSnapdeal()scrapeFlipkart()scrapeAmazon()populateOurPrices()commitData()if __name__=='__main__':main()