# Rev 6024 | Blame | Compare with Previous | Last modification | View Log | RSS feed
"""Scraper for product listings and product pages on tradus.com."""
from BeautifulSoup import BeautifulSoup
from BaseScraper import BaseScraper
from Utils import removePriceFormatting
import time


class TradusScraper(BaseScraper):
    """Extract name, price and URL of products from tradus.com pages.

    Typical use: call ``setUrl`` with a listing URL, ``scrape`` to read and
    parse it, ``getPhones`` to collect the products on that page, and
    ``getNextUrl`` to obtain the URL of the following listing page.
    """

    # Paging settings. They are not read anywhere in this file; presumably
    # they are consumed by the code that drives the scraper -- confirm there.
    mobilePageCount = 32
    tabletPageCount = 21
    productCountPerScraping = 20
    mobileCurrentPage = 0
    tabletCurrentPage = 0

    def __init__(self):
        BaseScraper.__init__(self)
        self.url = None
        self.id = None

    def setUrl(self, url):
        """Set the URL that the next call to ``scrape`` will read."""
        self.url = url

    def scrape(self):
        """Read ``self.url`` through ``BaseScraper.read`` and parse the result.

        The parsed document is stored in ``self.soup`` and any previously
        collected ``self.phones`` is cleared.
        """
        html = BaseScraper.read(self, self.url)
        self.soup = BeautifulSoup(html)
        self.phones = None

    def _phoneRecord(self, name, product_url, price):
        """Build the dict describing one listed product.

        ``price`` is normalised with ``removePriceFormatting``; ``name`` and
        ``product_url`` are converted with ``str``. Products are always
        reported as in stock (``in_stock`` is 1).
        """
        return {
            'name': str(name),
            'price': removePriceFormatting(price),
            'source': 'tradus',
            'product_url': str(product_url),
            'in_stock': 1,
        }

    def getPhones(self):
        """Collect the products listed on the page parsed by ``scrape``.

        Each ``div`` with class ``prod_main_div`` is treated as one listing.
        A listing that cannot be parsed is reported with ``print`` and
        skipped, so one malformed entry never aborts the whole page.

        Returns:
            A list of dicts with the keys ``name``, ``price``, ``source``,
            ``product_url`` and ``in_stock``. The same list is stored in
            ``self.phones``.
        """
        phones = []
        for div in self.soup.findAll('div', {'class': 'prod_main_div'}):
            # Reset per listing: the IndexError fallback below must never
            # combine this listing's price with the name/URL left over from
            # the previous loop iteration.
            name = None
            product_url = None
            try:
                productUrlContainer = div.find(
                    'div', {'class': 'product_name search-product-block'})
                link = productUrlContainer.contents[1]
                name = link.string
                product_url = "http://www.tradus.com" + link['href']
                price = div.find(
                    'span', {'class': 'numDiv_left'}).string.strip()
                phones.append(self._phoneRecord(name, product_url, price))
            except IndexError as iex:
                if product_url is None:
                    # The product link itself was missing, so there is
                    # nothing trustworthy to attach a price to.
                    print(iex)
                    continue
                try:
                    # Alternate location of the price; calling the tag is
                    # BeautifulSoup shorthand for findAll. The first 18
                    # characters of the text are dropped before the price
                    # is normalised.
                    price = div.find(
                        'div',
                        {'class': 'mainresult-show-right-startrate'}
                    )('span')[0].contents[0].strip()[18:]
                    phones.append(
                        self._phoneRecord(name, product_url, price))
                except Exception as ex:
                    print(ex)
            except Exception as e:
                # Best effort: report the problem and move to the next one.
                print(e)
        self.phones = phones
        return phones

    def setPageCount(self):
        """Advance ``self.currentPage`` by one.

        NOTE(review): ``currentPage`` is not assigned anywhere in this
        class; presumably BaseScraper provides it -- confirm, otherwise
        this raises AttributeError.
        """
        self.currentPage = self.currentPage + 1

    def getNextUrl(self):
        """Return the URL of the next listing page, or None if there is none.

        Sleeps for one second first (presumably to throttle requests).
        A next page exists only when the parsed page contains an ``a`` tag
        with class ``filter-link more`` and ``self.url`` contains ``page=``.

        Raises:
            ValueError: if the text following ``page=`` is not an integer;
                the page number is expected to be the last part of the URL.
        """
        time.sleep(1)
        nextLink = self.soup.find('a', {'class': 'filter-link more'})
        if nextLink is None:
            return None
        urlDivider = self.url.find('page=')
        # Compare by value: identity checks against an int literal are
        # unreliable and only work by accident of interpreter caching.
        if urlDivider == -1:
            return None
        urlDivider += 5  # len('page=')
        pageNumber = int(self.url[urlDivider:]) + 1
        return self.url[:urlDivider] + str(pageNumber)

    def getDataFromProductPage(self, url):
        """Read a single product page and return its details.

        Args:
            url: address of the product page.

        Returns:
            A dict with the keys ``product_url``, ``source``, ``price``,
            ``in_stock`` (always 1) and ``name``.
        """
        html = BaseScraper.read(self, url)
        soup = BeautifulSoup(html)
        name = soup.find('span', {'itemprop': 'name'}).string.strip()
        price = soup.find('span', {'class': 'mrp3'}).contents[0].strip()
        price = removePriceFormatting(price)
        data = {
            "product_url": str(url),
            "source": "tradus",
            "price": price,
            "in_stock": 1,
            "name": name,
        }
        return data


# NOTE: this definition rebinds the name imported from Utils above, so the
# class methods use this version at call time.
def removePriceFormatting(price_string):
    """Strip currency markers, commas and spaces and drop the decimal part.

    For example ``'Rs. 1,299.00'`` becomes ``'1299'``.
    """
    return price_string.strip().replace('Rs.', '').replace('Rs', '').replace(',', '').replace(' ', '').replace(' ', '').split('.')[0]


if __name__ == '__main__':
    s = TradusScraper()
    data = s.getDataFromProductPage('http://www.tradus.com/zing-q800-dual-sim-mobile-phone/p/MOB0000004506663')
    print(data)