Subversion Repositories SmartDukaan

Rev

Rev 6022 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed

from BeautifulSoup import BeautifulSoup
from BaseScraper import BaseScraper
from Utils import removePriceFormatting

import time

class TradusScraper(BaseScraper):
    """Scraper for tradus.com search-result listings and product pages.

    The page counters below are class attributes and getNextUrl() updates
    them on the class itself, so all instances share one paging position
    per category.
    """

    # Highest page number getNextUrl() will generate for each search.
    mobilePageCount = 32
    tabletPageCount = 21
    productCountPerScraping = 20
    # Page number most recently generated by getNextUrl() (0 = none yet).
    mobileCurrentPage = 0
    tabletCurrentPage = 0

    def __init__(self):
        """Initialise the base scraper with no target URL selected."""
        BaseScraper.__init__(self)
        self.url = None
        self.id = None

    def setUrl(self, url):
        """Set the URL that the next scrape() call will fetch."""
        self.url = url

    def scrape(self):
        """Fetch self.url, parse it into self.soup and clear self.phones."""
        html = BaseScraper.read(self, self.url)
        self.soup = BeautifulSoup(html)
        self.phones = None

    def _phoneRecord(self, name, price, product_url):
        """Build one listing dict; price must already be formatted."""
        return {
            'name': str(name),
            'price': price,
            'source': 'tradus',
            'product_url': str(product_url),
            'in_stock': 1,
        }

    def getPhones(self):
        """Extract the products listed on the page parsed by scrape().

        Returns a list with one dict per usable 'prod_main_div' listing
        (keys: name, price, source, product_url, in_stock) and also stores
        that list in self.phones. Listings that cannot be parsed are
        reported on stdout and skipped; listings whose formatted price is
        empty are skipped silently.
        """
        phones = []
        for div in self.soup.findAll('div', {'class': 'prod_main_div'}):
            # Reset for every listing: the IndexError fallback below must
            # never see the name/URL of a previously processed product.
            name = None
            product_url = None
            try:
                productUrlContainer = div.find('div', {'class': 'product_name search-product-block'})
                link = productUrlContainer.contents[1]
                name = link.string
                product_url = "http://www.tradus.com" + link['href']
                price = div.find('span', {'class': 'numDiv_left'}).string.strip()
                price = removePriceFormatting(price)
                if not price:
                    # No usable price text, so there is nothing to record.
                    continue
                phones.append(self._phoneRecord(name, price, product_url))
            except IndexError:
                # product_url is assigned last, so None means the product
                # link was never read for this listing. Skip it instead of
                # pairing this listing's price with another product's data.
                if product_url is None:
                    print("Skipping tradus listing: product link not found")
                    continue
                try:
                    # Alternate price markup; [18:] drops the first 18
                    # characters of the text (presumably a fixed label
                    # prefix - TODO confirm).
                    price = div.find('div', {'class': 'mainresult-show-right-startrate'})('span')[0].contents[0].strip()[18:]
                    phones.append(self._phoneRecord(name, removePriceFormatting(price), product_url))
                except Exception as ex:
                    # Best effort: report the problem and move on.
                    print(ex)
            except Exception as e:
                # Best effort: one malformed listing must not abort the page.
                print(e)
        self.phones = phones
        return phones

    def setPageCount(self):
        """Advance self.currentPage by one, starting from 0 if it is unset."""
        # Nothing in this class initialises currentPage, so fall back to 0
        # rather than raising AttributeError on the first call.
        self.currentPage = getattr(self, 'currentPage', 0) + 1

    def getNextUrl(self):
        """Return the next search-page URL for the current category.

        The category is chosen by whether self.url contains "mobile" or
        "tablets". Returns None when no URL is set, when the URL matches
        neither category, or when the category's page count is exhausted.
        Always sleeps one second first (presumably to throttle requests).
        """
        time.sleep(1)
        if not self.url:
            # "in" on None would raise TypeError; no URL means no next page.
            return None
        if "mobile" in self.url:
            if TradusScraper.mobileCurrentPage >= TradusScraper.mobilePageCount:
                return None
            TradusScraper.mobileCurrentPage += 1
            return 'http://www.tradus.com/search/tradus_search/?query=mobile&filters=cat:7756&page=%s' % (TradusScraper.mobileCurrentPage)
        if "tablets" in self.url:
            if TradusScraper.tabletCurrentPage >= TradusScraper.tabletPageCount:
                return None
            TradusScraper.tabletCurrentPage += 1
            return 'http://www.tradus.com/search/tradus_search/?query=tablets&filters=cat:7756&cat:7762&page=%s' % (TradusScraper.tabletCurrentPage)
        return None

    def getDataFromProductPage(self, url):
        """Fetch a single product page and return its listing dict.

        The name is read from the span with itemprop="name" and the price
        from the span with class "mrp3". Neither lookup is guarded, so a
        page lacking these elements raises instead of returning.
        """
        html = BaseScraper.read(self, url)
        soup = BeautifulSoup(html)
        name = soup.find('span', {'itemprop': 'name'}).string.strip()
        price = soup.find('span', {'class': 'mrp3'}).contents[0].strip()
        price = removePriceFormatting(price)
        in_stock = 1

        data = {
            "product_url": str(url),
            "source": "tradus",
            "price": price,
            "in_stock": in_stock,
            "name": name
        }
        return data

def removePriceFormatting(price_string):
    """Reduce a displayed price to the text before its first '.'.

    Surrounding whitespace is stripped, then every 'Rs.', 'Rs', ',', ' '
    and '&nbsp;' is removed, in that order. Whatever follows the first
    remaining '.' is discarded. Defining this here rebinds the name that
    the top of the file imports from Utils.
    """
    cleaned = price_string.strip()
    # Order matters: 'Rs.' must go before 'Rs' so its dot is removed too.
    for token in ('Rs.', 'Rs', ',', ' ', '&nbsp;'):
        cleaned = cleaned.replace(token, '')
    whole_part = cleaned.partition('.')[0]
    return whole_part

if __name__ == '__main__':
    # Manual smoke run: fetch one known product page and show the result.
    scraper = TradusScraper()
    product = scraper.getDataFromProductPage('http://www.tradus.com/zing-q800-dual-sim-mobile-phone/p/MOB0000004506663')
    print(product)