Subversion Repositories SmartDukaan

Rev

Rev 5761 | Rev 6022 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed

from BeautifulSoup import BeautifulSoup
from BaseScraper import BaseScraper
from Utils import removePriceFormatting

import time

class TradusScraper(BaseScraper):
    """Scraper for mobile-phone and tablet search listings on tradus.com.

    Usage as shown by this file: ``setUrl`` stores a page URL, ``scrape``
    downloads and parses it, ``getPhones`` extracts the listed products and
    ``getNextUrl`` yields the URL of the next search page.
    ``getDataFromProductPage`` scrapes a single product page on its own.

    The page counters are class attributes, so they are shared by every
    instance of this class in the process.
    """

    # Highest page number that getNextUrl will hand out for each search.
    mobilePageCount = 32
    tabletPageCount = 21
    # Not read anywhere in this class.
    productCountPerScraping = 20
    # Page number most recently returned by getNextUrl (0 = none yet).
    mobileCurrentPage = 0
    tabletCurrentPage = 0

    def __init__(self):
        BaseScraper.__init__(self)
        self.url = None
        self.id = None

    def setUrl(self, url):
        """Store the URL that the next ``scrape`` call will download."""
        self.url = url

    def scrape(self):
        """Download ``self.url``, parse it into ``self.soup`` and reset ``self.phones``."""
        html = BaseScraper.read(self, self.url)
        self.soup = BeautifulSoup(html)
        self.phones = None
        #self.setPageCount()

    def _extractName(self, anchor):
        """Return the product name held in a result anchor.

        Handles the three anchor layouts the listing code distinguishes: a
        single child, a second child whose string is "Tablet", and anything
        else (third child preferred, first child when the third is blank).
        Raises IndexError when the anchor has too few children for the
        layout it appears to use.
        """
        contents = anchor.contents
        if len(contents) == 1:
            return contents[0].strip()
        if contents[1].string == "Tablet":
            name = contents[0] + "Tablet"
            if len(contents) > 2:
                name = name + contents[2]
            return name
        name = contents[2].strip()
        if not name:
            name = contents[0].strip()
        return name

    def _extractPrice(self, div):
        """Return the raw price text of one result ``div``.

        Two markup variants are supported: a span nested inside the first
        span of the "startrate" div, or the price text directly inside that
        first span.
        """
        rate = div.find('div', {'class': 'mainresult-show-right-startrate'})
        try:
            # Presumably drops a 3-character currency prefix such as "Rs."
            # -- TODO confirm against the live markup.
            return rate('span')[0]('span')[0].contents[0].strip()[3:]
        except IndexError:
            # No nested span: the text sits directly in the outer span,
            # behind what is presumably an 18-character label -- TODO confirm.
            return rate('span')[0].contents[0].strip()[18:]

    def getPhones(self):
        """Extract every product from the page parsed by ``scrape``.

        Returns a list of dicts with the keys ``name``, ``price``,
        ``source``, ``product_url`` and ``in_stock``; the same list is also
        stored on ``self.phones``. A result block that cannot be parsed is
        reported with ``print`` and skipped.

        The alternative price format is only tried when the price lookup
        itself fails. Previously any IndexError -- including one raised
        while reading the name -- fell into that path and appended a record
        carrying the name and URL of the previously processed product.
        """
        phones = []
        for div in self.soup.findAll('div', {'class': 'mainresult-show-right'}):
            try:
                anchor = div.find('a')
                name = self._extractName(anchor)
                product_url = anchor['href'].strip()
                price = self._extractPrice(div)
                phones.append({
                    'name': str(name),
                    'price': removePriceFormatting(price),
                    'source': 'tradus',
                    'product_url': str(product_url),
                    'in_stock': 1
                })
            except Exception as e:
                # Best effort: one malformed result must not abort the page.
                print(e)
        self.phones = phones
        return phones

    def setPageCount(self):
        """Advance ``self.currentPage`` by one.

        Nothing in this class initialises ``currentPage``, so it is treated
        as 0 when absent instead of raising AttributeError.
        """
        self.currentPage = getattr(self, 'currentPage', 0) + 1

    def getNextUrl(self):
        """Return the next search-page URL for the current search, or None.

        The search kind is chosen by whether ``self.url`` contains "mobile"
        or "tablets". None is returned once the matching page limit has been
        reached, when the URL matches neither kind, or when no URL is set.
        Every call first sleeps for one second (presumably to throttle
        requests).
        """
        time.sleep(1)
        if not self.url:
            return None
        if "mobile" in self.url:
            if TradusScraper.mobileCurrentPage >= TradusScraper.mobilePageCount:
                return None
            TradusScraper.mobileCurrentPage += 1
            return 'http://www.tradus.com/search/tradus_search/?query=mobile&filters=cat:7756&page=%s' % (TradusScraper.mobileCurrentPage)
        if "tablets" in self.url:
            if TradusScraper.tabletCurrentPage >= TradusScraper.tabletPageCount:
                return None
            TradusScraper.tabletCurrentPage += 1
            # NOTE(review): "&cat:7762" is sent as a separate query parameter
            # rather than as part of "filters" -- confirm this is intended.
            return 'http://www.tradus.com/search/tradus_search/?query=tablets&filters=cat:7756&cat:7762&page=%s' % (TradusScraper.tabletCurrentPage)
        return None

    def getDataFromProductPage(self, url):
        """Scrape name and price from a single product page.

        Returns a dict with the keys ``product_url``, ``source``, ``price``,
        ``in_stock`` and ``name``. Raises AttributeError when the heading or
        the price element is missing from the page.
        """
        html = BaseScraper.read(self, url)
        soup = BeautifulSoup(html)
        name = soup.find('h1', {'class': 'left-content-product-heading'}).string.strip()
        price = soup.find('b', {'id': 'tPrice'}).string.strip()
        if "Rs." in price:
            # Keep only what follows the currency marker. A fixed four
            # character slice is wrong whenever the marker is not exactly
            # "Rs." plus one character at the very start of the text.
            price = price.partition("Rs.")[2].strip()
        # NOTE(review): unlike getPhones, the price is not passed through
        # removePriceFormatting here -- confirm callers expect the raw text.
        in_stock = 1

        return {
            "product_url": str(url),
            "source": "tradus",
            "price": price,
            "in_stock": in_stock,
            "name": name
        }

if __name__ == '__main__':
    # Manual smoke test: fetch one live product page and print the scraped
    # record. Requires network access to tradus.com.
    # A stale copy of the listing-page loop used to be kept here inside a
    # string literal; it duplicated TradusScraper.getPhones and never ran.
    scraper = TradusScraper()
    data = scraper.getDataFromProductPage('http://www.tradus.com/samsung-galaxy-y-pro-duos-b5512-mobile-phone/p/MOB0000004549294')
    print(data)