Subversion Repositories SmartDukaan

Rev

Rev 5761 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed

from BeautifulSoup import BeautifulSoup
from BaseScraper import BaseScraper
from Utils import removePriceFormatting

import time

class TradusScraper(BaseScraper):
    """Scraper for mobile-phone listings and product pages on tradus.com.

    Paging state (``currentPage``) is stored on the class, so it is shared by
    every instance of the scraper.
    """

    # getNextUrl() stops handing out URLs once currentPage reaches this value.
    pageCount = 67
    # Not referenced anywhere in this class.
    productCountPerScraping = 20
    # Number of the most recently issued result page (0 = none issued yet).
    currentPage = 0

    def __init__(self):
        """Initialise the base scraper and clear the target URL and id."""
        BaseScraper.__init__(self)
        self.url = None
        self.id = None

    def setUrl(self, url):
        """Set the URL that the next call to scrape() will read."""
        self.url = url

    def scrape(self):
        """Read ``self.url`` through BaseScraper.read and parse the result.

        The parsed document is stored in ``self.soup``; ``self.phones`` is
        reset to None until getPhones() is called.
        """
        html = BaseScraper.read(self, self.url)
        self.soup = BeautifulSoup(html)
        self.phones = None

    def getPhones(self):
        """Return one dict per product found on the page parsed by scrape().

        Each dict has the keys ``name``, ``price``, ``source``,
        ``product_url`` and ``in_stock``. A result entry that cannot be
        parsed is reported on stdout and skipped, so one malformed entry
        does not abort the page. The list (possibly empty) is also stored
        in ``self.phones``.
        """
        phones = []
        for div in self.soup.findAll('div', {'class': 'mainresult-show-right'}):
            try:
                phones.append(self._parsePhone(div))
            except Exception as e:
                # Best effort: report the broken entry and carry on.
                print(e)
        # Publish the result only after every entry has been visited.
        self.phones = phones
        return phones

    def _parsePhone(self, div):
        """Build the product dict for a single search-result ``div``."""
        anchor = div.find('a')
        # The name is taken from the third child of the anchor; when that is
        # blank the first child is used instead.
        name = anchor.contents[2].strip()
        if len(name) == 0:
            name = anchor.contents[0].strip()
        product_url = anchor['href'].strip()
        # Name and URL are resolved before the price so that the price
        # fallback can never pair a price with another entry's name/URL.
        price = self._parsePrice(div)
        return {
            'name': str(name),
            'price': removePriceFormatting(price),
            'source': 'tradus',
            'product_url': str(product_url),
            'in_stock': 1
        }

    def _parsePrice(self, div):
        """Return the raw price text of a search-result ``div``.

        Two layouts are handled: the price inside a nested span (first 3
        characters dropped) and, when that lookup raises IndexError, the
        price directly inside the outer span (first 18 characters dropped).
        NOTE(review): the dropped prefixes are presumably a currency marker
        and a label -- confirm against the live markup.
        """
        rate = div.find('div', {'class': 'mainresult-show-right-startrate'})
        try:
            return rate('span')[0]('span')[0].contents[0].strip()[3:]
        except IndexError:
            return rate('span')[0].contents[0].strip()[18:]

    def setPageCount(self):
        """Increment ``currentPage`` on this instance.

        NOTE(review): this assigns an instance attribute, whereas
        getNextUrl() reads and updates the class attribute, so the two
        counters diverge after the first call -- confirm which is intended.
        """
        self.currentPage = self.currentPage + 1

    def getNextUrl(self):
        """Return the URL of the next result page, or None when done.

        Sleeps for one second on every call before answering, then advances
        the class-wide page counter.
        """
        time.sleep(1)
        if TradusScraper.currentPage < TradusScraper.pageCount:
            TradusScraper.currentPage += 1
            return 'http://www.tradus.com/search/tradus_search/?query=mobile&filters=cat:7756&page=%s' % (TradusScraper.currentPage)
        else:
            return None

    def getDataFromProductPage(self, url):
        """Read a single product page and return its details as a dict.

        The name comes from the ``h1.left-content-product-heading`` element
        and the price from the ``b#tPrice`` element; an AttributeError is
        raised if either element is missing. The price is returned as the
        stripped page text, without removePriceFormatting applied.
        """
        html = BaseScraper.read(self, url)
        soup = BeautifulSoup(html)
        name = soup.find('h1',{'class': 'left-content-product-heading'}).string.strip()
        price = soup.find('b', {'id': 'tPrice'}).string.strip()

        data = {
            "product_url": str(url), 
            "source": "tradus", 
            "price": price, 
            "in_stock": 1, 
            "name": name
        }
        return data

if __name__ == '__main__':
    # Manual smoke test: read one known product page and print the parsed
    # record to stdout.
    s = TradusScraper()
    data = s.getDataFromProductPage('http://www.tradus.com/samsung-galaxy-y-pro-duos-b5512-mobile-phone/p/MOB0000004549294')
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print(data)