Subversion Repositories SmartDukaan

Rev

Rev 4039 | Rev 4199 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
3232 varun.gupt 1
'''
2
Created on 24-Aug-2011
3
 
4
@author: Varun Gupta
5
'''
6
from BeautifulSoup import BeautifulSoup
4039 varun.gupt 7
from BaseScraper import BaseScraper
4198 varun.gupt 8
from Utils import removePriceFormatting
3232 varun.gupt 9
 
4039 varun.gupt 10
class InfibeamScraper(BaseScraper):
    '''
    Scraper for the mobile phone listing pages of infibeam.com.

    Usage: setUrl() to choose the page, scrape() to download and parse it,
    then getPhones() for the product records and getNextUrl() for paging.
    '''

    # Listing page URL; %d is the page number.
    _PAGE_URL_TEMPLATE = 'http://www.infibeam.com/Mobiles/search?page=%d'
    # Page size used by getNextUrl() to turn a product index into a page number.
    _PRODUCTS_PER_PAGE = 20

    def __init__(self):
        self.url = None
        self.id = None
        # Set by scrape(); declared here so the attribute always exists.
        self.soup = None

    def setUrl(self, url):
        '''Set the URL that the next call to scrape() will fetch.'''
        self.url = url

    def scrape(self):
        '''Fetch self.url through BaseScraper.read and parse it into self.soup.'''
        html = BaseScraper.read(self, self.url)
        self.soup = BeautifulSoup(html)

    @staticmethod
    def _toAsciiStr(text):
        '''
        Return str(text). If that raises UnicodeEncodeError, report it on
        stdout and return the text with every non-ASCII character replaced
        by a space.
        '''
        try:
            return str(text)
        except UnicodeEncodeError as e:
            print('Unicode Error %s %s' % (e, text))
            text_ascii = "".join([char if ord(char) < 128 else " " for char in text])
            print(text_ascii)
            return str(text_ascii)

    def getPhones(self):
        '''
        Build one record per <li> of the first <ul class="srch_result portrait">
        in the parsed page.

        Returns a list of dicts with the keys 'name', 'price', 'source'
        (always 'infibeam'), 'in_stock' (always 1) and 'product_url'.

        Raises IndexError when the result list, or an item's title span,
        fallback price span or link, is missing from the markup.
        '''
        phone_prices = []
        result_list = self.soup.findAll('ul', {'class': 'srch_result portrait'})[0]

        for li in result_list('li'):
            name = li.findAll('span', {'class': 'title'})[0].string

            try:
                # Preferred markup: <div class="price"><span class="normal">.
                # NOTE(review): this path does not go through
                # removePriceFormatting while the fallback below does --
                # confirm whether that is intentional.
                price = li.findAll('div', {'class': 'price'})[0].findAll('span', {'class': 'normal'})[0].string
            except IndexError:
                # Fallback markup: last child node of <span class="price">.
                price = removePriceFormatting(li.findAll('span', {'class': 'price'})[0].contents[-1].strip())

            url = li.findAll('a')[0]['href']

            # Only the name gets the lossy ASCII fallback; price and URL are
            # converted strictly, as before.
            phone_prices.append({
                'name': self._toAsciiStr(name),
                'price': str(price),
                'source': 'infibeam',
                'in_stock': 1,
                'product_url': str(url),
            })

        return phone_prices

    def getNextUrl(self):
        '''
        Return the URL of the next listing page, or None when the current
        page already shows the last product.

        Reads the <b> elements of the first <div class="resultsSummary">:
        the first must contain text of the form "<from>-<to>" (only <to> is
        used) and the second the total number of products.
        '''
        b = self.soup.findAll('div', {'class': 'resultsSummary'})[0].findAll('b')
        current_max = int(b[0].string.split('-')[1])
        total_products = int(b[1].string)

        if current_max >= total_products:
            return None

        # Explicit floor division so the page number does not depend on
        # Python 2's integer '/' semantics.
        return self._PAGE_URL_TEMPLATE % (1 + current_max // self._PRODUCTS_PER_PAGE)
3232 varun.gupt 64
 
65
if __name__ == '__main__':
    # Manual smoke run: scrape a single listing page and dump its records.
    scraper = InfibeamScraper()
    scraper.setUrl('http://www.infibeam.com/Mobiles/search?page=17')
    scraper.scrape()
    print(scraper.getPhones())