Subversion Repositories SmartDukaan

Rev

Rev 4039 | Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
3232 varun.gupt 1
'''
2
Created on 24-Aug-2011
3
 
4
@author: Varun Gupta
5
'''
6
from BeautifulSoup import BeautifulSoup
7
import urllib
8
 
9
class LetsBuyScraper:
    """Scrape phone names and prices from a letsbuy.com listing page.

    Typical use: call ``setUrl(url)``, then ``scrape()`` to download and
    parse the page, then ``getPhonePrices()`` and/or ``getNextUrl()`` to
    read results out of the parsed page.
    """

    def __init__(self):
        self.url = None   # page to fetch; set through setUrl()
        self.id = None    # not read by any method of this class
        self.soup = None  # parsed page; populated by scrape()

    def setUrl(self, url):
        """Set the URL that the next scrape() call will download."""
        self.url = url

    def scrape(self):
        """Download ``self.url`` and store the parsed page in ``self.soup``."""
        sock = urllib.urlopen(self.url)
        try:
            html = sock.read()
        finally:
            # Release the connection even when the read fails part-way.
            sock.close()
        self.soup = BeautifulSoup(html)

    def getPhonePrices(self):
        """Return the products listed on the parsed page.

        Every ``<div class="detailbox">`` contributes one
        ``{'name': ..., 'price': ...}`` dict: the name is the text of the
        first ``<a>`` inside the first ``<h2>``, the price is the text of
        the first ``<span class="text12_stb">``. Both are stripped of
        surrounding whitespace. Each pair is also printed to stdout.

        Boxes missing either element, or whose element has no single text
        value, are skipped so that one malformed box does not abort the
        whole page. scrape() must have been called first.
        """
        phone_prices = []

        for div in self.soup.findAll('div', {'class': "detailbox"}):
            headings = div('h2')
            anchors = headings[0]('a') if headings else []
            price_spans = div.findAll('span', {'class': "text12_stb"})
            if not anchors or not price_spans:
                continue

            # BeautifulSoup reports .string as None when the tag does not
            # wrap exactly one text node (e.g. it has nested markup).
            raw_name = anchors[0].string
            raw_price = price_spans[0].string
            if raw_name is None or raw_price is None:
                continue

            name = raw_name.strip()
            price = raw_price.strip()
            # Single-argument form: same output as the Python 2 statement
            # "print name, price", and also valid Python 3 syntax.
            print("%s %s" % (name, price))
            # NOTE(review): on Python 2, str() raises UnicodeEncodeError for
            # non-ASCII text; kept so the returned value type is unchanged.
            phone_prices.append({'name': str(name), 'price': str(price)})
        return phone_prices

    def getNextUrl(self):
        """Return the stripped href of the "Next Page" link, or None.

        Anchors without a ``title`` (or a matching one without ``href``) are
        ignored. If several anchors match, the last one on the page wins.
        scrape() must have been called first.
        """
        next_url = None

        for anchor in self.soup.findAll('a'):
            try:
                if anchor['title'].strip() == "Next Page":
                    next_url = anchor['href'].strip()
            except KeyError:
                # This anchor lacks the attribute we need; not a pager link.
                pass

        return next_url
46
 
47
if __name__ == '__main__':
    # Manual smoke run: fetch the first listing page and print the URL its
    # "Next Page" link points to (None when there is no such link).
    scraper = LetsBuyScraper()
    scraper.setUrl('http://www.letsbuy.com/mobile-phones-mobiles-c-254_88?perpage=192')
    scraper.scrape()
    print(scraper.getNextUrl())