Subversion Repositories SmartDukaan

Rev 3232 | varun.gupt
'''
Created on 29-Aug-2011

@author: Varun Gupta
'''
from BeautifulSoup import BeautifulSoup
import urllib


class SaholicScraper:
    '''Scrapes phone names and prices from the Saholic mobile phone listing.'''

    def __init__(self):
        self.url = None
        self.id = None

    def setUrl(self, url):
        self.url = url

    def scrape(self):
        # Fetch the page at self.url and parse it into a BeautifulSoup tree.
        sock = urllib.urlopen(self.url)
        html = sock.read()
        sock.close()
        self.soup = BeautifulSoup(html)

    def getPhonePrices(self):
        # Extract a {'name': ..., 'price': ...} dict from each product block.
        phone_prices = []

        for div in self.soup.findAll('div', {'class': 'productDetails'}):
            try:
                name = div.findAll('div', {'class': 'title'})[0]('a')[0].string.strip()
                price = div.findAll('span', {'class': 'newPrice'})[1].string.strip()
                phone_prices.append({'name': str(name), 'price': str(price)})

            except (IndexError, AttributeError, KeyError):
                # Skip product blocks that are missing a title or price.
                pass

        return phone_prices

    def getNextUrl(self):
        # Return the URL of the next listing page, or None on the last page.
        try:
            anchors = self.soup.findAll('li', {'class': 'pager-next'})[0]('a')
            return 'http://www.saholic.com/all-mobile-phones/10001%s' % str(anchors[0]['href'].strip())
        except (IndexError, KeyError):
            return None


if __name__ == '__main__':
    scraper = SaholicScraper()
    scraper.setUrl('http://www.saholic.com/all-mobile-phones/10001')
    scraper.scrape()
    print scraper.getNextUrl()
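
Usage note (not part of revision 3232): the sketch below shows one way to walk every listing page by chaining getNextUrl() until it returns None, collecting the results of getPhonePrices() along the way. It assumes Python 2 with BeautifulSoup 3 installed, the same environment the module above targets, and uses only the methods defined in the class.

# Usage sketch: crawl all listing pages and print the collected prices.
scraper = SaholicScraper()
url = 'http://www.saholic.com/all-mobile-phones/10001'
all_prices = []
while url:
    # Fetch and parse the current page, then collect its products.
    scraper.setUrl(url)
    scraper.scrape()
    all_prices.extend(scraper.getPhonePrices())
    # getNextUrl() returns None on the last page, which ends the loop.
    url = scraper.getNextUrl()

for item in all_prices:
    print '%s: %s' % (item['name'], item['price'])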