Subversion Repositories SmartDukaan

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4198 varun.gupt 1
'''
2
Created on 19-Sep-2011
3
 
4
@author: Varun Gupta
5
'''
6
from BaseScraper import BaseScraper
7
from BeautifulSoup import BeautifulSoup
8
from Utils import removePriceFormatting
9
import json
10
 
11
class AdexmartScraper(BaseScraper):
    '''
    Scraper for an Adexmart product-listing feed.

    scrape() fetches self.url, reads the 'products' entry of the JSON
    response (an HTML fragment) and parses it with BeautifulSoup.
    getPhones() then turns every product <li> of that fragment into a
    dictionary.
    '''

    def __init__(self):
        BaseScraper.__init__(self)
        self.url = None
        self.id = None
        # Created up front so getPhones() can be called before scrape()
        # without failing on a missing attribute.
        self.soup = None
        self.phones = None

    def setUrl(self, url):
        '''Set the URL that the next scrape() call will fetch.'''
        self.url = url

    def scrape(self):
        '''
        Fetch self.url and parse the HTML held under the 'products' key of
        the JSON response. Any previously extracted phone list is discarded.
        '''
        response = BaseScraper.read(self, self.url)
        html = json.loads(response)['products']
        self.soup = BeautifulSoup(html)
        self.phones = None

    def _text(self, node):
        '''
        Return the stripped text of a tag, or None when the tag is missing
        or does not wrap exactly one string.
        '''
        if node is None or node.string is None:
            return None
        return node.string.strip()

    def getPhones(self):
        '''
        Extract the product entries from the parsed listing.

        Returns a list of dictionaries with the keys 'name', 'price',
        'source', 'product_url' and 'in_stock' (1 when the availability
        text is exactly 'Available', otherwise 0). The same list is stored
        in self.phones.

        An empty list is returned when nothing has been scraped yet or the
        listing container is absent. List items that lack the product link,
        its title/href or a readable price are skipped instead of aborting
        the whole extraction.
        '''
        phones = []

        listing = None
        if self.soup is not None:
            # find() returns None when the <ul class="clear"> is not present.
            listing = self.soup.find('ul', {'class': 'clear'})
        items = listing('li') if listing is not None else []

        for li in items:
            anchor = li.find('a', {'class': 'product_img_link'})
            if anchor is None:
                # Calling the tag searches recursively, so nested <li>
                # elements that are not product entries can show up here.
                continue

            name = anchor.get('title')
            product_url = anchor.get('href')
            price = self._text(li.find('span', {'class': 'price'}))
            if name is None or product_url is None or price is None:
                continue

            # A missing availability marker is treated as out of stock.
            availability = self._text(li.find('span', {'class': 'availability'}))
            in_stock = 1 if availability == 'Available' else 0

            phones.append({
                    'name': name.strip(),
                    'price': removePriceFormatting(price),
                    'source': 'adexmart',
                    'product_url': product_url,
                    'in_stock': in_stock
                })

        self.phones = phones
        return phones

    def getNextUrl(self):
        '''Always returns None: this scraper does not follow pagination.'''
        return None
47
 
48
if __name__ == '__main__':
    # Manual smoke run: fetch one listing page from the live site and dump
    # the extracted entries to stdout.
    scraper = AdexmartScraper()
    scraper.setUrl('http://www.adexmart.com/modules/coremanager/modules/filtersearch/filtersearch.json.php?act=filter&ident=16&page=1&perpage=1000&orderby=newest&orderway=desc')
    scraper.scrape()
    # The parenthesised single-argument form prints the same text under the
    # Python 2 print statement and is also valid as a Python 3 call.
    print(scraper.getPhones())