Subversion Repositories SmartDukaan

Rev

Blame | Last modification | View Log | RSS feed

'''
Created on 19-Sep-2011

@author: Varun Gupta
'''
from BaseScraper import BaseScraper
from BeautifulSoup import BeautifulSoup
from Utils import removePriceFormatting
import json

class AdexmartScraper(BaseScraper):
    '''
    Scraper for the Adexmart filter-search JSON endpoint.

    The JSON response is expected to carry an HTML fragment under the
    'products' key. scrape() parses that fragment and getPhones() turns
    every usable <li> of the product list into a dictionary.
    '''

    def __init__(self):
        '''Create a scraper with no URL set and nothing scraped yet.'''
        BaseScraper.__init__(self)
        self.url = None
        self.id = None
        # Defined up front so getPhones() can be called before scrape()
        # without hitting an AttributeError.
        self.soup = None
        self.phones = None

    def setUrl(self, url):
        '''Set the URL that the next call to scrape() will fetch.'''
        self.url = url

    def scrape(self):
        '''
        Fetch self.url and parse the HTML found under the 'products' key.

        The parsed document is stored in self.soup, and any phone list
        cached by an earlier getPhones() call is discarded.
        Errors from BaseScraper.read, json.loads, or a missing 'products'
        key are not caught here.
        '''
        response = BaseScraper.read(self, self.url)
        html = json.loads(response)['products']
        self.soup = BeautifulSoup(html)
        self.phones = None

    def _stripped_text(self, tag):
        '''
        Return the whitespace-stripped string of tag.

        Returns None when tag is None or when tag.string is None, so
        callers can test a single value instead of chaining lookups.
        '''
        if tag is None or tag.string is None:
            return None
        return tag.string.strip()

    def getPhones(self):
        '''
        Build the list of phones from the most recently scraped page.

        Each entry is a dictionary with the keys 'name', 'price',
        'source', 'product_url' and 'in_stock' (1 when the availability
        text is exactly 'Available', otherwise 0). The price text is passed
        through removePriceFormatting.

        Returns an empty list when nothing has been scraped yet or when
        the page has no <ul class="clear"> product list. List items that
        lack the product link, its title/href, or a price are skipped
        rather than aborting the whole scrape.
        The result is also stored in self.phones.
        '''
        phones = []

        product_list = None
        if self.soup is not None:
            product_list = self.soup.find('ul', {'class': 'clear'})
        # Calling a tag is BeautifulSoup shorthand for findAll().
        items = product_list('li') if product_list is not None else []

        for li in items:
            anchor = li.find('a', {'class': 'product_img_link'})
            price = self._stripped_text(li.find('span', {'class': 'price'}))
            if anchor is None or price is None:
                continue

            name = anchor.get('title')
            product_url = anchor.get('href')
            if name is None or product_url is None:
                continue

            availability = self._stripped_text(
                li.find('span', {'class': 'availability'}))
            in_stock = 1 if availability == 'Available' else 0

            phones.append({
                'name': name.strip(),
                'price': removePriceFormatting(price),
                'source': 'adexmart',
                'product_url': product_url,
                'in_stock': in_stock
            })

        self.phones = phones
        return phones

    def getNextUrl(self):
        '''Return None: this scraper never reports a follow-up URL.'''
        return None

if __name__ == '__main__':
    # Manual smoke run: fetch one listing page and dump the parsed phones.
    listing_url = 'http://www.adexmart.com/modules/coremanager/modules/filtersearch/filtersearch.json.php?act=filter&ident=16&page=1&perpage=1000&orderby=newest&orderway=desc'
    adexmart = AdexmartScraper()
    adexmart.setUrl(listing_url)
    adexmart.scrape()
    phones = adexmart.getPhones()
    print(phones)