Subversion Repositories SmartDukaan

Rev

Rev 5291 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 5291 Rev 5401
Line 4... Line 4...
4
@author: Varun Gupta
4
@author: Varun Gupta
5
'''
5
'''
6
from BeautifulSoup import BeautifulSoup
6
from BeautifulSoup import BeautifulSoup
7
from BaseScraper import BaseScraper
7
from BaseScraper import BaseScraper
8
from Utils import removePriceFormatting
8
from Utils import removePriceFormatting
-
 
9
from SoupSelect import select
9
 
10
 
10
class HS18Scraper(BaseScraper):
11
class HS18Scraper(BaseScraper):
11
 
12
 
12
    def __init__(self):
13
    def __init__(self):
13
        self.url = None
14
        self.url = None
Line 21... Line 22...
21
        self.soup = BeautifulSoup(html)
22
        self.soup = BeautifulSoup(html)
22
    
23
    
23
    def getPhones(self):
24
    def getPhones(self):
24
        product_prices = []
25
        product_prices = []
25
        
26
        
26
        for div in self.soup.findAll('div', {'class': 'product_div'}):
27
        for div in select(self.soup, "div.product_div"):#self.soup.findAll('div', {'class': 'product_div'}):
27
            
28
            
28
            anchor = div.find('p', {'class': 'product_title'})('a')[0]
29
            anchor = div.find('p', {'class': 'product_title'})('a')[0]
29
            name = str(anchor['title'].strip())
30
            name = str(anchor['title'].strip())
30
            
31
            
31
            if name.endswith(' Mobile Phone'):
32
            if name.endswith(' Mobile Phone'):
Line 85... Line 86...
85
 
86
 
86
 
87
 
87
if __name__ == '__main__':
88
if __name__ == '__main__':
88
    scraper = HS18Scraper()
89
    scraper = HS18Scraper()
89
#    print scraper.getDataFromProductPage('http://www.homeshop18.com/samsung-galaxy-note-n7000-mobile-phone/mobiles-accessories/gsm-handsets/product:16601211/cid:3027/')
90
#    print scraper.getDataFromProductPage('http://www.homeshop18.com/samsung-galaxy-note-n7000-mobile-phone/mobiles-accessories/gsm-handsets/product:16601211/cid:3027/')
90
    scraper.setUrl('http://www.homeshop18.com/gsm-mobiles/categoryid:3027/search:*/start:112/')
91
    scraper.setUrl('http://www.homeshop18.com/mobiles/category:14569/')
91
    scraper.scrape()
92
    scraper.scrape()
92
    products = scraper.getPhones()
93
    products = scraper.getPhones()
93
    print products
94
    print products
94
    print scraper.getNextUrl()
95
    print scraper.getNextUrl()
95
96