Subversion Repositories SmartDukaan

Rev

Rev 4203 | Rev 5401 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 4203 Rev 5291
Line 21... Line 21...
21
        self.soup = BeautifulSoup(html)
21
        self.soup = BeautifulSoup(html)
22
    
22
    
23
    def getPhones(self):
23
    def getPhones(self):
24
        product_prices = []
24
        product_prices = []
25
        
25
        
26
        for div in self.soup.findAll('div', {'class': 'product_div book_info_box'}):
26
        for div in self.soup.findAll('div', {'class': 'product_div'}):
-
 
27
            
27
            anchor = div.findAll('p', {'class': 'product_title'})[0]('a')[0]
28
            anchor = div.find('p', {'class': 'product_title'})('a')[0]
28
            name = str(anchor['title'].strip())
29
            name = str(anchor['title'].strip())
29
            
30
            
-
 
31
            if name.endswith(' Mobile Phone'):
30
            if name.endswith(' Mobile Phone'):  name = name.replace(' Mobile Phone', '')
32
                name = name.replace(' Mobile Phone', '')
31
            
33
            
32
            url = str(anchor['href'].strip())
34
            url = str(anchor['href'].strip())
33
            price = removePriceFormatting(str(div.findAll('span', {'class': 'product_new_price'})[0].string.strip()))
35
            price = removePriceFormatting(str(div.findAll('span', {'class': 'product_new_price'})[0].string.strip()))
34
            
36
            
35
            try:
37
            try:
Line 82... Line 84...
82
        return data
84
        return data
83
 
85
 
84
 
86
 
85
if __name__ == '__main__':
87
if __name__ == '__main__':
86
    scraper = HS18Scraper()
88
    scraper = HS18Scraper()
87
    print scraper.getDataFromProductPage('http://www.homeshop18.com/samsung-galaxy-note-n7000-mobile-phone/mobiles-accessories/gsm-handsets/product:16601211/cid:3027/')
-
 
88
#    scraper.setUrl('http://www.homeshop18.com//ipads-2f-tablets/categoryid:8937/search:*/start:32/')
-
 
89
#    scraper.scrape()
-
 
90
#    products = scraper.getPhones()
-
 
91
#    print scraper.getNextUrl()
-
 
92
#    print products
-
 
93
89
#    print scraper.getDataFromProductPage('http://www.homeshop18.com/samsung-galaxy-note-n7000-mobile-phone/mobiles-accessories/gsm-handsets/product:16601211/cid:3027/')
-
 
90
    scraper.setUrl('http://www.homeshop18.com/gsm-mobiles/categoryid:3027/search:*/start:112/')
-
 
91
    scraper.scrape()
-
 
92
    products = scraper.getPhones()
-
 
93
    print products
-
 
94
    print scraper.getNextUrl()
-
 
95
94
96