'''
Created on 19-Nov-2011

@author: Varun Gupta
'''
from BaseScraper import BaseScraper
from BeautifulSoup import BeautifulSoup

import json

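# Scraper for the mysmartprice.com mobile price lists: collects each phone's price
# and availability at every whitelisted online store.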
class MySmartPrice(BaseScraper):

    def __init__(self):
        self.url_brand_list = 'http://www.mysmartprice.com/mobile/pricelist/'
        self.source_whitelist = ['adexmart', 'flipkart', 'homeshop18', 'infibeam', 'letsbuy', 'saholic']

    def getSourceName(self, url):
        # Return the first whitelisted store whose name appears in the URL, else None.
        for source in self.source_whitelist:
            if url.find(source) > -1:
                return source

        return None

    def getSaholicEntityId(self, map):
        # The Saholic entity id is the last hyphen-separated token of the Saholic URL.
        try:
            if map['saholic']['url'] is None:
                return None
            else:
                return map['saholic']['url'].split('-')[-1]

        except KeyError:
            return None

    def getBrandURLs(self):
        # Collect one price-list URL per brand from the main price-list index page.
        urls = []
        html = BaseScraper.read(self, self.url_brand_list)
        soup = BeautifulSoup(html)
        for td in soup.find('div', {'class': 'msp_left'}).find('table').find('table').findAll('td', {'width': "300px"}):
            urls.append(str(td.find('a')['href']))
        return urls

    def getPhoneURLsForBrand(self, url_brand):
        # Collect the URL of every phone listed on a brand's price-list page.
        urls = []
        url_brand = 'http://www.mysmartprice.com' + url_brand
        html = BaseScraper.read(self, url_brand)
        soup = BeautifulSoup(html)
        for div in soup.findAll('div', {'class': 'item'}):
            a = div.find('a')

            if a is not None:
                urls.append(str(a['href']))
        return urls

    def getPhonePrices(self, url):
        # Build a map of source name -> {is_available, price, url} from the phone's price table.
        html = BaseScraper.read(self, url)
        soup = BeautifulSoup(html)
        map = {}
        for div in soup.findAll('div', {'class': 'pt_row'}):
            url = div.find('td', {'width': '140px'}).find('a')['href'].split('?url=')[-1].strip()
            td_price = div.find('td', {'width': '135px'})

            # An available listing keeps its price inside a <b> tag (so td_price.string is None);
            # a plain-text price cell is treated as "not available at this store".
            if td_price.string is None:
                is_available = True
                price = td_price.find('b').string.strip()
            else:
                is_available = False

            source = self.getSourceName(url)

            if source is not None:
                map[source] = {
                        'is_available': is_available,
                        'price': price if is_available else 'Not Found',
                        'url': url if is_available else 'Not Found'
                }
        return map

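# Standalone driver: walk the brand price lists, scrape every phone found, print the
# results, and dump each phone's per-store price map to a JSON file named after its
# Saholic entity id.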
if __name__ == '__main__':
    scraper = MySmartPrice()
#    brand_urls = scraper.getBrandURLs()
    brand_urls = [
                  '/mobile/pricelist/nokia-mobile-price-list-in-india.html',
                  '/mobile/pricelist/samsung-mobile-price-list-in-india.html',
                  '/mobile/pricelist/blackberry-mobile-price-list-in-india.html',
                  '/mobile/pricelist/lg-mobile-price-list-in-india.html',
                  '/mobile/pricelist/sony-ericsson-mobile-price-list-in-india.html',
                  '/mobile/pricelist/micromax-mobile-price-list-in-india.html',
                  '/mobile/pricelist/motorola-mobile-price-list-in-india.html',
                  '/mobile/pricelist/htc-mobile-price-list-in-india.html',
                  '/mobile/pricelist/apple-mobile-price-list-in-india.html',
                  '/mobile/pricelist/spice-mobile-price-list-in-india.html',
                  '/mobile/pricelist/karbonn-mobile-price-list-in-india.html',
                  '/mobile/pricelist/lava-mobile-price-list-in-india.html']
    phone_urls = []

    # Gather phone URLs across all brands; skip any brand page that fails to scrape.
    for brand_url in brand_urls:
        try:
            print brand_url
            phone_urls.extend(scraper.getPhoneURLsForBrand(brand_url))
        except Exception as e:
            print e
            continue

    print len(phone_urls)

    for url in phone_urls:
        print url
        map = scraper.getPhonePrices(url)
        saholic_id = scraper.getSaholicEntityId(map)
        print map
        print saholic_id

        if saholic_id is not None:
            # Persist the per-store price map as JSON, one file per Saholic entity id.
            file_path = str("/usr/msp_dir/%s" % saholic_id)
            try:
                file_to_write = open(file_path, "w")
            except IOError as e:
                # open() raises on failure rather than returning None.
                print e
            else:
                json.dump(map, file_to_write, indent = 4)
                file_to_write.close()