Subversion Repositories SmartDukaan

Rev

Rev 12402 | Rev 12411 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 12402 Rev 12410
Line 29... Line 29...
29
        self.count_trials = 0
29
        self.count_trials = 0
30
    
30
    
31
    def read(self, urls, findStore):
31
    def read(self, urls, findStore):
32
        returnMap = {}
32
        returnMap = {}
33
        print datetime.datetime.now()
33
        print datetime.datetime.now()
-
 
34
        header = {'User-Agent':'Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.218 Safari/535.1'}
34
        rs = (grequests.get(u,stream=False) for u in urls)
35
        rs = (grequests.get(u, headers=header) for u in urls)
35
        for x in grequests.map(rs):
36
        for x in grequests.map(rs):
36
            soup = BeautifulSoup(x.text,convertEntities=BeautifulSoup.HTML_ENTITIES)
37
            soup = strip_tags(x.text,invalid_tags)
-
 
38
            print soup
37
            for tag in soup.findAll(True):
39
            for tag in soup.findAll(True):
38
                if tag.name in invalid_tags:
40
                if tag.name in invalid_tags:
39
                    s = ""
41
                    s = ""
40
        
42
        
41
                    for c in tag.contents:
43
                    for c in tag.contents:
Line 47... Line 49...
47
            x.close()
49
            x.close()
48
            sellerCount=0
50
            sellerCount=0
49
            info = []
51
            info = []
50
            sellerData = soup.findAll("div" , {"class" : "a-row a-spacing-mini olpOffer"})
52
            sellerData = soup.findAll("div" , {"class" : "a-row a-spacing-mini olpOffer"})
51
            dataLength = len(sellerData)
53
            dataLength = len(sellerData)
-
 
54
            print dataLength
52
            for data in sellerData:
55
            for data in sellerData:
53
                tempMap={}
56
                tempMap={}
54
                price = data.find('span', attrs={'class' : re.compile('.*olpOfferPrice*')}).find('span').text
57
                price = data.find('span', attrs={'class' : re.compile('.*olpOfferPrice*')}).find('span').text
55
                unitCost = float(price.replace("Rs.","").replace(",",""))
58
                unitCost = float(price.replace("Rs.","").replace(",",""))
56
                shippingCost = data.find('p', attrs={'class' : re.compile('.*olpShippingInfo*')}).find('span').text
59
                shippingCost = data.find('p', attrs={'class' : re.compile('.*olpShippingInfo*')}).find('span').text
Line 103... Line 106...
103
            return returnMap
106
            return returnMap
104
    
107
    
105
    def findStoreFront(self,returnMap):
108
    def findStoreFront(self,returnMap):
106
        storeFront={}
109
        storeFront={}
107
        for arr in returnMap.itervalues():
110
        for arr in returnMap.itervalues():
-
 
111
            print "arr is ",arr
108
            for dic in arr:
112
            for dic in arr:
-
 
113
                print "dic ",dic
109
                if dic['isStoreFront']!='True':
114
                if dic['isStoreFront']!='True':
110
                    storeFront[dic.get('storeUrl')] =''
115
                    storeFront[dic.get('storeUrl')] =''
111
        rs = (grequests.get(u,stream=False) for u in storeFront.keys())
116
        rs = (grequests.get(u,stream=False) for u in storeFront.keys())
112
        for x in grequests.map(rs):
117
        for x in grequests.map(rs):
113
            soup = strip_tags(x.text,invalid_tags)
118
            soup = strip_tags(x.text,invalid_tags)
Line 152... Line 157...
152
        #return soup.title.string
157
        #return soup.title.string
153
            
158
            
154
 
159
 
155
if __name__ == '__main__':
160
if __name__ == '__main__':
156
    urls=[]
161
    urls=[]
157
    urls.append("http://amazon.in/gp/offer-listing/B003SNIN9Q/ref=olp_sort_ps")
162
    urls.append("http://amazon.in/gp/offer-listing/B007VZFZO8/ref=olp_sort_ps")
158
    urls.append("http://amazon.in/gp/offer-listing")
-
 
159
#    asin = []
163
#    asin = []
160
#    for a in amazonlisted:
164
#    for a in amazonlisted:
161
#        asin.append(a.asin)
165
#        asin.append(a.asin)
162
#        urls.append('http://www.amazon.in/gp/offer-listing/'+str(a.asin)+'/ref=olp_sort_ps')
166
#        urls.append('http://www.amazon.in/gp/offer-listing/'+str(a.asin)+'/ref=olp_sort_ps')
163
#        if len(urls)==50:
167
#        if len(urls)==50: