Subversion Repositories SmartDukaan

Rev

Rev 17182 | Rev 17268 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 17182 Rev 17263
Line 1... Line 1...
1
from dtr.utils.utils import fetchResponseUsingProxy, transformUrl
1
from dtr.utils.utils import fetchResponseUsingProxy
2
from sys import exit
2
from sys import exit
3
import json
3
import json
4
import re
-
 
5
import traceback
4
import traceback
6
import datetime
5
import datetime
7
from pyquery import PyQuery
-
 
8
 
6
 
9
 
7
 
10
 
8
 
11
headers = { 
9
headers = {
12
            'User-agent':'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.120 Safari/537.36',
-
 
13
            'Accept' : 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',      
-
 
14
            'Accept-Language' : 'en-US,en;q=0.8',                     
-
 
15
            'Accept-Charset' : 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
-
 
16
            'Connection':'keep-alive',
10
           'Browser-Name': 'Chrome',
-
 
11
           'User-Agent': 'Mozilla/5.0 (Linux; Android 5.1.1; A0001 Build/LMY48B; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/43.0.2357.121 Mobile Safari/537.36 FKUA/Retail/550900/Android/Mobile (OnePlus/A0001)',
17
            'Accept-Encoding' : 'gzip,deflate,sdch'
12
           'Host': 'mobileapi.flipkart.net'
18
        }
13
        }
19
 
14
 
20
 
15
 
21
class FlipkartProductPageScraper:
16
class FlipkartProductPageScraper:
22
    def __init__(self):
17
    def __init__(self):
23
        self.count_trials = 0
18
        self.count_trials = 0
24
        self.redirectCount = 0
19
        self.redirectCount = 0
25
    
20
    
26
    def read(self, url):
21
    def read(self, identifier):
27
        response_data = ""
22
        response_data = ""
28
        redirect_url = ""
23
        self.fsn = identifier.upper().strip()
29
        #url = transformUrl(url,2)
24
        url = "http://mobileapi.flipkart.net/2/discover/productInfo/0?pids=%s"%(self.fsn)
30
        try:
25
        try:
31
            
26
            
32
            """quick fix,need to add it conf""" 
27
            """quick fix,need to add it conf""" 
33
            
28
            
34
            response_data = fetchResponseUsingProxy(url, headers, proxy=True)                
29
            response_data = fetchResponseUsingProxy(url, headers, proxy=True)                
Line 43... Line 38...
43
 
38
 
44
            if self.count_trials < 3:
39
            if self.count_trials < 3:
45
                return self.read(url)
40
                return self.read(url)
46
 
41
 
47
        self.response_data=response_data
42
        self.response_data=response_data
48
        print datetime.datetime.now()
-
 
49
        return self.createData(url,redirect_url)
43
        return self.parse()
50
    
44
 
51
    def createData(self,url, redirect_url):
45
    def parse(self):
52
        pq = PyQuery(self.response_data)
46
        input_json = json.loads(self.response_data)
-
 
47
        inStock = not input_json['RESPONSE']['productInfo'][self.fsn]['availabilityDetails']['product.isOOS']
53
        buyBoxPrice = float(pq('span.selling-price.omniture-field').attr['data-evar48'])
48
        preferred_seller = input_json['RESPONSE']['productInfo'][self.fsn]['preferredListingId']
54
        inStock = 1
49
        buyBoxPrice = 0
55
        try:
50
        lowestSp = 0
56
            sellerJson = pq('div.seller-table-wrap').attr['data-config']
51
        for x in (input_json['RESPONSE']['productInfo'][self.fsn]['marketplace']):
57
            x = json.loads(sellerJson)
52
#             print x['marketplace.listId'],
58
            lines = sorted(x['dataModel'], key=lambda k: k['priceInfo'].get('sellingPrice', 0), reverse=False)
-
 
59
            sellingPrice =  float(lines[0]['priceInfo']['sellingPrice'])
-
 
60
            try:
53
#             print '\t',
61
                offerText = lines[0]['offerInfo']['listingOffers'][0]['description']
54
#             print x['product.availabilityDetails']['product.isOOS'],
62
            except:
55
#             print '\t',
63
                offerText = ""
-
 
64
            return {'lowestSp':sellingPrice,'inStock':inStock,'buyBoxPrice':buyBoxPrice}
-
 
65
        except:
-
 
66
            """Not able to parse seller wrap section, probably due to only single seller option"""
-
 
67
            sellingPrice = buyBoxPrice
56
#             print x['product.selling_price'],
68
            stockDiv = pq('div.out-of-stock')
57
#             print '\t',
69
            commingSoonDiv = pq('div.coming-soon-status')
58
#             print x['marketplace.seller.shippingCharge'],
70
            if len(stockDiv) > 0 or len(commingSoonDiv) > 0:
-
 
71
                inStock = 0
59
#             print '\t',
72
            return {'lowestSp':sellingPrice,'inStock':inStock,'buyBoxPrice':buyBoxPrice}
60
#             print x['seller.displayName']
73
            
61
            
-
 
62
            if not x['product.availabilityDetails']['product.isOOS']:
-
 
63
                if lowestSp == 0 or lowestSp > x['product.selling_price']:
-
 
64
                    lowestSp = x['product.selling_price']
-
 
65
                
-
 
66
                if x['marketplace.listId'] == preferred_seller:
-
 
67
                    buyBoxPrice = x['product.selling_price']
-
 
68
            
74
if __name__ == '__main__':
69
                    
-
 
70
        return {'lowestSp':lowestSp,'inStock':int(inStock),'buyBoxPrice':buyBoxPrice} 
-
 
71
            
-
 
72
 
-
 
73
def main():
75
    print datetime.datetime.now()
74
    print datetime.datetime.now()
76
    scraper = FlipkartProductPageScraper()
75
    scraper = FlipkartProductPageScraper()
77
    print scraper.read('http://www.flipkart.com/moto-x-play/p/itmeajtqp9sfxgsk?pid=MOBEAJTQRH4CCRYM')
76
    print scraper.read('MOBE3UN6EURWTZCA')
78
    print datetime.datetime.now()
77
    print datetime.datetime.now()
-
 
78
 
-
 
79
 
-
 
80
if __name__ == '__main__':
-
 
81
    main()
79
82