Subversion Repositories SmartDukaan

Rev

Rev 185 | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 185 Rev 234
Line 16... Line 16...
16
from scrapy.http.response import Response
16
from scrapy.http.response import Response
17
 
17
 
18
 
18
 
19
from datastore import DataAccessor
19
from datastore import DataAccessor
20
from datastore.DataAccessor import DataHelper
20
from datastore.DataAccessor import DataHelper
-
 
21
from html2text.unescaping import *
21
 
22
 
22
class mobstore_price(BaseSpider):
23
class mobstore_price(BaseSpider):
23
    
24
    
24
    def __init__(self):
25
    def __init__(self):
-
 
26
       MOBILESTORE_DOMAINNAME1 = "mobilestore1"   
25
       self.domain_name = "mobstorephones"
27
       self.domain_name = MOBILESTORE_DOMAINNAME1 
-
 
28
       # get urls from the database and append them to the list for crawling
26
       da = DataHelper()
29
       da = DataHelper()
27
       for pitem in da.get_allmobstoreurls():
30
       for pitem in da.get_allmobstoreurls():
28
            self.start_urls.append(pitem.url.strip())
31
            self.start_urls.append(pitem.url.strip())
29
    
32
    
30
    def start_requests(self):
33
    def start_requests(self):
31
        listreq = []
34
        listreq = []
-
 
35
        #for each request a referer has to be set
-
 
36
        MOBILESTORE_REFERER = "www.google.com/search"
32
        for url1 in self.start_urls:
37
        for url1 in self.start_urls:
33
            request = Request(url = url1, callback=self.parse)
38
            request = Request(url = str(url1), callback=self.parse)
34
            request.headers.setdefault("Referer", "www.themobilestore.in")
39
            request.headers.setdefault("Referer", MOBILESTORE_REFERER)
35
            listreq.append(request)
40
            listreq.append(request)
36
        return listreq
41
        return listreq
37
       
42
        
38
    def parse(self, response):
43
    def parse(self, response):
39
        site = response.url
44
        site = response.url
40
        vatplustax = 0
45
        site = unescape(site)
-
 
46
        MOBILESTORE_VATPLUSTAX = 0
-
 
47
        
-
 
48
        #retrieving model-name from the url
41
        pos1 = pos2 = 0
49
        pos1 = pos2 = 0
42
        temp = ""
50
        temp = ""
43
        pos1 = site.rfind('/')
51
        pos1 = site.rfind('/')
44
        if pos1 != -1:
52
        if pos1 != -1:
45
            temp = site[pos1+1:len(site)]
53
            temp = site[pos1+1:len(site)]
46
            #pos2 = site.rfind('/',0,pos1-1)
-
 
47
        #if pos2 > 0:
-
 
48
            #temp = site[pos2+1:len(site)]
-
 
49
        pos3 = temp.find('.')
54
        pos3 = temp.find('.')
50
        temp1 = temp[pos3:len(temp)]
55
        temp1 = temp[pos3:len(temp)]
51
        name = temp.replace(temp1,"")         
56
        name = temp.replace(temp1,"")         
52
        hxs = HtmlXPathSelector(response)
57
        hxs = HtmlXPathSelector(response)
53
        prices = hxs.select('//div[@id ="priceComp"]//tr[2]/td[3]/span/text()')
58
        MOBILESTORE_XPATH2 = '//div[@id ="priceComp"]//tr[2]/td[3]/span/text()' 
-
 
59
        prices = hxs.select(MOBILESTORE_XPATH2)
54
        
60
        
-
 
61
        #removelist is used for converting price to decimal format containing only numbers and '.'
-
 
62
        MOBILESTORE_REMOVELIST = ["Rs",",","-","/","Rs."]
55
        da = DataHelper()
63
        da = DataHelper()
56
        for price in prices:
64
        for price in prices:
57
             name = str(name).strip()
65
             name = str(name).strip()
58
             price = price.extract()
66
             price = price.extract()
59
             price = str(price).strip()
67
             price = str(price).strip()
60
             price = price.replace("Rs", "")
68
             if price != '':        
-
 
69
                for r in MOBILESTORE_REMOVELIST: 
61
             price = price.replace("/", "")
70
                    while price.find(r) != -1:
62
             price = price.replace("-", "")
71
                        price = price.replace(r, "")
63
             price = price.replace(".", "")
72
             price = price.strip()
64
             shown_pr = int(price)
73
             shown_pr = int(price)
65
             final_pr = shown_pr + vatplustax
74
             final_pr = shown_pr + MOBILESTORE_VATPLUSTAX 
66
             da.add_new_mobstorephone(name,shown_pr,final_pr)
75
             da.add_new_mobstorephone(name,shown_pr,final_pr)
67
             print name
-
 
68
             print final_pr
-
 
69
             print "\n"
-
 
70
    '''          
-
 
71
        for i in items:
-
 
72
            str1 = str(i['title']).strip() 
-
 
73
            print str1
-
 
74
            amnt = i['price'].replace(",","")
-
 
75
            amnt = amnt.replace("Rs", "")
-
 
76
            amnt = amnt.replace("/", "")
-
 
77
            amnt = amnt.replace("-", "")
-
 
78
            amnt = amnt.strip()
-
 
79
            vatplustax = 4*int(amnt)/100
-
 
80
            pr = int(amnt) + vatplustax 
-
 
81
            #print pr
-
 
82
            da.add_new_univerphone(str1,amnt,pr) 
-
 
83
        '''
-
 
84
SPIDER = mobstore_price()
76
SPIDER = mobstore_price()
85
 
77