Subversion Repositories SmartDukaan

Rev

Rev 170 | Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
154 ashish 1
'''
2
Created on 14-May-2010
3
 
4
@author: gaurav
5
'''
6
 
7
 
8
from scrapy.spider import BaseSpider
9
from scrapy.selector import HtmlXPathSelector
10
from scrapy.http import Request
11
 
12
from demo.items import DemoItem
13
from scrapy.contrib.spidermiddleware import referer
14
from scrapy.http.headers import Headers
15
from scrapy.http.request.form import FormRequest
16
from scrapy.log import msg
17
from scrapy.http.response import Response
18
#from datastore.DataAccessor import add_new_phone
19
 
20
from datastore import DataAccessor
21
from datastore.DataAccessor import DataHelper
22
 
23
class univercell_price(BaseSpider):
    """Scrapy spider that scrapes phone titles and prices from Univercell
    vendor pages.

    Start URLs are loaded from the datastore (one per vendor row returned
    by ``DataHelper.get_all_univervendors``); each scraped (title, price)
    pair is persisted via ``DataHelper.add_new_univerphone``.
    """

    def __init__(self):
        self.domain_name = "univercellphones"
        # Bug fix: create an instance-level list. The original appended to
        # the inherited class-level BaseSpider.start_urls, mutating shared
        # state across every spider instance/instantiation.
        self.start_urls = []
        da = DataHelper()
        for pitem in da.get_all_univervendors():
            self.start_urls.append(pitem.v_site.strip())

    def start_requests(self):
        """Build one Request per start URL, each with a fake Google referer.

        Returns a list of scrapy Request objects dispatched to ``parse``.
        """
        listreq = []
        for url1 in self.start_urls:
            request = Request(url=url1, callback=self.parse)
            request.headers.setdefault("Referer", "www.google.com/search")
            listreq.append(request)
        return listreq

    def parse(self, response):
        """Extract (title, price) pairs from the response and persist them.

        Scrapes every ``<td class="gray-border">`` cell, pulling the title
        from the anchor in row 2 and the price label from row 3, then
        stores each phone through DataHelper.
        """
        vatplustax = 0  # flat VAT+tax amount added onto every parsed price
        hxs = HtmlXPathSelector(response)
        sites = hxs.select('//td[@class="gray-border"]')
        msg(response.url)
        print(len(sites))
        items = []
        for site in sites:
            item = {}
            item['title'] = site.select('.//tr[2]/td/a/text()')[0].extract()
            item['price'] = site.select('.//tr[3]/th/label/text()')[0].extract()
            items.append(item)
        da = DataHelper()
        for i in items:
            str1 = str(i['title']).strip()
            print(str1)
            # Strip currency decoration, e.g. "Rs 12,500/-" -> "12500".
            amnt = i['price'].replace(",", "")
            amnt = amnt.replace("Rs", "")
            amnt = amnt.replace("/", "")
            amnt = amnt.replace("-", "")
            amnt = amnt.strip()
            pr = int(amnt) + vatplustax
            # NOTE(review): amnt is deliberately left as a string while pr
            # is an int, matching the original call — confirm that
            # add_new_univerphone expects this mix.
            da.add_new_univerphone(str1, amnt, pr)
80
 
81
SPIDER = univercell_price()
82