Subversion Repositories SmartDukaan

'''
Created on 17-May-2010

@author: gaurav
'''

from scrapy.spider import BaseSpider
from scrapy.selector import HtmlXPathSelector
from scrapy.http import Request

from datastore.DataAccessor import DataHelper
import urllib


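# NOTE: the only datastore calls this spider relies on are
# DataHelper.get_all_ipbasic() and DataHelper.add_ipextra(). Their real
# implementation lives in the project's datastore package and is not shown
# in this file; the sketch below is only an assumption inferred from how
# they are used further down.
#
#     class DataHelper:
#         def get_all_ipbasic(self):
#             # yields rows whose v_site attribute holds a product page URL
#             ...
#         def add_ipextra(self, name, shown_pr, final_pr, guarantee_info, ship_info):
#             # persists the extra product details scraped in parse()
#             ...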
class indiaplaza_extra(BaseSpider):

    def __init__(self):
        self.domain_name = "indiaplazaextrainfo"
        # Seed start_urls with every product URL already collected by the
        # basic indiaplaza spider.
        self.start_urls = []
        da = DataHelper()
        for pitem in da.get_all_ipbasic():
            self.start_urls.append(pitem.v_site.strip())

    def start_requests(self):
        listreq = []
        for url1 in self.start_urls:
            request = Request(url=url1, callback=self.parse)
            # Present each request as if it came from a Google search page.
            request.headers.setdefault("Referer", "www.google.com/search")
            listreq.append(request)
        return listreq

    def parse(self, response):
        hxs = HtmlXPathSelector(response)
        #sites = hxs.select('//td[@class="gray-border"]')
        #msg(response.url)
        #print(len(sites))
        name = hxs.select('.//div[@class="finDetHdr"]/h1/text()')[0].extract()
        price = hxs.select('.//div[@class="priceArea"]/span[1]/text()')[0].extract()
        # The shipping and guarantee rows shift position on some product
        # pages, so fall back to an alternate XPath when the first one
        # matches nothing.
        try:
            ship_price = hxs.select('.//div[@class="priceArea"]/div[@class="row"][2]/text()')[0].extract()
        except IndexError:
            ship_price = hxs.select('.//div[@class="priceArea"]/div[@class="row"][2]/span/text()')[0].extract()
        try:
            guarantee_info = hxs.select('.//div[@class="priceArea"]/div[@class="row"][3]/text()')[0].extract()
        except IndexError:
            guarantee_info = hxs.select('.//div[@class="priceArea"]/div[@class="row"][4]/text()')[0].extract()
        ship_info = hxs.select('.//div[@class="priceArea"]/div[@class="row"][1]/text()')[0].extract()

        # urllib.unquote() returns the decoded string; assign the result
        # back instead of discarding it.
        name = urllib.unquote(name)
        price = urllib.unquote(price)
        ship_price = urllib.unquote(ship_price)
        guarantee_info = urllib.unquote(guarantee_info)
        ship_info = urllib.unquote(ship_info)
        if ship_price == "Free shipping":
            ship_price = "0"
        else:
            ship_price = ship_price.replace("Rs.", "")

        price = price.replace("Rs.", "")

        name = name.strip()
        price = price.strip()
        ship_price = ship_price.strip()
        guarantee_info = guarantee_info.strip()
        ship_info = ship_info.strip()

        # Drop thousands separators so int() does not choke on "12,999".
        shown_pr = int(price.replace(",", ""))
        final_pr = shown_pr + int(ship_price.replace(",", ""))
        print name
        print shown_pr
        print final_pr
        print guarantee_info
        print ship_info
        da = DataHelper()
        da.add_ipextra(name, shown_pr, final_pr, guarantee_info, ship_info)
    '''
        for site in sites:
            item = {}
            #tmp = site.select('.//tr[2]/td/a/text()')
            item['name'] = response.select('.//div[@class="finDetHdr"]/h1/text()')[0].extract()
            #psite = site.select(".//a[3][@href]/@href")[0].extract()
            item['price'] = site.select('.//tr[3]/th/label/text()')[0].extract()
            items.append(item)


        for i in items:
            str1 = str(i['title']).strip()
            print str1
            amnt = i['price'].replace(",","")
            amnt = amnt.replace("Rs", "")
            amnt = amnt.replace("/", "")
            amnt = amnt.replace("-", "")
            amnt = amnt.strip()
            pr = int(amnt) + vatplustax
            #print pr
            da.add_new_univerphone(str1,amnt,pr)
        '''
        #lt = len(da.get_all_phones())
        #print "length" + str(lt)
        #for ph in da.get_all_phones():
         #   print ph

        #f = open('/home/gaurav/twopassscrapy/pricelinks.txt', 'w')
        #for i in items:
            #f.write(i['title'])
            #f.write("\n")
            #f.write(i['link'])
            #f.write("\n")
        #f.close()

SPIDER = indiaplaza_extra()
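
# In the Scrapy 0.x versions this project targets, spiders were registered
# by exposing a module-level SPIDER instance as above. Assuming a standard
# project layout from that era, the crawl would be started with something
# like:
#
#     scrapy-ctl.py crawl indiaplazaextrainfo
#
# where the argument is the domain_name assigned in __init__.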