Subversion Repositories SmartDukaan

Rev

Rev 138 | Rev 169 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
138 ashish 1
'''
2
Created on 14-May-2010
3
 
4
@author: gaurav
5
'''
152 ashish 6
 
7
 
8
from scrapy.spider import BaseSpider
9
from scrapy.selector import HtmlXPathSelector
10
from scrapy.http import Request
11
 
12
from demo.items import DemoItem
13
from scrapy.contrib.spidermiddleware import referer
14
from scrapy.http.headers import Headers
15
from scrapy.http.request.form import FormRequest
16
from scrapy.log import msg
17
from scrapy.http.response import Response
18
 
19
from datastore import DataAccessor
20
from datastore.DataAccessor import DataHelper
21
 
22
 
23
class vendor_links(BaseSpider):
    '''
    Spider that reads the vendor rows of the Univercell store-listing page
    and stores each vendor's name and absolute link through DataHelper.
    '''
    domain_name = "univercellvendors"
    start_urls = [
          "http://www.univercell.in/mobiles/populateStore.action"
    ]

    # Prefix prepended to every href extracted from the listing page.
    _BASE_URL = "http://www.univercell.in"

    @staticmethod
    def _strip_session_segment(href):
        '''
        Return href without the ";..." path parameter that precedes the query.

        The segment runs from the first ";" up to the first "?". When there
        is no "?" the segment runs to the end of the string. An href without
        ";" (or whose first ";" appears only after the "?") is returned
        unchanged.
        '''
        start = href.find(";")
        query = href.find("?")
        if start == -1 or (query != -1 and query < start):
            return href
        if query == -1:
            # No query string: drop everything from ";" onwards. Slicing with
            # the raw -1 from find() would leave the last character behind.
            return href[:start]
        return href[:start] + href[query:]

    def start_requests(self):
        '''
        Build one Request per entry in start_urls, each routed to parse and
        carrying a preset Referer header.

        @return: list of Request objects
        '''
        requests = []
        for url in self.start_urls:
            request = Request(url = url, callback=self.parse)
            request.headers.setdefault("Referer", "www.google.com/search")
            requests.append(request)
        return requests

    def parse(self, response):
        '''
        Extract (name, link) for every vendor row in the response and save
        each pair with DataHelper.add_univervendor.

        Rows that contain no anchor are skipped. In every stored link the
        ";..." path parameter is removed and "populate" is replaced by
        "rePopulate".

        @param response: the downloaded store-listing page
        '''
        hxs = HtmlXPathSelector(response)
        vendor_info = hxs.select('//div[@id="mobilesTab"]/table/tr[1]/td/table/tr')
        print(len(vendor_info))

        # Collect everything first so that an extraction error cannot leave
        # a partially written set of vendors behind.
        items = []
        for row in vendor_info:
            names = row.select('.//a/text()')
            hrefs = row.select('.//a/@href')
            if not names or not hrefs:
                # Row without a link (e.g. a spacer or header row): nothing
                # to store, and indexing [0] would raise IndexError.
                continue
            href = str(hrefs[0].extract())
            item = {}
            item['name'] = names[0].extract()
            item['site'] = self._strip_session_segment(href).replace("populate", "rePopulate")
            items.append(item)

        da = DataHelper()
        for item in items:
            vendor_url = self._BASE_URL + item['site']
            da.add_univervendor(item['name'].strip(), vendor_url)
            print(item['name'])
            print(vendor_url)
57
 
58
# Module-level instance of the spider defined above.
# NOTE(review): presumably the Scrapy version in use locates spiders through
# this SPIDER name — confirm against the project's Scrapy release.
SPIDER = vendor_links()