Subversion Repositories SmartDukaan

Rev

Rev 152 | Rev 239 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
138 ashish 1
'''
2
Created on 14-May-2010
3
 
4
@author: gaurav
5
'''
152 ashish 6
 
7
 
8
from scrapy.spider import BaseSpider
9
from scrapy.selector import HtmlXPathSelector
10
from scrapy.http import Request
11
 
12
from demo.items import DemoItem
13
from scrapy.contrib.spidermiddleware import referer
14
from scrapy.http.headers import Headers
15
from scrapy.http.request.form import FormRequest
16
from scrapy.log import msg
17
from scrapy.http.response import Response
18
 
19
from datastore import DataAccessor
20
from datastore.DataAccessor import DataHelper
21
 
22
 
23
class vendor_links(BaseSpider):
    """Scrape the vendor (brand) list from univercell.in.

    Fetches the store-population page, pulls every brand row out of the
    mobiles tab, normalizes each brand's link (drops the ';jsessionid=...'
    path segment and switches the action from 'populate' to 'rePopulate'),
    and persists (name, url) pairs through DataHelper.
    """

    domain_name = "univercell"
    start_urls = [
          "http://www.univercell.in/mobiles/populateStore.action"
    ]

    def start_requests(self):
        """Register the supplier, then issue the initial request.

        A Referer header is set explicitly because the site appears to
        expect one — NOTE(review): presumably it rejects referer-less
        requests; confirm against the target site.
        """
        da = DataHelper()
        da.add_supplier(self.domain_name, "www.univercell.in")
        request = Request(url="http://www.univercell.in/mobiles/populateStore.action", callback=self.parse)
        request.headers.setdefault("Referer", "www.google.com/search")
        return [request]

    @staticmethod
    def _clean_link(href):
        """Return *href* with the ';...' span (up to the first '?') removed
        and 'populate' replaced by 'rePopulate'.

        Bug fix over the original inline code: str.find() returns -1 when
        the character is absent, and the original sliced href[a:b] with
        those raw indices, silently removing the wrong span. Now the strip
        only happens when ';' is actually present.
        """
        href = str(href)
        a = href.find(";")
        if a != -1:
            b = href.find("?")
            # Drop the session-id segment; keep the query string if any.
            href = href[:a] + (href[b:] if b != -1 else "")
        return href.replace("populate", "rePopulate")

    def parse(self, response):
        """Extract one (name, site) pair per row of the mobiles table,
        persist each via DataHelper.add_univervendor, and echo them."""
        base_url = "http://www.univercell.in"
        hxs = HtmlXPathSelector(response)
        vendor_rows = hxs.select('//div[@id="mobilesTab"]/table/tr[1]/td/table/tr')
        items = []
        for row in vendor_rows:
            item = {}
            item['name'] = row.select('.//a/text()')[0].extract()
            item['site'] = self._clean_link(row.select('.//a/@href')[0].extract())
            items.append(item)

        da = DataHelper()
        for item in items:
            full_url = base_url + str(item['site'])
            da.add_univervendor(item['name'].strip(), full_url)
            # Parenthesized single-arg print is identical in Python 2 and 3.
            print(item['name'])
            print(full_url)
# Module-level instance: legacy Scrapy spider managers discover spiders via
# a module attribute named SPIDER.
SPIDER = vendor_links()