Subversion Repositories SmartDukaan

Rev

Rev 147 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
136 ashish 1
'''
2
Created on 11-May-2010
3
 
4
@author: gaurav
5
'''
6
 
7
from scrapy.spider import BaseSpider
8
from scrapy.selector import HtmlXPathSelector
9
from scrapy.http import Request
10
 
11
from demo.items import DemoItem
12
from scrapy.contrib.spidermiddleware import referer
13
from scrapy.http.headers import Headers
14
from scrapy.http.request.form import FormRequest
15
from scrapy.log import msg
16
from scrapy.http.response import Response
17
 
18
from datastore import DataAccessor
19
from datastore.DataAccessor import DataHelper
20
 
21
 
22
class vendor_links(BaseSpider):
    """Spider that lists the vendors linked from Infibeam's Mobiles page.

    For every vendor entry found under the "Mobiles" heading it prints the
    vendor name followed by the absolute URL of that vendor's page.
    """

    domain_name = "vendors"
    start_urls = [
        "http://www.infibeam.com/Mobiles/"
    ]

    def start_requests(self):
        """Build the initial requests, one per entry in ``start_urls``.

        Each request gets a ``Referer`` header (unless one is already set)
        and is routed to ``parse``.

        Returns:
            A list of ``Request`` objects.
        """
        requests = []
        for url in self.start_urls:
            request = Request(url=url, callback=self.parse)
            request.headers.setdefault("Referer", "www.google.com/search")
            requests.append(request)
        return requests

    def parse(self, response):
        """Print the name and absolute URL of each vendor on the page.

        Vendors are the ``<li>`` entries of the list inside the table cell
        whose ``<h3>`` heading is "Mobiles". The number of entries found is
        printed first. Entries that have no link carrying both a ``title``
        and an ``href`` are skipped instead of aborting the whole page.

        Args:
            response: the downloaded page to extract vendor links from.
        """
        base_url = "http://www.infibeam.com"
        hxs = HtmlXPathSelector(response)
        vendor_info = hxs.select('//td[h3="Mobiles"]/ul/li')
        print(len(vendor_info))

        items = []
        for entry in vendor_info:
            names = entry.select('.//a/@title')
            sites = entry.select('.//a/@href')
            # Indexing [0] on an empty result would raise IndexError, so
            # ignore list items that do not contain a usable link.
            if not names or not sites:
                continue
            items.append({
                'name': names[0].extract(),
                'site': sites[0].extract(),
            })

        # Storing vendors through DataHelper.add_vendor(name, url) is
        # currently disabled; the results are only printed.
        for item in items:
            # The extracted href is concatenated as-is: coercing it with
            # str() could fail on non-ASCII characters under Python 2.
            vendor_url = base_url + item['site']
            print(item['name'])
            print(vendor_url)
51
 
52
# Module-level instance of the spider defined above.
# NOTE(review): presumably this is how the Scrapy version in use discovers
# the spider in this module -- confirm before removing or renaming.
SPIDER = vendor_links()