Subversion Repositories SmartDukaan

Rev

Rev 152 | Rev 239 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 152 Rev 169
Line 19... Line 19...
19
from datastore import DataAccessor
19
from datastore import DataAccessor
20
from datastore.DataAccessor import DataHelper
20
from datastore.DataAccessor import DataHelper
21
 
21
 
22
 
22
 
23
class vendor_links(BaseSpider):
23
class vendor_links(BaseSpider):
24
    domain_name = "univercellvendors"
24
    domain_name = "univercell"
25
    start_urls = [
25
    start_urls = [
26
          "http://www.univercell.in/mobiles/populateStore.action"
26
          "http://www.univercell.in/mobiles/populateStore.action"
27
    ]
27
    ]
28
    
28
    
29
    def start_requests(self):
29
    def start_requests(self):
-
 
30
        da = DataHelper()
-
 
31
        da.add_supplier(self.domain_name, "www.univercell.in")
30
        request = Request(url = "http://www.univercell.in/mobiles/populateStore.action", callback=self.parse)
32
        request = Request(url = "http://www.univercell.in/mobiles/populateStore.action", callback=self.parse)
31
        request.headers.setdefault("Referer", "www.google.com/search")
33
        request.headers.setdefault("Referer", "www.google.com/search")
32
        return [request]
34
        return [request]
33
    
35
    
34
    def parse(self, response):
36
    def parse(self, response):
35
        str1 = "http://www.univercell.in"
37
        str1 = "http://www.univercell.in"
36
        hxs = HtmlXPathSelector(response)
38
        hxs = HtmlXPathSelector(response)
37
        vendor_info = hxs.select('//div[@id="mobilesTab"]/table/tr[1]/td/table/tr')
39
        vendor_info = hxs.select('//div[@id="mobilesTab"]/table/tr[1]/td/table/tr')
38
        print len(vendor_info)
40
        #print len(vendor_info)
39
        items = []
41
        items = []
40
        for i in vendor_info:
42
        for i in vendor_info:
41
            item = {}
43
            item = {}
42
            item['name'] = i.select('.//a/text()')[0].extract()
44
            item['name'] = i.select('.//a/text()')[0].extract()
43
            temp = i.select('.//a/@href')[0].extract()
45
            temp = i.select('.//a/@href')[0].extract()