Subversion Repositories SmartDukaan

Rev

Rev 138 | Rev 169 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed

'''
Created on 14-May-2010

@author: gaurav
'''


from scrapy.spider import BaseSpider
from scrapy.selector import HtmlXPathSelector
from scrapy.http import Request

from demo.items import DemoItem
from scrapy.contrib.spidermiddleware import referer
from scrapy.http.headers import Headers
from scrapy.http.request.form import FormRequest
from scrapy.log import msg
from scrapy.http.response import Response

from datastore import DataAccessor
from datastore.DataAccessor import DataHelper


class vendor_links(BaseSpider):
    """Spider that records the vendor links listed on Univercell's store page.

    For every table row that contains a link, the link text is taken as the
    vendor name and the link target is rewritten (``;...`` path segment
    removed, "populate" replaced by "rePopulate") before both are handed to
    ``DataHelper.add_univervendor``.
    """

    domain_name = "univercellvendors"
    start_urls = [
          "http://www.univercell.in/mobiles/populateStore.action"
    ]

    def start_requests(self):
        """Return one request per entry in ``start_urls`` with a Referer set."""
        requests = []
        for url in self.start_urls:
            request = Request(url=url, callback=self.parse)
            request.headers.setdefault("Referer", "www.google.com/search")
            requests.append(request)
        return requests

    @staticmethod
    def _vendor_path(href):
        """Return *href* without its ``;...`` segment, pointing at rePopulate.

        The segment starting at the first ";" is dropped up to (not
        including) the first "?", or up to the end of the string when there
        is no "?".  A ";" that only appears after the "?" is left alone.
        """
        semicolon = href.find(";")
        query = href.find("?")
        if semicolon != -1 and (query == -1 or semicolon < query):
            # Slicing with a raw find() result of -1 would keep the last
            # character of the dropped segment, so resolve the end explicitly.
            end = len(href) if query == -1 else query
            href = href[:semicolon] + href[end:]
        return href.replace("populate", "rePopulate")

    def parse(self, response):
        """Extract vendor names and links from *response* and store them.

        Prints the number of candidate rows, then the name and absolute URL
        of every vendor that is stored.  Returns ``None``.
        """
        base_url = "http://www.univercell.in"
        hxs = HtmlXPathSelector(response)
        rows = hxs.select('//div[@id="mobilesTab"]/table/tr[1]/td/table/tr')
        print(len(rows))

        vendors = []
        for row in rows:
            names = row.select('.//a/text()')
            hrefs = row.select('.//a/@href')
            if not names or not hrefs:
                # A row without a link carries no vendor; indexing [0] on an
                # empty selection would raise IndexError and abort the parse.
                continue
            # str() is kept so the data layer receives the same string type
            # for the URL as it did before.
            href = str(hrefs[0].extract())
            vendors.append({
                'name': names[0].extract(),
                'site': self._vendor_path(href),
            })

        # Everything is parsed before the first write, as in the original flow.
        da = DataHelper()
        for vendor in vendors:
            url = base_url + vendor['site']
            da.add_univervendor(vendor['name'].strip(), url)
            print(vendor['name'])
            print(url)
            
# Module-level spider instance, created at import time.
# NOTE(review): presumably the Scrapy version in use discovers spiders through
# this SPIDER name - confirm before renaming or removing.
SPIDER = vendor_links()