# Rev 185 | Blame | Compare with Previous | Last modification | View Log | RSS feed
'''
Created on 20-May-2010

@author: gaurav
'''
from scrapy.spider import BaseSpider
from scrapy.selector import HtmlXPathSelector
from scrapy.http import Request
from demo.items import DemoItem
from scrapy.contrib.spidermiddleware import referer
from scrapy.http.headers import Headers
from scrapy.http.request.form import FormRequest
from scrapy.log import msg
from scrapy.http.response import Response
from datastore import DataAccessor
from datastore.DataAccessor import DataHelper
from html2text.unescaping import *

# Domain name this spider registers itself under.
MOBILESTORE_DOMAINNAME1 = "mobilestore1"

# Referer header value attached to every outgoing request.
MOBILESTORE_REFERER = "www.google.com/search"

# Amount added to the displayed price to obtain the final price.
MOBILESTORE_VATPLUSTAX = 0

# Text nodes holding the prices on a product page.
MOBILESTORE_XPATH2 = '//div[@id ="priceComp"]//tr[2]/td[3]/span/text()'

# Substrings stripped from a scraped price before it is converted to int.
# "Rs." must come before "Rs": removing "Rs" first leaves a stray "." behind,
# after which "Rs." can never match and int() fails on the leftover dot.
MOBILESTORE_REMOVELIST = ["Rs.", "Rs", ",", "-", "/"]


def _model_name_from_url(url):
    '''Return the last path segment of *url* up to its first ".".

    Returns an empty string when *url* contains no "/". A segment without
    any "." is returned unchanged.
    '''
    slash_at = url.rfind('/')
    if slash_at == -1:
        return ""
    segment = url[slash_at + 1:]
    # Cut at the first dot only when there is one; slicing from find()'s -1
    # would otherwise pick the last character and remove it everywhere.
    return segment.split('.', 1)[0]


def _clean_price(raw_price):
    '''Strip every MOBILESTORE_REMOVELIST token and surrounding whitespace.'''
    cleaned = raw_price.strip()
    for token in MOBILESTORE_REMOVELIST:
        # Loop until the token is gone: one removal can join the text on
        # either side into a new occurrence of the same token.
        while token in cleaned:
            cleaned = cleaned.replace(token, "")
    return cleaned.strip()


class mobstore_price(BaseSpider):
    '''Spider that scrapes phone prices from the pages whose URLs are
    supplied by DataHelper.get_allmobstoreurls() and stores each price
    through DataHelper.add_new_mobstorephone().
    '''

    def __init__(self):
        '''Set the spider's domain name and load its start URLs.'''
        self.domain_name = MOBILESTORE_DOMAINNAME1
        # Get urls from the database and use them as the crawl list.
        da = DataHelper()
        # Build a fresh per-instance list instead of appending in place, so
        # a list shared at class level (if that is where start_urls lives;
        # BaseSpider.__init__ is not called here) is never mutated.
        self.start_urls = list(self.start_urls) + [
            pitem.url.strip() for pitem in da.get_allmobstoreurls()
        ]

    def start_requests(self):
        '''Return one Request per start URL, each with a Referer header and
        self.parse as its callback.
        '''
        requests = []
        # For each request a referer has to be set.
        for start_url in self.start_urls:
            request = Request(url=str(start_url), callback=self.parse)
            request.headers.setdefault("Referer", MOBILESTORE_REFERER)
            requests.append(request)
        return requests

    def parse(self, response):
        '''Extract the prices from *response* and store them.

        The model name is taken from the last segment of the response URL.
        Every non-blank price matched by MOBILESTORE_XPATH2 is cleaned,
        converted to int and passed to DataHelper.add_new_mobstorephone()
        together with the final price (shown price + MOBILESTORE_VATPLUSTAX).

        Raises ValueError when a non-blank price is still not an integer
        after cleaning (for example, when it has a decimal part).
        '''
        site = unescape(response.url)
        # Retrieve the model name from the url.
        name = str(_model_name_from_url(site)).strip()

        hxs = HtmlXPathSelector(response)
        prices = hxs.select(MOBILESTORE_XPATH2)

        da = DataHelper()
        for price_node in prices:
            price = _clean_price(str(price_node.extract()))
            if price == '':
                # Nothing to store for a blank cell.
                continue
            shown_pr = int(price)
            final_pr = shown_pr + MOBILESTORE_VATPLUSTAX
            da.add_new_mobstorephone(name, shown_pr, final_pr)


SPIDER = mobstore_price()