| 138 |
ashish |
1 |
'''
|
|
|
2 |
Created on 14-May-2010
|
|
|
3 |
|
|
|
4 |
@author: gaurav
|
|
|
5 |
'''
|
| 152 |
ashish |
6 |
|
|
|
7 |
|
|
|
8 |
from scrapy.spider import BaseSpider
|
|
|
9 |
from scrapy.selector import HtmlXPathSelector
|
|
|
10 |
from scrapy.http import Request
|
|
|
11 |
|
|
|
12 |
from demo.items import DemoItem
|
|
|
13 |
from scrapy.contrib.spidermiddleware import referer
|
|
|
14 |
from scrapy.http.headers import Headers
|
|
|
15 |
from scrapy.http.request.form import FormRequest
|
|
|
16 |
from scrapy.log import msg
|
|
|
17 |
from scrapy.http.response import Response
|
|
|
18 |
|
|
|
19 |
from datastore import DataAccessor
|
|
|
20 |
from datastore.DataAccessor import DataHelper
|
|
|
21 |
|
|
|
22 |
|
|
|
23 |
class vendor_links(BaseSpider):
    """Spider for www.univercell.in that collects per-vendor store links.

    Flow:
      1. `start_requests` registers the supplier in the local datastore and
         issues the initial request with a faked Referer header.
      2. `parse` extracts each vendor's name and link from the mobiles tab,
         strips the ``;jsessionid=...`` path segment from the href, rewrites
         ``populate`` -> ``rePopulate`` in the action name, and stores each
         (name, absolute URL) pair via `DataHelper.add_univervendor`.
    """

    domain_name = "univercell"
    start_urls = [
        "http://www.univercell.in/mobiles/populateStore.action"
    ]

    def start_requests(self):
        """Register the supplier, then yield the seed request.

        Returns a single-element list so old-style Scrapy can iterate it.
        """
        da = DataHelper()
        da.add_supplier(self.domain_name, "www.univercell.in")
        request = Request(url="http://www.univercell.in/mobiles/populateStore.action", callback=self.parse)
        # The site may behave differently without a referer; present a
        # plausible one instead of none.
        request.headers.setdefault("Referer", "www.google.com/search")
        return [request]

    def parse(self, response):
        """Extract (vendor name, store URL) pairs and persist them.

        NOTE(review): items are collected but never returned to the engine;
        persistence happens directly through DataHelper. Preserved as-is.
        """
        base_url = "http://www.univercell.in"
        hxs = HtmlXPathSelector(response)
        rows = hxs.select('//div[@id="mobilesTab"]/table/tr[1]/td/table/tr')

        items = []
        for row in rows:
            item = {}
            item['name'] = row.select('.//a/text()')[0].extract()
            href = str(row.select('.//a/@href')[0].extract())
            # Drop the ";jsessionid=..." segment that sits between ';' and
            # the query string.  Guard against find() returning -1: the old
            # code sliced with -1 and corrupted the URL when either
            # delimiter was missing.
            semi = href.find(";")
            query = href.find("?")
            if semi != -1 and query != -1 and semi < query:
                href = href[:semi] + href[query:]
            # Vendor pages are served by the "rePopulate" action variant.
            item['site'] = href.replace("populate", "rePopulate")
            items.append(item)

        da = DataHelper()
        for item in items:
            vendor_url = base_url + item['site']
            da.add_univervendor(item['name'].strip(), vendor_url)
            # print(x) with a single argument is valid and identical in
            # both Python 2 and 3, unlike the old `print x` statement form.
            print(item['name'])
            print(vendor_url)
# Old-style Scrapy spider registration: presumably the framework discovers
# spiders through this module-level SPIDER instance (pre-0.10 convention) —
# verify against the installed Scrapy version before modernizing.
SPIDER = vendor_links()