Subversion Repositories SmartDukaan

Rev

Rev 240 | Rev 267 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 240 Rev 266
Line 20... Line 20...
20
from datastore.DataCodeAccessor import *
20
from datastore.DataCodeAccessor import *
21
from datastore.DataAccessor import *
21
from datastore.DataAccessor import *
22
from html2text.unescaping import *
22
from html2text.unescaping import *
23
 
23
 
24
class univercell_price(BaseSpider):
24
class univercell_price(BaseSpider):
-
 
25
    """
-
 
26
    Documentation for class univercell_price
-
 
27
    This spider collects the information for the individual phones
-
 
28
    and stores them in the table datastore_datadefinition_univercell_items
25
    
29
    """
26
    def __init__(self):
30
    def __init__(self):
-
 
31
       """
-
 
32
        Documentation for constructor
-
 
33
        initialize_table is called to make all the tables known in
-
 
34
        the scope of this class.
-
 
35
        Also, the start URLs need to be fed to the spider through start_urls.append
-
 
36
        Domainname1 is the name by which this spider is known outside
-
 
37
        So this will be used as an argument for calling this spider 
-
 
38
        """ 
27
       initialize_table()
39
       initialize_table()
28
       #UNIVERCELL_DOMAINNAME1 = "univercell1"   
40
       #UNIVERCELL_DOMAINNAME1 = "univercell1"   
29
       UNIVERCELL_DOMAINNAME1 = get_code_word("UNIVERCELL_DOMAINNAME1")
41
       UNIVERCELL_DOMAINNAME1 = get_code_word("UNIVERCELL_DOMAINNAME1")
30
       self.domain_name = UNIVERCELL_DOMAINNAME1 
42
       self.domain_name = UNIVERCELL_DOMAINNAME1 
31
       
43
       
Line 33... Line 45...
33
       da = DataHelper()
45
       da = DataHelper()
34
       for pitem in da.get_all_univervendors():
46
       for pitem in da.get_all_univervendors():
35
            self.start_urls.append(pitem.v_site.strip())
47
            self.start_urls.append(pitem.v_site.strip())
36
    
48
    
37
    def start_requests(self):
49
    def start_requests(self):
38
        
50
        """
-
 
51
        Documentation for method start_requests
-
 
52
        To set various properties of the request to be made
-
 
53
        like referer, headers and all.
-
 
54
        @return a list of well formed requests which will be 
-
 
55
        crawled by spider and spider will return the response
-
 
56
        """
39
        #for each request a referer has to be set
57
        #for each request a referer has to be set
40
        listreq = []
58
        listreq = []
41
        #UNIVERCELL_REFERER = "www.google.com/search"
59
        #UNIVERCELL_REFERER = "www.google.com/search"
42
        UNIVERCELL_REFERER = get_code_word("UNIVERCELL_REFERER")
60
        UNIVERCELL_REFERER = get_code_word("UNIVERCELL_REFERER")
43
        for url1 in self.start_urls:
61
        for url1 in self.start_urls:
Line 45... Line 63...
45
            request.headers.setdefault("Referer", UNIVERCELL_REFERER)
63
            request.headers.setdefault("Referer", UNIVERCELL_REFERER)
46
            listreq.append(request)
64
            listreq.append(request)
47
        return listreq
65
        return listreq
48
        
66
        
49
    def parse(self, response):
67
    def parse(self, response):
-
 
68
        """
-
 
69
        Documentation for method parse
-
 
70
        @param response of individual requests
-
 
71
        Using Xpaths needed information is extracted out of the response
-
 
72
        and added to the database
-
 
73
        Xpath4 = Give us section for individual phone
-
 
74
        Xpath5 = Give us name for individual phone
-
 
75
        Xpath6 = Give us quoted-price for individual phone
-
 
76
        vatplustax = Gives us the final_price for an individual phone when added to the quoted-price
-
 
77
        Removelist = To filter the prices so as to make them integers, e.g. remove ',' or 'Rs'
-
 
78
        """
50
        da = DataHelper()
79
        da = DataHelper()
51
        #VATPLUSTAX = 0
80
        #VATPLUSTAX = 0
52
        #removelist is used for converting price to decimal format containing only numbers and '.'
81
        #removelist is used for converting price to decimal format containing only numbers and '.'
53
        #UNIVERCELL_REMOVELIST = ["Rs",",","-","/"]
82
        #UNIVERCELL_REMOVELIST = ["Rs",",","-","/"]
54
        #list separated by ';'
83
        #list separated by ';'