Subversion Repositories SmartDukaan

Rev

Rev 5639 | Rev 5770 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 5639 Rev 5761
Line 4... Line 4...
4
 
4
 
5
import time
5
import time
6
 
6
 
7
class TradusScraper(BaseScraper):
7
class TradusScraper(BaseScraper):
8
 
8
 
-
 
9
    mobilePageCount = 32
9
    pageCount = 67
10
    tabletPageCount = 21
10
    productCountPerScraping = 20
11
    productCountPerScraping = 20
-
 
12
    mobileCurrentPage = 0
11
    currentPage = 0
13
    tabletCurrentPage = 0
12
    
14
    
13
 
15
 
14
    def __init__(self):
16
    def __init__(self):
15
        BaseScraper.__init__(self)
17
        BaseScraper.__init__(self)
16
        self.url = None
18
        self.url = None
Line 28... Line 30...
28
    def getPhones(self):
30
    def getPhones(self):
29
        phones = []
31
        phones = []
30
        for div in self.soup.findAll('div', {'class': 'mainresult-show-right'}):
32
        for div in self.soup.findAll('div', {'class': 'mainresult-show-right'}):
31
            try:
33
            try:
32
                anchor = div.find('a')
34
                anchor = div.find('a')
33
                name = anchor.contents[2].strip()
-
 
34
                if(len(name)== 0):
35
                if(len(anchor.contents)==1):
35
                    name = anchor.contents[0].strip()
36
                    name = anchor.contents[0].strip()
-
 
37
                elif(anchor.contents[1].string =="Tablet"):
-
 
38
                    name = anchor.contents[0] + "Tablet"
-
 
39
                    if(len(anchor.contents)>2):
-
 
40
                        name = name + anchor.contents[2];
-
 
41
                else:
-
 
42
                    name = anchor.contents[2].strip()
-
 
43
                    if(len(name)== 0):
-
 
44
                        name = anchor.contents[0].strip()
36
                product_url = anchor['href'].strip()
45
                product_url = anchor['href'].strip()
37
                price = div.find('div', {'class': 'mainresult-show-right-startrate'})('span')[0]('span')[0].contents[0].strip()[3:]
46
                price = div.find('div', {'class': 'mainresult-show-right-startrate'})('span')[0]('span')[0].contents[0].strip()[3:]
38
                in_stock = 1
47
                in_stock = 1
39
                
48
                
40
                try:
49
                try:
Line 69... Line 78...
69
                    print ex
78
                    print ex
70
                    pass
79
                    pass
71
            except Exception as e:
80
            except Exception as e:
72
                print e
81
                print e
73
                pass
82
                pass
74
            self.phones = phones
83
        self.phones = phones
75
            return phones
84
        return phones
76
    
85
    
77
    def setPageCount(self):
86
    def setPageCount(self):
78
        self.currentPage = self.currentPage + 1
87
        self.currentPage = self.currentPage + 1
79
    
88
    
80
    def getNextUrl(self):
89
    def getNextUrl(self):
81
        time.sleep(1)
90
        time.sleep(1)
-
 
91
        if "mobile" in self.url:
82
        if TradusScraper.currentPage < TradusScraper.pageCount:
92
            if TradusScraper.mobileCurrentPage < TradusScraper.mobilePageCount:
83
            TradusScraper.currentPage += 1
93
                TradusScraper.mobileCurrentPage += 1
84
            return 'http://www.tradus.com/search/tradus_search/?query=mobile&filters=cat:7756&page=%s' % (TradusScraper.currentPage)
94
                return 'http://www.tradus.com/search/tradus_search/?query=mobile&filters=cat:7756&page=%s' % (TradusScraper.mobileCurrentPage)
-
 
95
            else:
-
 
96
                return None
-
 
97
        elif "tablets" in self.url:
-
 
98
            if TradusScraper.tabletCurrentPage < TradusScraper.tabletPageCount:
-
 
99
                TradusScraper.tabletCurrentPage += 1
-
 
100
                return 'http://www.tradus.com/search/tradus_search/?query=tablets&filters=cat:7756&cat:7762&page=%s' % (TradusScraper.tabletCurrentPage)
-
 
101
            else:
-
 
102
                return None
85
        else:
103
        else:
86
            return None
104
            return None
-
 
105
            
87
 
106
 
88
    def getDataFromProductPage(self, url):
107
    def getDataFromProductPage(self, url):
89
        html = BaseScraper.read(self, url)
108
        html = BaseScraper.read(self, url)
90
        soup = BeautifulSoup(html)
109
        soup = BeautifulSoup(html)
91
        name = soup.find('h1',{'class': 'left-content-product-heading'}).string.strip()
110
        name = soup.find('h1',{'class': 'left-content-product-heading'}).string.strip()