Subversion Repositories SmartDukaan

Rev

Rev 9236 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
8168 kshitij.so 1
from BeautifulSoup import BeautifulSoup
2
import mechanize
3
import urllib2
4
import sys
5
import urllib
6
import cookielib
7
import urllib
8
 
9
 
10
def getBrowserObject():
    """Build a mechanize browser that presents itself as desktop Chrome.

    The browser keeps cookies in an in-memory ``LWPCookieJar``, honours
    http-equiv headers, follows redirects, sends a Referer header, ignores
    robots.txt and has every mechanize debug switch turned off.

    Returns:
        The configured ``mechanize.Browser`` instance.
    """
    br = mechanize.Browser(factory=mechanize.RobustFactory())
    br.set_cookiejar(cookielib.LWPCookieJar())

    # Protocol handling.
    br.set_handle_equiv(True)
    br.set_handle_redirect(True)
    br.set_handle_referer(True)
    br.set_handle_robots(False)

    # Keep mechanize quiet; this script does its own progress printing.
    br.set_debug_http(False)
    br.set_debug_redirects(False)
    br.set_debug_responses(False)

    # max_time=1 bounds the refresh delay mechanize is willing to follow.
    br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)

    br.addheaders = [
        ('User-agent', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11'),
        ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
        # Advertise gzip only: ungzipResponse() is the sole decoder in this
        # file, so a deflate- or sdch-encoded reply could not be read.
        ('Accept-Encoding', 'gzip'),
        ('Accept-Language', 'en-US,en;q=0.8'),
        ('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.3'),
    ]
    return br
30
 
31
def login(url):
    """Submit the merchant login form and return the browser that did it.

    Opens ``url``, selects the form named ``login``, fills in the user name
    and password fields and submits it. Both the login page and the reply to
    the submission are passed through ``ungzipResponse`` so mechanize can
    parse them when the server compresses its output.

    Args:
        url: Address of the login page.

    Returns:
        The mechanize browser used for the login, for reuse on later requests.
    """
    br = getBrowserObject()

    # One request is enough to load the form.
    response = br.open(url)
    ungzipResponse(response, br)

    br.select_form(name="login")
    # NOTE(review): 'unmae' looks like a typo for 'uname', but it must match
    # the field name in the remote form - confirm against the page before
    # changing it.
    br.form['unmae'] = "saholic"
    # NOTE(review): credentials are hard-coded in source control; consider
    # loading them from configuration instead.
    br.form['pword'] = "2020shop"

    # Announce the attempt before the form is actually sent.
    print("********************")
    print("Attempting to Login")
    print("********************")
    response = br.submit()
    ungzipResponse(response, br)
    return br
46
 
47
def _parsePrice(text):
    """Turn a price label such as ``Rs.12,499`` into an int."""
    return int(text.replace("Rs.", '').replace(",", ''))


def _extractMobileId(cell):
    """Return the ``mid="..."`` value found in the markup of *cell*.

    The value starts after the last ``mid="`` and runs up to the last ``">``
    that follows it. Raises ValueError when either marker is missing.
    """
    markup = str(cell)
    marker = "mid=\""
    start = markup.rindex(marker) + len(marker)
    end = markup.rindex("\">", start)
    return markup[start:end]


def fetchItemDetails(merchant_url, br):
    """Scrape the merchant listing page into a list of per-product rows.

    Every ``<tr class="gradeX">`` on the page is walked child by child
    (1-based position): child 4 supplies the mobile id, child 8 the cheapest
    price and child 16 the Saholic price.

    Args:
        merchant_url: Address of the merchant listing page.
        br: Browser used to fetch the page (the one returned by ``login``).

    Returns:
        A list with one entry per matching table row. A complete entry is
        ``[mobile_id, cheapest_price, saholic_price]`` with the prices as
        ints; a row with fewer children yields a shorter (possibly empty)
        list.

    Raises:
        ValueError: if the id markers are missing from child 4 or a price
            cell does not reduce to an integer.
    """
    response = br.open(merchant_url)
    ungzipResponse(response, br)
    page = response.read().decode("utf-8")
    soup = BeautifulSoup(page, convertEntities=BeautifulSoup.HTML_ENTITIES)

    data = []
    for tr in soup.findAll("tr", {"class": "gradeX"}):
        row = []
        # Iterating a row yields all of its children, not only the cells,
        # hence the sparse positions 4 / 8 / 16.
        for position, cell in enumerate(tr, 1):
            if position == 4:
                print("********************")
                mobile_id = _extractMobileId(cell)
                print("Product name field : %s" % mobile_id)
                row.append(mobile_id)
            elif position == 8:
                print("********************")
                print("Cheapest price : %s" % cell.text)
                row.append(_parsePrice(cell.text))
            elif position == 16:
                print("********************")
                print("Saholic price : %s" % cell.text)
                row.append(_parsePrice(cell.text))
        data.append(row)

    print("******************************")
    print("Data Populated from 91 Mobiles")
    print("******************************")
    return data
89
 
90
def _entityIdFromUrl(url):
    """Return the entity id embedded in *url*, or None when there is none.

    The id is whatever follows the last ``-`` in the part of *url* that
    precedes ``?afid``. A link without ``?afid`` yields None.
    """
    end = url.rfind("?afid")
    if end == -1:
        return None
    product_url = url[:end]
    return product_url[product_url.rfind("-") + 1:]


def getSaholicEntityId(data):
    """Resolve the Saholic entity id for each scraped product row.

    For every row the 91mobiles redirect page for the row's mobile id
    (``row[0]``) is fetched, and each link on that page carrying an
    ``?afid`` marker is reduced to its entity id, which is printed.

    Args:
        data: Rows as produced by ``fetchItemDetails``; only the first
            element of each row is used.

    Returns:
        The list of entity ids found, in the order they were encountered.
    """
    br = getBrowserObject()
    entity_ids = []
    for row in data:
        if not row:
            # fetchItemDetails() appends an empty list for table rows that
            # lack the expected cells; there is no mobile id to look up.
            continue

        response = br.open("http://www.91mobiles.com/redir.php?origin=detail&mobileid=%s&storename=saholic.com&sf=&storeflag=1&cc=" % (row[0]))
        ungzipResponse(response, br)
        page = response.read().decode("utf-8")
        soup = BeautifulSoup(page, convertEntities=BeautifulSoup.HTML_ENTITIES)

        for a in soup.findAll('a', href=True):
            entityId = _entityIdFromUrl(str(a['href']))
            if entityId is None:
                # Link without the ?afid marker: skip it instead of
                # aborting the whole run.
                continue
            print("*****************************")
            print("Trying to fetch entity id....")
            print("EntityId : %s" % entityId)
            print("*****************************")
            entity_ids.append(entityId)
    return entity_ids
8168 kshitij.so 110
 
111
def ungzipResponse(r, b):
    """Decompress a gzip-encoded response in place and hand it to the browser.

    When the ``Content-Encoding`` header of *r* is ``gzip`` the body is
    inflated, stored back on the response with ``set_data``, the
    ``Content-type`` header is forced to ``text/html; charset=utf-8`` and the
    response is installed on *b* with ``set_response``. Any other response
    (including one with no ``Content-Encoding`` header at all) is left
    untouched.

    Args:
        r: Response object offering ``info()``, ``read()`` and ``set_data()``.
        b: Browser object offering ``set_response()``.
    """
    headers = r.info()
    # .get() rather than [] so an uncompressed reply, which carries no
    # Content-Encoding header, is a no-op instead of a KeyError.
    encoding = headers.get('Content-Encoding') or ''
    if encoding.strip().lower() != 'gzip':
        return

    import gzip
    print("********************")
    print("Deflating gzip response")
    print("********************")
    gz = gzip.GzipFile(fileobj=r, mode='rb')
    try:
        html = gz.read()
    finally:
        gz.close()
    headers["Content-type"] = "text/html; charset=utf-8"
    r.set_data(html)
    b.set_response(r)
124
 
125
 
126
def main():
    """Run the scrape: log in, read the merchant table, print entity ids."""
    print("Opening 91 Mobiles merchant login page")
    browser = login("http://www.91mobiles.com/91merchants/login.php")
    items = fetchItemDetails(
        "http://www.91mobiles.com/91merchants/manage_merchants.php", browser)
    getSaholicEntityId(items)


if __name__ == "__main__":
    main()