| 6395 |
kshitij.so |
1 |
from BeautifulSoup import BeautifulSoup
|
|
|
2 |
import urllib2
|
|
|
3 |
import urllib
|
|
|
4 |
import MySQLdb
|
|
|
5 |
import re
|
|
|
6 |
import sys
|
|
|
7 |
# Telecom circles to scrape, one row per circle.  Row layout, as read by the
# scraping loop in this file:
#   [0] circle name printed in the progress output
#   [1] value posted as the `cid` form field to the recharge listing page
#   [2] circleId stored with each inserted rechargedenomination row
#   [3] 1 when the "bogus plan" filter is applied to this circle, else 0
listCircle = [
    ["Andhra Pradesh", "hyd", 1, 0],
    ["Assam", "asm", 2, 0],
    ["Bihar", "bih", 3, 0],
    ["Chennai", "che", 4, 0],
    ["Delhi NCR", "del", 5, 0],
    ["Gujarat", "guj", 6, 0],
    ["Haryana", "har", 9, 0],
    ["Himachal Pradesh", "hip", 7, 0],
    ["Jammu", "jnk", 8, 0],
    ["Karnataka", "kar", 10, 0],
    ["Kerela", "ker", 11, 0],
    ["Kolkata", "kol", 12, 0],
    ["MP", "map", 14, 0],
    ["Maharashtra", "mah", 13, 1],
    ["Mumbai", "mum", 15, 1],
    ["North East", "ner", 16, 0],
    ["Orissa", "ors", 17, 0],
    ["Punjab", "pun", 18, 0],
    ["Rajasthan", "raj", 19, 1],
    ["Tamil Nadu", "tam", 20, 0],
    ["UP East", "upe", 21, 1],
    ["UP(West)", "upw", 22, 0],
    ["West Bengal", "rob", 23, 0],
]
|
|
|
32 |
|
|
|
33 |
# Scrape Vodafone's online recharge catalogue for every circle in listCircle
# and store each accepted plan in the `rechargedenomination` table.
#
# This is Python 2 code (urllib2, BeautifulSoup 3, MySQLdb).  print() is always
# called with exactly one argument so the output is identical to the print
# statement while the syntax stays valid for newer interpreters too.

LISTING_URL = "https://shop.vodafone.in/shop/rechargeOnline.jsp?start=%d"
DETAILS_URL = "https://shop.vodafone.in/shop/ajax/viewRechargeDetails.jsp?id="
SKU_PATTERN = re.compile('sku[0-9]+')

OPERATOR_ID = 25        # operatorId written with every inserted row
PAGE_STEP = 10          # the listing's `start` parameter grows by 10 per page
MAX_ATTEMPTS = 5        # failed fetches tolerated per URL before exiting
MIN_DENOMINATION = 10   # plans priced below this are not stored
REQUIRED_CELLS = 15     # the last <td> that is read (deduction) is the 15th

# Values stored in the denominationType column.
NORMAL_PLAN = 1
SPECIAL_PLAN = 2        # deduction is more than half of the denomination

# Validity strings that are stored as '0 days'.
NO_VALIDITY = ('-', 'NA', 'na', 'nil', '', 'Nil', 'NIL')
LIFETIME_VALIDITY = ('lifetime', 'Lifetime', 'life time', 'Life Time',
                     'Life time', 'Lifetime Validity', 'UNLIMITED',
                     'unlimited', 'Unlimited')


def _fetch_soup(url, post_data, failure_message, exit_code):
    """Download *url* and return it parsed with BeautifulSoup.

    post_data is an already url-encoded form body, or None for a plain GET.
    It is encoded by the caller exactly once: re-encoding it inside the retry
    loop (as the script used to do) fails on the second attempt because the
    dict has already become a string, which made every retry fail.

    A failed attempt prints failure_message and the attempt number; after
    MAX_ATTEMPTS failures the script terminates via sys.exit(exit_code).
    """
    attempt = 1
    while True:
        try:
            response = urllib2.urlopen(urllib2.Request(url, post_data))
            try:
                page = response.read()
            finally:
                response.close()
            return BeautifulSoup(
                page, convertEntities=BeautifulSoup.HTML_ENTITIES)
        except Exception as exc:
            # `except Exception` (not a bare except) keeps Ctrl-C working.
            print(failure_message)
            print("Reason: %r" % (exc,))
            print("Retrying: %d" % attempt)
            attempt += 1
            if attempt > MAX_ATTEMPTS:
                sys.exit(exit_code)


def _parse_plan(cells, filter_bogus):
    """Turn the <td> cells of a plan-details table into a row to store.

    Cell positions (1-based): 2 = denomination, 4 = recharge type,
    7 = talktime, 11 = benefit text, 13 = validity, 15 = deduction.

    Returns (denomination_type, validity, amount, description), or None when
    the plan must not be stored (table too short, denomination below
    MIN_DENOMINATION, or rejected by the bogus-plan filter).

    Raises ValueError if the denomination, talktime or deduction cell is not
    numeric; this is deliberately left fatal, as it always was.
    """
    if len(cells) < REQUIRED_CELLS:
        # Nothing was ever stored for such tables; now it is also logged.
        print("Skipping plan: details table has only %d cells" % len(cells))
        return None

    denomination_text = cells[1].text
    print("Deno: " + denomination_text)
    denomination = float(denomination_text)
    if denomination < MIN_DENOMINATION:
        return None

    print("RechargeType: " + cells[3].text)

    talktime_text = cells[6].text
    print("Talktime String: " + talktime_text)
    talktime = float(talktime_text)
    print("Talktime:  %s" % talktime)

    benefit = cells[10].text
    validity = cells[12].text
    print("Validity: " + validity)

    deduction_text = cells[14].text
    print("Deduction: " + deduction_text)
    deduction = float(deduction_text)

    if deduction > denomination / 2:
        plan_type = SPECIAL_PLAN
    else:
        plan_type = NORMAL_PLAN

    if validity in NO_VALIDITY:
        validity = '0 days'
    if plan_type == NORMAL_PLAN and validity in LIFETIME_VALIDITY:
        validity = '0 days'

    full_talktime = 'Full Talktime' in benefit
    if full_talktime:
        print("****-----Full Talktime----****")

    # For flagged circles, normal plans that are not "Full Talktime" are
    # rejected when they cost more than 1000, or cost more than 50 without
    # being a multiple of 100.
    odd_amount = denomination > 1000 or (
        denomination % 100 != 0 and denomination > 50)
    if (filter_bogus and plan_type == NORMAL_PLAN
            and not full_talktime and odd_amount):
        print("Skipping Bogus Plans")
        return None

    if plan_type == NORMAL_PLAN:
        description = "Talktime of Rs " + str(talktime)
    else:
        description = benefit
        print("Benefit: " + description)

    return plan_type, validity, denomination, description


def _store_plan(db, cursor, circle_id, plan):
    """Insert one parsed plan for circle_id and commit immediately.

    The values are passed as query parameters so that quotes inside the
    scraped text can neither break nor alter the statement.
    """
    plan_type, validity, amount, description = plan
    cursor.execute(
        "INSERT INTO rechargedenomination "
        "(operatorId, circleId, denominationType, validity, amount, "
        "description) VALUES (%s, %s, %s, %s, %s, %s)",
        (OPERATOR_ID, circle_id, plan_type, validity, amount, description))
    db.commit()


def _scrape_circle(db, cursor, circle_name, circle_code, circle_id,
                   filter_bogus):
    """Walk every listing page of one circle and store its plans."""
    print("Circle Name: " + circle_name)
    post_data = urllib.urlencode({'cid': circle_code})
    page_number = 1
    start = 1
    while True:
        url = LISTING_URL % start
        listing = _fetch_soup(
            url, post_data,
            "In Except Block for Url " + url + " And Circle " + circle_name,
            1)
        links = listing.findAll("a", {"class": "arrowlink"})
        if not links:
            # A listing page without plan links marks the end of the circle.
            print("Break for url " + url)
            break
        print("Page:  %d" % page_number)

        for link in links:
            match = SKU_PATTERN.search(str(link))
            if match is None:
                print("Skipping link without a sku id")
                continue
            sku = match.group(0)
            print("SkuId: " + sku)

            details = _fetch_soup(
                DETAILS_URL + sku, None, "Except Block Hitting SkuUrl ", 2)
            table = details.find("table")
            if table is None:
                print("Skipping " + sku + ": no details table found")
                continue

            plan = _parse_plan(table.findAll("td"), filter_bogus)
            if plan is not None:
                _store_plan(db, cursor, circle_id, plan)

        start += PAGE_STEP
        page_number += 1


def main():
    """Scrape all circles; exits with status 1 or 2 on repeated fetch failure
    (listing page or plan-details page respectively)."""
    # NOTE(review): database credentials are hard-coded here; consider moving
    # them to configuration.
    db = MySQLdb.connect("localhost", "root", "shop2020", "transaction")
    try:
        cursor = db.cursor()
        try:
            for circle_name, circle_code, circle_id, bogus_flag in listCircle:
                _scrape_circle(db, cursor, circle_name, circle_code,
                               circle_id, bogus_flag == 1)
        finally:
            cursor.close()
    finally:
        # Also runs when sys.exit() aborts the scrape.
        db.close()


if __name__ == "__main__":
    main()
|