Subversion Repositories SmartDukaan

Rev

Rev 6428 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
6428 kshitij.so 1
from BeautifulSoup import BeautifulSoup
2
import urllib2
3
import sys
4
import MySQLdb
5
# Telecom circle code -> numeric circle id, as [code, id] pairs.
# The first element is matched against the circle text scraped from the
# page; the second is the value written to operatorseries.circleId.
# Order and ids are kept exactly as in the original table (note that the
# ids are not strictly ascending: HR=9 precedes HP=7 and JK=8).
listCircle = [
    ['AP', 1],
    ['AS', 2],
    ['BR', 3],
    ['CH', 4],
    ['DL', 5],
    ['GJ', 6],
    ['HR', 9],
    ['HP', 7],
    ['JK', 8],
    ['KA', 10],
    ['KL', 11],
    ['KO', 12],
    ['MP', 14],
    ['MH', 13],
    ['MU', 15],
    ['NE', 16],
    ['OR', 17],
    ['PB', 18],
    ['RJ', 19],
    ['TN', 20],
    ['UE', 21],
    ['UW', 22],
    ['WB', 23],
]
30
# Operator code -> numeric operator id, as [code, id] pairs.
# The first element is matched against the operator text scraped from the
# page (or the fixed names assigned for the 92/93/94 series); the second is
# the value written to operatorseries.operatorId.
# NOTE(review): 'CC' and 'CG' both map to id 9 -- kept as in the original;
# confirm this is intentional.
listOperator = [
    ['AC', 6],
    ['AT', 7],
    ['LM', 8],
    ['CC', 9],
    ['CG', 9],
    ['ET', 10],
    ['PG', 11],
    ['ID', 12],
    ['MTNLD', 13],
    ['MTNLM', 14],
    ['MT', 15],
    ['RG', 16],
    ['RC', 17],
    ['ST', 18],
    ['TD', 20],
    ['Tata CDMA', 21],
    ['UN', 23],
    ['DC', 24],
    ['VF', 25],
]
6429 kshitij.so 51
# Open the target database up front; the parsing loop further down writes
# into it and closes it.
# NOTE(review): host, user, password and schema are hard-coded here --
# consider moving them to configuration.
db = MySQLdb.connect("localhost", "root", "shop2020", "transaction")

url = "http://en.wikipedia.org/wiki/Mobile_telephone_numbering_in_India#Mobile.2C_Operator_and_Circle"

# Download and parse the page, retrying on failure. After the fifth failed
# attempt the script gives up with exit status 1.
Attempt = 1
connected = False
while not connected:
    try:
        request = urllib2.Request(url)
        # Send a browser-like User-Agent with the request.
        request.add_header('User-Agent', 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.218 Safari/535.1')
        opener = urllib2.build_opener()
        response_data = opener.open(request).read()
        soup = BeautifulSoup(response_data, convertEntities=BeautifulSoup.HTML_ENTITIES)
        # Every <table class="wikitable"> on the page, in document order.
        table = soup.findAll("table", {"class": "wikitable"})
        connected = True
    except Exception as error:
        # Catch Exception rather than everything, so Ctrl-C and SystemExit
        # are not swallowed and retried; also report why the attempt failed.
        print("Retrying: %d" % Attempt)
        print("Fetch failed: %s" % error)
        Attempt = Attempt + 1
        if Attempt == 6:
            # Do not leave the database connection open when giving up.
            db.close()
            sys.exit(1)
71
 
72
# Series whose first two digits determine the operator. Rows for these
# series carry only two cells (series, circle) instead of the usual three
# (series, operator, circle).
_TWO_COLUMN_OPERATORS = {92: "Tata CDMA", 93: "RC", 94: "CG"}


def _cell_text(cell):
    """Return the text of a table cell as a whitespace-trimmed ``str``."""
    return str(cell.text.strip())


def _iter_series_rows(cells):
    """Yield ``(series, operator, circle)`` for each row in a flat cell list.

    ``cells`` is the sequence of ``<td>`` elements of one table, in document
    order. Each logical row is consumed as: a numeric series cell, then an
    operator cell (absent for the 92xx/93xx/94xx series, whose operator is
    fixed), then a circle cell. The placeholder operator ``DP`` is mapped to
    ``MTNLM`` for circle ``MU`` and to ``MTNLD`` for every other circle.

    Progress lines are printed as each cell is read. A trailing, incomplete
    row at the end of the cell list is dropped.

    Raises ValueError if a series cell is not an integer.
    """
    cells = iter(cells)
    for series_cell in cells:
        series = int(series_cell.text)
        print("Series :%d" % series)

        # Floor division keeps the prefix an integer on any Python version.
        operator = _TWO_COLUMN_OPERATORS.get(series // 100)
        if operator is None:
            operator_cell = next(cells, None)
            if operator_cell is None:
                return
            operator = _cell_text(operator_cell)
        print("Operator :" + operator)

        circle_cell = next(cells, None)
        if circle_cell is None:
            return
        circle = _cell_text(circle_cell)
        if operator == 'DP':
            operator = 'MTNLM' if circle == 'MU' else 'MTNLD'
        print("Circle :" + circle)

        yield series, operator, circle


# Build the code -> id lookups once instead of scanning the lists per row.
_circle_ids = dict(listCircle)
_operator_ids = dict(listOperator)

# Parameterized statement: the driver does the quoting, so scraped values
# are never spliced into the SQL text.
_INSERT_SERIES_SQL = ("INSERT INTO operatorseries (operatorId, circleId, series) "
                      "VALUES (%s, %s, %s)")

try:
    cursor = db.cursor()
    # Tables 1..3 of the collected wikitables are processed (index 0 is
    # skipped); a page with fewer tables fails loudly with IndexError.
    for x in range(1, 4):
        for Series, Operator, Circle in _iter_series_rows(table[x].findAll("td")):
            # Look both ids up fresh for every row so that an unknown code
            # can never reuse the id left over from a previous row.
            Circle_ID = _circle_ids.get(Circle)
            if Circle_ID is not None:
                print("Circle_ID %d" % Circle_ID)

            Operator_ID = _operator_ids.get(Operator)
            if Operator_ID is not None:
                print("Operator_ID %d" % Operator_ID)
                if Circle_ID is None:
                    print("Circle %s not in listCircle; series %d skipped" % (Circle, Series))
                else:
                    cursor.execute(_INSERT_SERIES_SQL, (Operator_ID, Circle_ID, Series))
                    # Commit per row so rows stored before a later failure
                    # are kept, as before.
                    db.commit()
            print("-----")
    cursor.close()
finally:
    db.close()