Subversion Repositories SmartDukaan

Rev

Rev 6428 | Blame | Compare with Previous | Last modification | View Log | RSS feed

from BeautifulSoup import BeautifulSoup
import urllib2
import sys
import MySQLdb
# Telecom circle lookup: each entry is [circle code, circle id].
# The code is compared with the circle text scraped from the page and the id
# is the value stored in operatorseries.circleId.  Entry order is preserved
# from the original list (ids are not strictly ascending).
listCircle = [
    ['AP', 1], ['AS', 2], ['BR', 3], ['CH', 4],
    ['DL', 5], ['GJ', 6], ['HR', 9], ['HP', 7],
    ['JK', 8], ['KA', 10], ['KL', 11], ['KO', 12],
    ['MP', 14], ['MH', 13], ['MU', 15], ['NE', 16],
    ['OR', 17], ['PB', 18], ['RJ', 19], ['TN', 20],
    ['UE', 21], ['UW', 22], ['WB', 23],
]
# Operator lookup: each entry is [operator code, operator id].
# The code is compared with the operator text scraped from the page (or the
# operator implied by a 92/93/94 series prefix) and the id is the value stored
# in operatorseries.operatorId.  'CC' and 'CG' intentionally share id 9.
listOperator = [
    ['AC', 6], ['AT', 7], ['LM', 8],
    ['CC', 9], ['CG', 9],
    ['ET', 10], ['PG', 11], ['ID', 12],
    ['MTNLD', 13], ['MTNLM', 14], ['MT', 15],
    ['RG', 16], ['RC', 17], ['ST', 18],
    ['TD', 20], ['Tata CDMA', 21],
    ['UN', 23], ['DC', 24], ['VF', 25],
]
# Open the target database, then download the Wikipedia page and collect its
# "wikitable" tables into `table`.  The download is attempted up to five times
# before the script gives up with exit status 1.
#
# NOTE(review): database credentials are hard-coded here; consider moving them
# to configuration.
db = MySQLdb.connect("localhost","root","shop2020","transaction" )
url="http://en.wikipedia.org/wiki/Mobile_telephone_numbering_in_India#Mobile.2C_Operator_and_Circle"
Attempt = 1
connected = False
while not connected:
    try:
        request = urllib2.Request(url)
        request.add_header('User-Agent', 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.218 Safari/535.1')
        opener = urllib2.build_opener()
        # A timeout turns a hung connection into an error the retry loop can
        # see, instead of blocking forever.
        response = opener.open(request, timeout=30)
        try:
            response_data = response.read()
        finally:
            response.close()
        soup = BeautifulSoup(response_data,convertEntities=BeautifulSoup.HTML_ENTITIES)
        table = soup.findAll("table" , {"class" : "wikitable"})
        connected = True
    except Exception as err:
        # `Exception` (rather than a bare except) lets Ctrl+C and SystemExit
        # propagate instead of being treated as a failed download.
        sys.stderr.write("Fetch failed: %s\n" % err)
        print("Retrying: %d" % Attempt)
        Attempt = Attempt + 1
        if Attempt == 6:
            # Giving up: release the database connection before exiting.
            db.close()
            sys.exit(1)

# Walk the scraped tables and store one operatorseries row for every number
# series whose operator code and circle code are both known.
#
# The <td> cells of each table are consumed as a flat stream of records:
#   series, operator, circle   - ordinary rows
#   series, circle             - 92xx/93xx/94xx rows, whose operator is
#                                implied by the series prefix
_PREFIX_OPERATORS = {92: "Tata CDMA", 93: "RC", 94: "CG"}
_circle_ids = dict(listCircle)
_operator_ids = dict(listOperator)
# Values are passed separately to execute() so the driver does the quoting.
_INSERT_SQL = ("INSERT INTO operatorseries (operatorId, circleId, series) "
               "VALUES (%s, %s, %s)")

cursor = db.cursor()
try:
    # Only entries 1..3 of the matched tables are read; entry 0 is skipped
    # (presumably it is not a series table - TODO confirm against the page).
    for x in range(1, 4):
        cells = iter(table[x].findAll("td"))
        for td in cells:
            Series = int(td.text)
            print("Series :%d" % Series)

            # Floor division keeps the prefix an int on any Python version.
            Operator = _PREFIX_OPERATORS.get(Series // 100)
            if Operator is None:
                operator_cell = next(cells, None)
                if operator_cell is None:
                    break  # table ended in the middle of a record
                # strip() first so surrounding whitespace cannot defeat the
                # code lookup below.
                Operator = str(operator_cell.text.strip())
            print("Operator :" + Operator)

            circle_cell = next(cells, None)
            if circle_cell is None:
                break  # table ended in the middle of a record
            Circle = str(circle_cell.text.strip())
            # 'DP' is split into two operator codes depending on the circle.
            if Operator == 'DP':
                if Circle == 'MU':
                    Operator = 'MTNLM'
                else:
                    Operator = 'MTNLD'
            print("Circle :" + Circle)

            # Look both ids up afresh for every record so a value from an
            # earlier row can never leak into this one.
            Circle_ID = _circle_ids.get(Circle)
            Operator_ID = _operator_ids.get(Operator)
            if Circle_ID is not None:
                print("Circle_ID %s" % Circle_ID)
            if Operator_ID is not None:
                print("Operator_ID %s" % Operator_ID)

            if Circle_ID is not None and Operator_ID is not None:
                cursor.execute(_INSERT_SQL, (Operator_ID, Circle_ID, Series))
                db.commit()
            else:
                sys.stderr.write(
                    "Skipping series %d: unknown operator %r or circle %r\n"
                    % (Series, Operator, Circle))
            print("-----")
finally:
    # Always release the cursor and connection, even if parsing or an insert
    # fails part-way through.
    cursor.close()
    db.close()