# Rev 238 | Blame | Compare with Previous | Last modification | View Log | RSS feed
'''
Created on 12-May-2010

@author: gaurav
'''
from elixir import *

# NOTE: The triple-quoted block below is an earlier version of the schema that
# was disabled by wrapping it in a string literal. It is kept for reference
# only and defines nothing at import time.
'''
class PhoneItem(Entity):
    url = Field(String(1000))
    name = Field(String(50))
    price = Field(Integer)
    source = Field(String(100))
    is_crawled = Field(Boolean)
    phones = ManyToOne("Phones")

    def __repr__(self):
        return "%s %s" %(self.url, self.name)

class Phones(Entity):
    vendor = Field(String(100))
    base_url = Field(String(1000))
    items = OneToMany("PhoneItem")

class Vendor(Entity):
    v_name = Field(String(100))
    v_url = Field(String(1000))

class themobilestoreurls(Entity):
    url = Field(String(1000))

class themobilestorephones(Entity):
    name = Field(String(100))
    shown_price = Field(Integer)
    final_price = Field(Integer)

class pricesbolourls(Entity):
    url = Field(String(1000))

class pricesbolophones(Entity):
    name = Field(String(100))
    shown_price = Field(Integer)
    final_price = Field(Integer)
'''

# For each class, crawl_id is used to retain past data for comparison:
# on each new crawl a new crawl_id is generated.


class crawl(Entity):
    """Database table for crawl.

    A new entry is made on each new crawl. This table exists so that past
    data can be retained for comparison.

    crawled_date = date of crawling
    """
    # NOTE(review): DATE is given a length argument here; presumably it is
    # ignored by the DATE type -- confirm against the installed SQLAlchemy.
    crawled_date = Field(DATE(100))


class infibeam_data(Entity):
    """Database table for infibeam.

    name = name of the phone
    shown_price = price offered by infibeam
    final_price = price which one has to pay,
                  final_price = shown_price + taxes + ship-price
    """
    crawl_id = Field(Integer)
    name = Field(String(100))
    shown_price = Field(Integer)
    final_price = Field(Integer)


class univercell_data(Entity):
    """Database table for univercell vendors.

    v_name = name of the vendor
    v_site = url of the vendor
    """
    crawl_id = Field(Integer)
    v_name = Field(String(100))
    v_site = Field(String(1000))


class univercell_items(Entity):
    """Database table for univercell phones.

    p_title = name of the phone
    p_shown_price = price offered by univercell
    p_final_price = price which one has to pay,
                    p_final_price = p_shown_price + taxes + ship-price
    """
    crawl_id = Field(Integer)
    p_title = Field(String(100))
    p_shown_price = Field(Integer)
    p_final_price = Field(Integer)


class indiaplaza_data(Entity):
    """Database table for indiaplaza vendors.

    v_name = name of the vendor
    v_site = url of the vendor
    """
    crawl_id = Field(Integer)
    v_name = Field(String(100))
    v_site = Field(String(1000))


class indiaplaza_items(Entity):
    """Database table for indiaplaza phones.

    p_name = name of the phone
    p_shown_price = price offered by indiaplaza
    p_final_price = price which one has to pay,
                    p_final_price = p_shown_price + taxes + ship-price
    p_guaranteeinfo = duration of guarantee and whether guarantee is from
                      vendor or manufacturer
    p_shipinfo = how much time would be taken for shipping
    """
    crawl_id = Field(Integer)
    p_name = Field(String(100))
    p_shown_price = Field(Integer)
    p_final_price = Field(Integer)
    p_guaranteeinfo = Field(String(100))
    p_shipinfo = Field(String(100))


class themobilestorephones_new(Entity):
    """Database table for themobilestore.

    name = name of the phone
    shown_price = price offered by themobilestore
    final_price = price which one has to pay,
                  final_price = shown_price + taxes + ship-price
    extra_info = whether phone can be bought or not
    """
    crawl_id = Field(Integer)
    name = Field(String(100))
    shown_price = Field(Integer)
    final_price = Field(Integer)
    extra_info = Field(String(1000))


class naaptolurls(Entity):
    """Database table for naaptol phone urls.

    url = url of the phones, which we got from sitemap.xml
    """
    crawl_id = Field(Integer)
    url = Field(String(1000))


class morenaaptolurls(Entity):
    """Database table for additional naaptol phone urls.

    url = url of the phones; here urls are the ones which are redirected
          and contained 'price', but before storing 'price' is replaced
          by 'features'
    """
    crawl_id = Field(Integer)
    url = Field(String(1000))


class naaptolphones(Entity):
    """Database table for naaptol phones.

    name = name of the phone
    range = price range for each phone

    range is in one of the 3 forms, i.e.
        range = a to b
        range = a
        range = a(approx)
    here a, b are integers
    """
    crawl_id = Field(Integer)
    name = Field(String(100))
    # The attribute name is the column name, so it stays "range" even though
    # it shadows the builtin inside this class body.
    range = Field(String(100))


class ntonlinesp(Entity):
    """Database table for naaptol online suppliers.

    nid = id of the phone in naaptolphones
    name = name of the online supplier
    price = price offered by the supplier for the phone
    """
    crawl_id = Field(Integer)
    nid = Field(Integer)
    name = Field(String(100))
    price = Field(Integer)


class ntofflinesp(Entity):
    """Database table for naaptol offline suppliers.

    nid = id of the phone in naaptolphones
    name = name of the offline supplier
    price = price offered by the supplier for the phone
    """
    crawl_id = Field(Integer)
    nid = Field(Integer)
    name = Field(String(100))
    price = Field(Integer)


class babuchak_urls(Entity):
    """Database table for babuchak vendor urls.

    url = url for the vendors
    no_pages = number of pages for individual vendor
    """
    crawl_id = Field(Integer)
    url = Field(String(100))
    no_pages = Field(Integer)


class babuchak_phoneurls(Entity):
    """Database table for babuchak phone urls.

    url = url for the individual phones
    """
    crawl_id = Field(Integer)
    url = Field(String(100))


class babuchak_phones(Entity):
    """Database table for babuchak phones.

    name = name of the phone
    shown_price = price offered by babuchak
    final_price = price which one has to pay,
                  final_price = shown_price + taxes + ship-price
    """
    crawl_id = Field(Integer)
    name = Field(String(100))
    shown_price = Field(Integer)
    final_price = Field(Integer)


class suppliers(Entity):
    """Database table for suppliers.

    This table spans all the suppliers in our database.

    name = name of the supplier
    site = url of the supplier
    last_crawled = date of the last run for this supplier
    """
    name = Field(String(100))
    site = Field(String(100))
    # NOTE(review): DATE is given a length argument here; presumably it is
    # ignored by the DATE type -- confirm against the installed SQLAlchemy.
    last_crawled = Field(DATE(100))


class models(Entity):
    """Database table for models.

    This table spans all the phone models in our database.

    brand = name of the brand for a particular phone
    model = name of the model for a particular phone
    """
    crawl_id = Field(Integer)
    brand = Field(String(100))
    model = Field(String(100))


class prices(Entity):
    """Database table for prices.

    This table spans all the phones in our database.

    supplier_id = id of the supplier who is selling this phone, from
                  suppliers table
    mobile_id = id of the model of this phone, from models table
    quoted_price = price of the phone as offered by the supplier
    final_price = price one has to pay to buy this phone, i.e. it includes
                  vat, tax and shipping charges
    extra_info = extra info about this phone
    """
    crawl_id = Field(Integer)
    supplier_id = Field(Integer)
    mobile_id = Field(Integer)
    quoted_price = Field(Integer)
    final_price = Field(Integer)
    extra_info = Field(String(1000))


class guarantee_info(Entity):
    """Database table for guarantee_info.

    This table spans all the phones in our database.

    mid = id of the phone in models table
    guaranteeinfo = duration of guarantee and whether guarantee is from
                    vendor or manufacturer
    shipinfo = how much time would be taken for shipping
    """
    crawl_id = Field(Integer)
    mid = Field(Integer)
    guaranteeinfo = Field(String(100))
    shipinfo = Field(String(100))


class extra_vars(Entity):
    """Database table for extra_vars.

    For some suppliers the number of pages to be crawled is not fixed; for
    them variables are created, and based on the value of the variable the
    number of pages to be crawled is determined dynamically.

    var = name of the variable
    val = value of the variable
    desc = description of the variable
    """
    var = Field(String(100))
    val = Field(String(100))
    desc = Field(String(1000))


def init(bind="mysql://root@localhost/phonecrawler", echo=True):
    """Bind the metadata to a database and set up all entities.

    Before using any of the tables described in this module, one has to
    call this method. Calling it with no arguments behaves exactly as
    before: the MySQL "phonecrawler" database on localhost is used and
    echo is switched on.

    bind = value assigned to metadata.bind; an alternative is for example
           "sqlite:///phones.sqlite"
    echo = value assigned to metadata.bind.echo after binding
    """
    metadata.bind = bind
    metadata.bind.echo = echo
    # The True argument is passed straight to Elixir's setup_all; presumably
    # it is the create_tables flag -- confirm against the Elixir docs.
    setup_all(True)


if __name__ == "__main__":
    init()