Rev 3313 | Rev 4198 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed
'''
Created on 30-Aug-2011

@author: Varun Gupta

Builds a Lucene index of phone price records fetched through
Clients.GAEServletClient.
'''
from lucene import File, StandardAnalyzer, IndexWriter, Version, SimpleFSDirectory, Document, Field
from Clients.GAEServletClient import getPhonePricesJSON, url
from PhonePriceDoc import PhonePriceDoc
import lucene, Utils


class IndexBuilder:
    '''
    Writes phone price records into the Lucene index under self.indexDir.

    Each record in price_data is read by key: 'name', 'source', 'price',
    'in_stock' and 'url'.
    '''

    def __init__(self, price_data, new_index = True):
        '''
        Start the Lucene VM and open an IndexWriter on the index directory.

        price_data -- iterable of records supporting item access by the keys
                      listed in the class docstring.
        new_index  -- passed to IndexWriter as its "create" argument.
        '''
        self.indexDir = "/tmp/lucene-index-dir"
        lucene.initVM()
        # Named index_dir rather than dir so the builtin is not shadowed.
        index_dir = SimpleFSDirectory(File(self.indexDir))
        self.analyzer = StandardAnalyzer(Version.LUCENE_30)
        self.writer = IndexWriter(index_dir, self.analyzer, new_index, IndexWriter.MaxFieldLength(512))
        self.price_data = price_data
        self.brand_model_extracter = Utils.BrandAndModelExtracter()

    def build(self):
        '''
        Add one document per record in self.price_data, optimize the index
        and close the writer.

        The writer is closed even when indexing raises, so a failed run does
        not leave the IndexWriter (and its hold on the index) open.
        Progress is reported on stdout; nothing is returned.
        '''
        # print(...) with a single argument prints exactly what the Python 2
        # print statement would, so the output is unchanged.
        try:
            print("Currently there are %d documents in the index..." % self.writer.numDocs())
            count = 0

            for phone_price in self.price_data:
                print(phone_price)
                #doc = PhonePriceDoc(phone_price)
                count += 1

                # The extractor splits the raw product name into brand and model.
                brand, name = self.brand_model_extracter.extract(phone_price['name'])
                print("Brand: %s, Model: %s" % (brand, name))

                doc = Document()
                # name, brand and source are stored and analyzed.
                doc.add(Field("name", name, Field.Store.YES, Field.Index.ANALYZED))
                doc.add(Field("brand", brand, Field.Store.YES, Field.Index.ANALYZED))
                doc.add(Field("source", str(phone_price['source']), Field.Store.YES, Field.Index.ANALYZED))
                # price, in_stock and url are stored only (Field.Index.NO).
                doc.add(Field("price", str(phone_price['price']), Field.Store.YES, Field.Index.NO))
                doc.add(Field("in_stock", str(phone_price['in_stock']), Field.Store.YES, Field.Index.NO))
                doc.add(Field("url", str(phone_price['url']), Field.Store.YES, Field.Index.NO))
                self.writer.addDocument(doc)

            print("Indexed lines from stdin (%d documents in index)" % (self.writer.numDocs()))
            print("About to optimize index of %d documents..." % self.writer.numDocs())
            self.writer.optimize()
            print("...done optimizing index of %d documents" % self.writer.numDocs())
            print("Closing index of %d documents..." % self.writer.numDocs())
            print("%d docs added" % count)
        finally:
            # Always release the writer, including on a mid-run failure.
            self.writer.close()


if __name__ == '__main__':
    phones = getPhonePricesJSON(url)
    # print phones
    # new_index = False is handed to IndexWriter as its "create" argument.
    indexer = IndexBuilder(price_data = phones, new_index = False)
    indexer.build()
    # catalog_client = InventoryClient().get_client()
    # items = catalog_client.getAllItems(True)
    # print phones
    # for item in items: print item.id, item.brand, item.modelName, item.modelNumber