# Rev 3313 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed
'''
Created on 30-Aug-2011

@author: Varun Gupta
'''
from lucene import File, StandardAnalyzer, IndexWriter, Version, SimpleFSDirectory, Document, Field
from Clients import GAEServletClient
from PhonePriceDoc import PhonePriceDoc
import lucene
import Utils


class IndexBuilder:
    """Build a Lucene index from a collection of phone price records.

    Each record is read with the keys 'name', 'source', 'price', 'in_stock'
    and 'url'. One Lucene document is written per record.
    """

    def __init__(self, price_data, index_dir="/tmp/lucene-index-dir"):
        """Start the Lucene JVM and open an index writer.

        price_data -- iterable of records; each record must support lookup
                      by the keys listed in the class docstring.
        index_dir  -- filesystem directory that holds the index. Defaults to
                      the location this class has always used.
        """
        self.indexDir = index_dir
        lucene.initVM()
        index_directory = SimpleFSDirectory(File(self.indexDir))
        self.analyzer = StandardAnalyzer(Version.LUCENE_30)
        # The third argument is IndexWriter's "create" flag: True starts a
        # fresh index at index_dir instead of appending to an existing one.
        # MaxFieldLength(512) caps the number of terms indexed per field.
        self.writer = IndexWriter(index_directory, self.analyzer, True,
                                  IndexWriter.MaxFieldLength(512))
        self.price_data = price_data

    def build(self):
        """Index every record in price_data, optimize the index and close it.

        Progress is reported on stdout. The writer is closed even when
        indexing fails part-way, so it is never left open on error.
        After this call the writer is closed; build() is single-use.
        """
        try:
            print("Currently there are %d documents in the index..." % self.writer.numDocs())

            for phone_price in self.price_data:
                print(phone_price)
                # doc = PhonePriceDoc(phone_price)
                # NOTE(review): str() on a non-ASCII unicode value raises
                # UnicodeEncodeError on Python 2 -- confirm the feed is ASCII.
                brand, name = Utils.extractBrandAndName(str(phone_price['name']))

                doc = Document()
                # Searchable fields: stored and analyzed.
                doc.add(Field("name", name, Field.Store.YES, Field.Index.ANALYZED))
                doc.add(Field("brand", brand, Field.Store.YES, Field.Index.ANALYZED))
                doc.add(Field("source", str(phone_price['source']), Field.Store.YES, Field.Index.ANALYZED))
                # Payload fields: stored for retrieval, not indexed.
                doc.add(Field("price", str(phone_price['price']), Field.Store.YES, Field.Index.NO))
                doc.add(Field("in_stock", str(phone_price['in_stock']), Field.Store.YES, Field.Index.NO))
                doc.add(Field("url", str(phone_price['url']), Field.Store.YES, Field.Index.NO))
                self.writer.addDocument(doc)

            print("Indexed lines from stdin (%d documents in index)" % (self.writer.numDocs()))
            print("About to optimize index of %d documents..." % self.writer.numDocs())
            self.writer.optimize()
            print("...done optimizing index of %d documents" % self.writer.numDocs())
            print("Closing index of %d documents..." % self.writer.numDocs())
        finally:
            # Always release the writer, including on failure.
            self.writer.close()


if __name__ == '__main__':
    phones = GAEServletClient.getPhonePricesJSON()
    indexer = IndexBuilder(phones)
    indexer.build()

    # Commented-out code preserved from the original file:
    # catalog_client = InventoryClient().get_client()
    # items = catalog_client.getAllItems(True)
    # print phones
    # for item in items: print item.id, item.brand, item.modelName, item.modelNumber