Subversion Repositories SmartDukaan

Rev

Rev 3313 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed

'''
Created on 30-Aug-2011

@author: Varun Gupta
'''
from lucene import File, StandardAnalyzer, IndexWriter, Version, SimpleFSDirectory, Document, Field
from Clients import GAEServletClient
from PhonePriceDoc import PhonePriceDoc
import lucene, Utils

class IndexBuilder:
    """Build a Lucene index of phone price records on the local filesystem.

    Each record in ``price_data`` becomes one Lucene document. The writer
    is opened in "create" mode, so the index is built from scratch.
    """

    def __init__(self, price_data, index_dir="/tmp/lucene-index-dir"):
        """Start the JVM and open an index writer on ``index_dir``.

        price_data -- iterable of mappings; build() reads the keys
                      'name', 'source', 'price', 'in_stock' and 'url'.
        index_dir  -- filesystem path of the index directory. Defaults to
                      the location that used to be hard-coded here.
        """
        self.indexDir = index_dir
        self.price_data = price_data
        lucene.initVM()
        # Named index_directory so the builtin dir() is not shadowed.
        index_directory = SimpleFSDirectory(File(self.indexDir))
        self.analyzer = StandardAnalyzer(Version.LUCENE_30)
        # Third argument True = create a new index rather than append.
        self.writer = IndexWriter(index_directory, self.analyzer, True,
                                  IndexWriter.MaxFieldLength(512))

    def build(self):
        """Index every record in price_data, then optimize and close.

        If anything fails before the writer has been closed, the writer is
        rolled back (releasing it without committing a partial index) and
        the original exception is re-raised. The writer is closed on
        success, so build() is meant to be called once per instance.
        """
        writer = self.writer
        # print(...) with one pre-formatted argument prints the same text as
        # the Python 2 print statement and is also valid Python 3 syntax.
        try:
            print("Currently there are %d documents in the index..." % writer.numDocs())

            for phone_price in self.price_data:
                print(phone_price)
                writer.addDocument(self._to_document(phone_price))

            print("Indexed lines from stdin (%d documents in index)" % writer.numDocs())
            print("About to optimize index of %d documents..." % writer.numDocs())
            writer.optimize()
            print("...done optimizing index of %d documents" % writer.numDocs())
            print("Closing index of %d documents..." % writer.numDocs())
            writer.close()
        except Exception:
            # Do not leave the writer (and its lock) dangling on failure.
            writer.rollback()
            raise

    def _to_document(self, phone_price):
        """Return the Lucene Document for a single price record."""
        # Read every key first so a malformed record fails before any
        # Lucene object is created.
        # NOTE(review): under Python 2, str() raises UnicodeEncodeError on
        # non-ASCII unicode values -- confirm the feed is ASCII-only.
        full_name = str(phone_price['name'])
        source = str(phone_price['source'])
        price = str(phone_price['price'])
        in_stock = str(phone_price['in_stock'])
        url = str(phone_price['url'])

        brand, name = Utils.extractBrandAndName(full_name)

        doc = Document()
        # Stored and analyzed fields.
        doc.add(Field("name", name, Field.Store.YES, Field.Index.ANALYZED))
        doc.add(Field("brand", brand, Field.Store.YES, Field.Index.ANALYZED))
        doc.add(Field("source", source, Field.Store.YES, Field.Index.ANALYZED))
        # Stored-only fields (Field.Index.NO).
        doc.add(Field("price", price, Field.Store.YES, Field.Index.NO))
        doc.add(Field("in_stock", in_stock, Field.Store.YES, Field.Index.NO))
        doc.add(Field("url", url, Field.Store.YES, Field.Index.NO))
        return doc
        
if __name__ == '__main__':
    # Script entry point: fetch the phone prices through
    # GAEServletClient.getPhonePricesJSON() and index them in one go.
    IndexBuilder(GAEServletClient.getPhonePricesJSON()).build()
#    catalog_client = InventoryClient().get_client()
#    items = catalog_client.getAllItems(True)
#    print phones
#    for item in items:  print item.id, item.brand, item.modelName, item.modelNumber