Subversion Repositories SmartDukaan

Rev

Rev 3232 | Rev 4039 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 3232 Rev 3313
Line 8... Line 8...
8
from PhonePriceDoc import PhonePriceDoc
8
from PhonePriceDoc import PhonePriceDoc
9
import lucene, Utils
9
import lucene, Utils
10
 
10
 
11
class IndexBuilder:
11
class IndexBuilder:
12
 
12
 
13
    def __init__(self, price_data):
13
    def __init__(self, price_data, new_index = True):
14
        self.indexDir = "/tmp/lucene-index-dir"
14
        self.indexDir = "/tmp/lucene-index-dir"
15
        lucene.initVM()
15
        lucene.initVM()
16
        dir = SimpleFSDirectory(File(self.indexDir))
16
        dir = SimpleFSDirectory(File(self.indexDir))
17
        self.analyzer = StandardAnalyzer(Version.LUCENE_30)
17
        self.analyzer = StandardAnalyzer(Version.LUCENE_30)
18
        self.writer = IndexWriter(dir, self.analyzer, True, IndexWriter.MaxFieldLength(512))
18
        self.writer = IndexWriter(dir, self.analyzer, new_index, IndexWriter.MaxFieldLength(512))
19
        self.price_data = price_data
19
        self.price_data = price_data
20
    
20
    
21
    def build(self):
21
    def build(self):
22
        print "Currently there are %d documents in the index..." % self.writer.numDocs()
22
        print "Currently there are %d documents in the index..." % self.writer.numDocs()
-
 
23
        count = 0
23
        
24
        
24
        for phone_price in self.price_data:
25
        for phone_price in self.price_data:
25
            print phone_price
26
            print phone_price
26
            #doc = PhonePriceDoc(phone_price)
27
            #doc = PhonePriceDoc(phone_price)
-
 
28
            count += 1
-
 
29
            
27
            brand, name = Utils.extractBrandAndName(str(phone_price['name']))
30
            brand, name = Utils.extractBrandAndName(str(phone_price['name']))
28
            doc = Document()
31
            doc = Document()
29
            doc.add(Field("name", name, Field.Store.YES, Field.Index.ANALYZED))
32
            doc.add(Field("name", name, Field.Store.YES, Field.Index.ANALYZED))
30
            doc.add(Field("brand", brand, Field.Store.YES, Field.Index.ANALYZED))
33
            doc.add(Field("brand", brand, Field.Store.YES, Field.Index.ANALYZED))
31
            doc.add(Field("source", str(phone_price['source']), Field.Store.YES, Field.Index.ANALYZED))
34
            doc.add(Field("source", str(phone_price['source']), Field.Store.YES, Field.Index.ANALYZED))
32
            doc.add(Field("price", str(phone_price['price']), Field.Store.YES, Field.Index.NO))
35
            doc.add(Field("price", str(phone_price['price']), Field.Store.YES, Field.Index.NO))
33
            doc.add(Field("in_stock", str(phone_price['in_stock']), Field.Store.YES, Field.Index.NO))
36
            doc.add(Field("in_stock", str(phone_price['in_stock']), Field.Store.YES, Field.Index.NO))
34
            doc.add(Field("url", str(phone_price['url']), Field.Store.YES, Field.Index.NO))
37
            doc.add(Field("url", str(phone_price['url']), Field.Store.YES, Field.Index.NO))
35
            
38
            
36
            self.writer.addDocument(doc)
39
            self.writer.addDocument(doc)
37
        
40
            
38
        print "Indexed lines from stdin (%d documents in index)" % (self.writer.numDocs())
41
        print "Indexed lines from stdin (%d documents in index)" % (self.writer.numDocs())
39
        print "About to optimize index of %d documents..." % self.writer.numDocs()
42
        print "About to optimize index of %d documents..." % self.writer.numDocs()
40
        self.writer.optimize()
43
        self.writer.optimize()
41
        print "...done optimizing index of %d documents" % self.writer.numDocs()
44
        print "...done optimizing index of %d documents" % self.writer.numDocs()
42
        print "Closing index of %d documents..." % self.writer.numDocs()
45
        print "Closing index of %d documents..." % self.writer.numDocs()
-
 
46
        print "%d docs added" % count
43
        self.writer.close()
47
        self.writer.close()
44
        
48
        
45
if __name__ == '__main__':
49
if __name__ == '__main__':
46
    phones = GAEServletClient.getPhonePricesJSON()
50
    phones = GAEServletClient.getPhonePricesJSON()
-
 
51
#    print phones
47
    indexer = IndexBuilder(phones)
52
    indexer = IndexBuilder(price_data = phones, new_index = False)
48
    indexer.build()
53
    indexer.build()
49
#    catalog_client = InventoryClient().get_client()
54
#    catalog_client = InventoryClient().get_client()
50
#    items = catalog_client.getAllItems(True)
55
#    items = catalog_client.getAllItems(True)
51
#    print phones
56
#    print phones
52
#    for item in items:  print item.id, item.brand, item.modelName, item.modelNumber
57
#    for item in items:  print item.id, item.brand, item.modelName, item.modelNumber
53
58