Subversion Repositories SmartDukaan

Rev

Rev 3313 | Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
3232 varun.gupt 1
'''
2
Created on 30-Aug-2011
3
 
4
@author: Varun Gupta
5
'''
6
import lucene
7
from lucene import SimpleFSDirectory, File, StandardAnalyzer, IndexSearcher, Version, MultiFieldQueryParser, QueryParser
8
 
9
class Retriever:
    '''
    Searches a Lucene index of phones by brand, model name and model number.

    Attributes set by the constructor:
      indexDir    -- path of the Lucene index directory
      analyzer    -- StandardAnalyzer used when parsing queries
      searcher    -- IndexSearcher opened on indexDir
      MAX_RESULTS -- upper bound on the number of hits fetched per search
    '''

    def __init__(self, index_dir="/tmp/lucene-index-dir"):
        '''
        Starts the Lucene VM and opens a searcher on the index directory.

        index_dir -- filesystem path of the index; the default is the path
                     that used to be hard-coded, so Retriever() is unchanged.
        '''
        self.indexDir = index_dir
        lucene.initVM()

        # Named index_directory rather than "dir" to avoid shadowing the builtin.
        index_directory = SimpleFSDirectory(File(self.indexDir))
        self.analyzer = StandardAnalyzer(Version.LUCENE_30)
        self.searcher = IndexSearcher(index_directory)
        self.MAX_RESULTS = 1000

    def _split_terms(self, text):
        '''Splits text on single spaces and drops blank pieces.'''
        return [piece for piece in text.split(' ') if piece.strip()]

    def _model_number_terms(self, model_number):
        '''
        Returns the model-number terms followed by their hyphen variants.

        For every term containing '-', two extra spellings are added after
        the plain terms: the parts glued together and the parts separated by
        a space ("M-6350" -> "M6350", "M 6350"). Variants that end up blank
        are skipped so they cannot leave an empty operand in the query.
        '''
        terms = self._split_terms(model_number)
        variants = []

        for term in terms:
            if '-' not in term:
                continue
            parts = term.split('-')
            for variant in (''.join(parts), ' '.join(parts)):
                variant = variant.strip()
                if variant:
                    variants.append(variant)

        return terms + variants

    def _build_query(self, model_number, model_name, brand):
        '''
        Builds the query string "[brand:<brand> AND ](t1 OR t2 ...)".

        The brand clause is emitted only when a non-blank brand is given.
        Returns '' when neither model_name nor model_number yields a term.
        '''
        terms = []

        if model_name is not None:
            terms.extend(self._split_terms(model_name))

        terms.extend(self._model_number_terms(model_number))

        if not terms:
            return ''

        model_clause = "(%s)" % ' OR '.join(terms)

        if brand is not None and brand.strip():
            return "brand:%s AND %s" % (brand, model_clause)

        return model_clause

    def retrieve(self, model_number, model_name = None, brand = None):
        '''
        Searches the index and returns the matching phones.

        model_number -- space separated model number text; None aborts the
                        lookup and None is returned.
        model_name   -- optional space separated model name text.
        brand        -- optional brand; when given, hits must match it.

        Returns a list of dicts with the keys "name", "price", "score",
        "in_stock" and "url" (score is the raw hit score, the rest are
        str() of the stored fields), or an empty list when the arguments
        contain no searchable term.
        '''
        if model_number is None:
            return None

        # Debug trace of the inputs; single-argument print works on Python 2 and 3.
        print("%s %s %s" % (model_number, model_name, brand))

        query_str = self._build_query(model_number, model_name, brand)

        if not query_str:
            # Nothing to search for; avoid handing an empty clause to the parser.
            return []

        print(query_str)

        qp = MultiFieldQueryParser(Version.LUCENE_30, ["name", "brand"], self.analyzer)
        # NOTE(review): the parser is passed explicitly to parse(), as in the
        # original code; presumably the PyLucene way to reach the instance
        # level parse(String) -- confirm against the PyLucene version in use.
        # NOTE(review): terms are not escaped for Lucene query syntax.
        query = qp.parse(qp, query_str)
        hits = self.searcher.search(query, self.MAX_RESULTS)

        phones = []

        for hit in hits.scoreDocs:
            doc = self.searcher.doc(hit.doc)
            phones.append({
                "name": str(doc.get("name")),
                "price": str(doc.get("price")),
                'score': hit.score,
                'in_stock': str(doc.get("in_stock")),
                'url': str(doc.get("url")),
            })

        return phones
87
 
88
 
89
if __name__ == "__main__":
    # Manual smoke test: look up one handset and dump the raw matches.
    retriever = Retriever()
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print(retriever.retrieve(model_number="M-6350", brand="Spice"))

    # Disabled catalogue cross-check, kept for reference only. It relies on
    # CatalogClient, getItemsWithTopScore and isPriceSame, which are not
    # defined or imported in the visible part of this module.
    #
    # catalog_client = CatalogClient().get_client()
    # items = catalog_client.getAllItems(True)
    # products = {}
    #
    # for item in items:
    #     if item.category in (10002, 10003, 10004, 10005):  products[item.catalogItemId] = item
    #
    # for key, item in products.iteritems():
    #     try:
    #         name = "%s %s" % (item.modelName, item.modelNumber)
    #         matching_phones_with_top_score = getItemsWithTopScore(retriever.retrieve(name))
    #         print name, '\n', isPriceSame(matching_phones_with_top_score), matching_phones_with_top_score
    #     except Exception as e:
    #         print e