# Rev 5291 | Blame | Compare with Previous | Last modification | View Log | RSS feed
'''
Created on 30-Aug-2011

@author: Varun Gupta
'''
import lucene
from lucene import SimpleFSDirectory, File, StandardAnalyzer, IndexSearcher, Version, MultiFieldQueryParser, QueryParser


def _non_blank_terms(text):
    '''Split text on single spaces and drop pieces that are empty or whitespace-only.'''
    return [term for term in text.split(' ') if term.strip()]


def _model_number_terms(model_number):
    '''
    Return the search terms for a model number.

    The space-separated chunks come first. For every chunk containing a
    hyphen (e.g. "M-6868") two alternatives are appended after them: the
    chunk with the hyphens removed ("M6868") and an AND-group of its parts
    ("(M AND 6868)").
    '''
    terms = _non_blank_terms(model_number)
    expansions = []
    for term in terms:
        if '-' not in term:
            continue
        # Ignore empty pieces (leading/trailing/double hyphens) so that a
        # malformed clause such as "(M AND )" is never generated.
        parts = [part for part in term.split('-') if part.strip()]
        if parts:
            expansions.append(''.join(parts))
        if len(parts) > 1:
            expansions.append("(%s)" % ' AND '.join(parts))
    return terms + expansions


def _build_query(model_number, model_name, brand, synonyms):
    '''
    Assemble the Lucene query string used by Retriever.retrieve.

    The result has the form "brand:<brand> AND (<group>)", or just
    "(<group>)" when brand is None. The group holds the model-name words
    and model-number terms joined with OR, followed by the synonyms.
    Returns None when the group would contain nothing to search for.
    '''
    name_expr = ''
    if model_name is not None:
        name_expr = ' OR '.join(_non_blank_terms(model_name))
    number_expr = ' OR '.join(_model_number_terms(model_number)).strip()
    synonym_expr = ' '.join(synonyms) if synonyms is not None else ''

    # The spacing below reproduces the strings this module has always
    # generated, so the logged query text stays the same for existing callers.
    if name_expr and number_expr:
        group = "%s OR %s %s" % (name_expr, number_expr, synonym_expr)
    elif number_expr:
        group = " %s %s" % (number_expr, synonym_expr)
    else:
        group = "%s %s" % (name_expr, synonym_expr)

    if not group.strip():
        return None
    if brand is None:
        # Previously this path failed with a NameError because the brand
        # clause was only defined when a brand was passed.
        return "(%s)" % group
    return "brand:%s AND (%s)" % (brand, group)


class Retriever:
    '''Runs queries against the Lucene index stored under self.indexDir.'''

    def __init__(self):
        '''Start the Lucene VM and open a searcher on the index directory.'''
        self.indexDir = "/usr/price-comp-dashboard/lucene-index-dir"
        lucene.initVM()
        index_directory = SimpleFSDirectory(File(self.indexDir))
        self.analyzer = StandardAnalyzer(Version.LUCENE_30)
        self.searcher = IndexSearcher(index_directory)
        # Upper bound on the number of hits requested from the searcher.
        self.MAX_RESULTS = 1000

    def _search(self, query_str):
        '''Parse query_str against the "name" and "brand" fields and return the search result.'''
        # query = QueryParser(Version.LUCENE_30, "name", analyzer).parse(user_query)
        qp = MultiFieldQueryParser(Version.LUCENE_30, ["name", "brand"], self.analyzer)
        # NOTE(review): the parser is passed to its own parse() call exactly as
        # before; presumably PyLucene needs this to select the instance
        # overload of parse -- confirm before simplifying.
        query = qp.parse(qp, query_str)
        return self.searcher.search(query, self.MAX_RESULTS)

    def _hits_to_phones(self, hits):
        '''Convert every scored hit into a plain dict of its stored fields plus the score.'''
        phones = []
        for hit in hits.scoreDocs:
            doc = self.searcher.doc(hit.doc)
            # NOTE(review): str() is applied to whatever doc.get() returns, so a
            # field that yields None ends up as the literal string 'None'.
            phones.append({
                'name': str(doc.get("name")),
                'price': str(doc.get("price")),
                'score': hit.score,
                'in_stock': str(doc.get("in_stock")),
                'url': str(doc.get("url")),
                'source': str(doc.get("source")),
            })
        return phones

    def retrieve(self, model_number, model_name = None, brand = None, synonyms = None):
        '''
        Search the index for a product described by its model details.

        model_number -- space-separated model number; hyphenated chunks are
                        also searched without the hyphen and as an AND-group.
        model_name   -- optional space-separated model name; its words are OR-ed.
        brand        -- optional brand, matched against the "brand" field.
        synonyms     -- optional iterable of extra query terms appended to the group.

        Returns None when model_number is None, an empty list when the
        arguments contain nothing to search for, and otherwise a list of
        dicts with the keys name, price, score, in_stock, url and source.
        The arguments, the query and the results are printed to stdout.
        '''
        if model_number is None:
            return None
        print("%s %s %s" % (model_number, model_name, brand))

        query_str = _build_query(model_number, model_name, brand, synonyms)
        if query_str is None:
            # Nothing searchable was supplied; skip the query rather than
            # hand the parser an empty "( )" group.
            return []

        hits = self._search(query_str)
        print("Found %d document(s) that matched query '%s':" % (hits.totalHits, query_str))
        phones = self._hits_to_phones(hits)
        print(phones)
        return phones

    def retrieveForQuery(self, query_str):
        '''
        Run a ready-made Lucene query string against the index.

        Returns a list of dicts with the keys name, price, score, in_stock,
        url and source, one per hit (at most self.MAX_RESULTS are requested).
        '''
        return self._hits_to_phones(self._search(query_str))


if __name__ == "__main__":
    retriever = Retriever()
    # print retriever.retrieve(model_number="E2230", model_name = "Hero", brand="Samsung")
    docs = retriever.retrieveForQuery('brand:Spice AND (M-6868 OR M6868 OR (M AND 6868) )')
    print(len(docs))
    print(docs)
    # print phone
    '''
    catalog_client = CatalogClient().get_client()
    items = catalog_client.getAllItems(True)
    products = {}
    for item in items:
        if item.category in (10002, 10003, 10004, 10005): products[item.catalogItemId] = item
    for key, item in products.iteritems():
        try:
            name = "%s %s" % (item.modelName, item.modelNumber)
            matching_phones_with_top_score = getItemsWithTopScore(retriever.retrieve(name))
            print name, '\n', isPriceSame(matching_phones_with_top_score), matching_phones_with_top_score
        except Exception as e:
            print e
    '''