Subversion Repositories SmartDukaan

Rev

Rev 5943 | Rev 6896 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
317 ashish 1
/**
2
 * 
3
 */
4
package in.shop2020.serving.services;
5
 
6
 
1698 chandransh 7
import in.shop2020.config.ConfigException;
6866 amit.gupta 8
import in.shop2020.serving.utils.Utils;
1698 chandransh 9
import in.shop2020.thrift.clients.config.ConfigClient;
2070 rajveer 10
import in.shop2020.utils.CategoryManager;
1698 chandransh 11
 
6866 amit.gupta 12
import java.util.ArrayList;
354 rajveer 13
import java.util.Arrays;
5729 amit.gupta 14
import java.util.Collections;
354 rajveer 15
import java.util.HashMap;
6866 amit.gupta 16
import java.util.HashSet;
5729 amit.gupta 17
import java.util.Iterator;
18
import java.util.LinkedHashMap;
790 vikas 19
import java.util.LinkedList;
20
import java.util.List;
5729 amit.gupta 21
import java.util.Map;
6866 amit.gupta 22
import java.util.Set;
354 rajveer 23
import java.util.TreeMap;
6866 amit.gupta 24
import java.util.regex.Matcher;
25
import java.util.regex.Pattern;
354 rajveer 26
 
317 ashish 27
import javax.xml.xpath.XPath;
28
import javax.xml.xpath.XPathConstants;
29
import javax.xml.xpath.XPathExpressionException;
30
import javax.xml.xpath.XPathFactory;
31
 
832 rajveer 32
import org.apache.log4j.Logger;
317 ashish 33
import org.w3c.dom.Node;
34
import org.w3c.dom.NodeList;
35
import org.xml.sax.InputSource;
36
 
354 rajveer 37
 
317 ashish 38
/**
545 rajveer 39
 * @author rajveer
317 ashish 40
 *
41
 */
42
public class SolrSearchService {
43
	/**
44
	 * 
45
	 */
2147 chandransh 46
	private static Logger log = Logger.getLogger(Class.class);
545 rajveer 47
 
48
	/**
	 * Base URL of the Solr select handler. Read from the config server under the
	 * key "solr_url"; a localhost default is used when that lookup throws a
	 * ConfigException.
	 */
	public static final String SOLR_URL;

	/**
	 * Zero-width lookahead that captures, in group 1, every facet id of the form
	 * "F_" followed by five digits.
	 */
	private static final Pattern FACET_PATTERN = Pattern.compile("(?=(F_\\d{5}))");

	/**
	 * Display order for the values of selected facets, keyed by facet id.
	 * getFacetMap() reports the values of these facets in the listed order rather
	 * than in the order found in the Solr response.
	 */
	private static final Map<String, List<String>> SORTED_FACET_VALUE_MAP = buildSortedFacetValueMap();

	/**
	 * Builds the read-only facet value ordering table. A plain HashMap is filled
	 * and wrapped, instead of using double-brace initialization, so that no
	 * anonymous HashMap subclass is created.
	 */
	private static Map<String, List<String>> buildSortedFacetValueMap() {
		Map<String, List<String>> ordering = new HashMap<String, List<String>>();
		ordering.put("F_50007", Arrays.asList("Upto 2 Mpx", "2 - 5 Mpx", "5 - 10 Mpx", "Above 10 Mpx"));
		ordering.put("F_50024", Arrays.asList("Upto 10 Mpx", "10 - 12 Mpx", "12 - 15 Mpx", "Above 10 Mpx", "15 - 18 Mpx", "Above 18 Mpx"));
		ordering.put("F_50025", Arrays.asList("Upto 4x", "4 - 6x", "6 - 10x", "10 - 14x", "14 - 18x", "Above 18x"));
		ordering.put("F_50026", Arrays.asList("Below 2 in.", "2 to 2.9 in.", "3 to 3.9 in.", "Above 4 in."));
		ordering.put("F_50027", Arrays.asList("Upto 10 Mpx", "10 - 15 Mpx", "15 - 20 Mpx", "Above 20 Mpx"));
		return Collections.unmodifiableMap(ordering);
	}

	static {
		String solrUrl;
		try {
			solrUrl = ConfigClient.getClient().get("solr_url");
		} catch (ConfigException cex) {
			log.error("Unable to get the solr URL from the config server. Setting the default value.", cex);
			solrUrl = "http://localhost:8983/solr/select/";
		}
		SOLR_URL = solrUrl;
	}
80
 
317 ashish 81
	/**
82
	 * 
83
	 */
84
	private XPath xpath;
85
 
86
	/**
87
	 * 
88
	 */
89
	private InputSource inputSource;
90
 
5729 amit.gupta 91
	Map<String,Map<String,Integer>> facetMap;
354 rajveer 92
 
6866 amit.gupta 93
	private String 	query;
94
 
790 vikas 95
	List<String> resultMap;
545 rajveer 96
 
97
	long numberOfResults=0;
3561 rajveer 98
 
99
	String priceFacetName = "F_50002";
100
 
317 ashish 101
	/**
102
	 * 
103
	 * @param query
104
	 * @param facetDefinitionIDs
105
	 */
3561 rajveer 106
	public SolrSearchService(String query, String[] facetqueries, String[] facetDefinitionIDs, long start, long rows,  Double minPrice, Double maxPrice, long categoryId, String sortOrder, long sourceId) {
6866 amit.gupta 107
		this.query = query;
354 rajveer 108
 
6866 amit.gupta 109
		List<String> rootFacetsQueried = new ArrayList<String>();
3561 rajveer 110
		if(sourceId != -1){
111
			priceFacetName = priceFacetName + "_" + sourceId;
112
		}
113
 
317 ashish 114
		this.xpath = XPathFactory.newInstance().newXPath();
545 rajveer 115
 
2606 rajveer 116
		query = query.trim().replaceAll("\\s+", " ");
545 rajveer 117
    	log.info("query=" + query);
118
 
317 ashish 119
		String uri = SOLR_URL + "?wt=xml&q=" + query;
120
 
3561 rajveer 121
		uri += "&stats=on&stats.field=" + priceFacetName;
545 rajveer 122
 
123
 
124
		if(minPrice != null || maxPrice != null){
125
			String minString = "0";
126
			String maxString = "*";  
127
			if(minPrice != null){
128
				minString = minPrice.toString();
129
			}
130
			if(maxPrice != null){
131
				maxString = maxPrice.toString();
132
			}
3561 rajveer 133
			uri += "&fq=" + priceFacetName + ":["+  minString + " " + maxString + "]";
354 rajveer 134
		}
135
 
569 rajveer 136
		if(categoryId != 10000){
137
			uri += "&fq=F_50010:\"" + CategoryManager.getCategoryManager().getCategoryLabel(categoryId) + "\"";
138
		}
354 rajveer 139
 
569 rajveer 140
		if(sortOrder != null){
3561 rajveer 141
			//replace the price facet name, so that it can pick price for the source.
142
			sortOrder = sortOrder.replace("F_50002", priceFacetName);
569 rajveer 143
			uri += "&sort=" + sortOrder;
144
		}
545 rajveer 145
 
317 ashish 146
		if(facetqueries != null) {
6866 amit.gupta 147
			//sorting will guarantee all similar facets together so that we can assume or between all similar items without fail.
148
			Arrays.sort(facetqueries);
149
			String fq="";
317 ashish 150
			for(int i=0; i<facetqueries.length; i++) {
6866 amit.gupta 151
				String[] tokens = facetqueries[i].split(":");
152
				if(rootFacetsQueried.contains(tokens[0])) {
153
					uri += " OR ";
154
					if(facetqueries[i].contains(" ")){
155
						uri +=  "\"" + tokens[1] + "\"";
156
					}else{
157
						uri += facetqueries[i];
158
					}
159
 
160
				} else {
161
					if(Arrays.asList(Utils.facetDefIDs).contains(tokens[0])) {
162
						fq = "{!tag=dt" + rootFacetsQueried.size() + "}";
163
						rootFacetsQueried.add(tokens[0]);
164
						if(facetqueries[i].contains(" ") && !(facetqueries[i].contains(" OR "))){
165
							fq +=  tokens[0] + ":\"" + tokens[1] + "\"";
166
						}else{
167
							fq += facetqueries[i] + "";
168
						}
169
					}
170
					uri += "&fq=" + fq;
536 rajveer 171
				}
317 ashish 172
			}
173
		}
6866 amit.gupta 174
		uri += "&fl=ID,Name&facet=true&start=" + start + "&rows=" + rows + "&facet.mincount=1";
2435 rajveer 175
		if(facetDefinitionIDs != null){
176
			for(int i=0; i<facetDefinitionIDs.length; i++) {
6866 amit.gupta 177
				if(rootFacetsQueried.contains(facetDefinitionIDs[i])){
178
					uri += "&facet.field={!ex=dt" + rootFacetsQueried.indexOf(facetDefinitionIDs[i])+ "}"+ facetDefinitionIDs[i]; 
179
				} else {
180
					uri += "&facet.field=" + facetDefinitionIDs[i];
181
				}
2435 rajveer 182
			}
317 ashish 183
		}
3262 rajveer 184
		log.info("uri=" + uri);
317 ashish 185
 
186
		this.inputSource = new InputSource(uri);
517 rajveer 187
 
545 rajveer 188
		this.facetMap = getFacetMap();
354 rajveer 189
	}
190
 
5729 amit.gupta 191
	public Map<String,Map<String,Integer>> removeUnwantedFacets(Map<String,Map<String,Integer>> facetMap, long numberOfResults){
6866 amit.gupta 192
 
193
		Set<String> facetsInQuery = new HashSet<String>(getAllMatches(this.query));
5729 amit.gupta 194
		Map<String,Map<String,Integer>> tempFacets = new TreeMap<String, Map<String,Integer>>(); 
354 rajveer 195
		for(String facet : facetMap.keySet()){
6866 amit.gupta 196
			if(facetMap.get(facet).size() > 0 && !facetsInQuery.contains(facet)){
5729 amit.gupta 197
				Map<String,Integer> tempMap = new LinkedHashMap<String, Integer>();
545 rajveer 198
 
354 rajveer 199
				for(String facetValueName : facetMap.get(facet).keySet()){
6866 amit.gupta 200
					//if(facetMap.get(facet).get(facetValueName) != 0 && facetMap.get(facet).get(facetValueName) != numberOfResults){
545 rajveer 201
						tempMap.put(facetValueName, facetMap.get(facet).get(facetValueName));
6866 amit.gupta 202
					//}
354 rajveer 203
				}
545 rajveer 204
				if(!tempMap.isEmpty()){
205
					tempFacets.put(facet, tempMap);
354 rajveer 206
				}
545 rajveer 207
			}	
354 rajveer 208
		}
6866 amit.gupta 209
		/*if(tempFacets.containsKey("F_50010")){
550 rajveer 210
			tempFacets.remove("F_50011");
6866 amit.gupta 211
		}*/
354 rajveer 212
 
213
		return tempFacets;
214
	}
215
 
5729 amit.gupta 216
	public Map<String,Integer> getFacetDetails(String facetName){
2606 rajveer 217
		if(facetMap != null){
218
			return facetMap.get(facetName);
219
		}else{
220
			return null;
221
		}
354 rajveer 222
	}
223
 
5729 amit.gupta 224
	public Map<String,Map<String,Integer>> getFacetMap() {
225
		facetMap = new TreeMap<String,Map<String,Integer>>();
354 rajveer 226
 
227
		String facetNamePath = "/response/lst/lst[@name = 'facet_fields']/lst";
545 rajveer 228
 
354 rajveer 229
		NodeList nodes = null;
230
		try {
231
			nodes = (NodeList) this.xpath.evaluate(facetNamePath, this.inputSource, XPathConstants.NODESET);
232
		}
233
		catch (XPathExpressionException xpee) {
234
			return null;
235
		}
236
 
237
		if(nodes.getLength() == 0) {
238
			return null;
239
		}
240
 
241
		NodeList subNodes = null;
242
 
243
		for(int i=0; i<nodes.getLength(); i++) {
244
			Node node = nodes.item(i);
2946 chandransh 245
			String facetName = node.getAttributes().getNamedItem("name").getNodeValue();
354 rajveer 246
			subNodes = node.getChildNodes();
5729 amit.gupta 247
			Map<String,Integer> facetValueCountMap = new LinkedHashMap<String,Integer>();
354 rajveer 248
			for(int j=0; j<subNodes.getLength(); j++) {
249
				Node subNode = subNodes.item(j);
250
				facetValueCountMap.put(subNode.getAttributes().getNamedItem("name").getNodeValue(), Integer.parseInt(subNode.getTextContent()));
251
			}
5729 amit.gupta 252
			if(SORTED_FACET_VALUE_MAP.containsKey(facetName)){
253
				List<String> orderedValues = SORTED_FACET_VALUE_MAP.get(facetName);
254
				Map<String, Integer> sortedMap = new LinkedHashMap<String, Integer>();
255
			    for (Iterator<String> it = orderedValues.iterator(); it.hasNext();) {
256
			    	String val = it.next();
257
			        if(facetValueCountMap.containsKey(val)) {
258
			        	sortedMap.put(val, facetValueCountMap.get(val));
259
			        }
260
			    }
261
			    facetMap.put(facetName, sortedMap);
262
			} else {
263
				facetMap.put(facetName, facetValueCountMap);
354 rajveer 264
			}
5729 amit.gupta 265
		}
545 rajveer 266
		this.numberOfResults  = this.getTotalResults();
517 rajveer 267
 
354 rajveer 268
		facetMap = removeUnwantedFacets(facetMap, numberOfResults);
269
		return facetMap;
5729 amit.gupta 270
	}
354 rajveer 271
 
790 vikas 272
	public List<String> getResultMap() {
273
		resultMap = new LinkedList<String>();
354 rajveer 274
 
275
		String resultDocsPath = "/response/result/doc";
276
 
277
 
278
		NodeList nodes = null;
279
		try {
280
			nodes = (NodeList) this.xpath.evaluate(resultDocsPath, this.inputSource, XPathConstants.NODESET);
281
		}
282
		catch (XPathExpressionException xpee) {
283
			return null;
284
		}
285
 
286
		if(nodes.getLength() == 0) {
287
			return null;
288
		}
289
 
290
		for(int i=0; i<nodes.getLength(); i++) {
291
			Node node = nodes.item(i);
292
			String docID = node.getFirstChild().getTextContent();
790 vikas 293
			resultMap.add(docID);	
354 rajveer 294
 		}
295
		return resultMap;
296
	}
297
 
298
	public HashMap<String, Double> getPriceStatsMap() {
299
		HashMap<String, Double> priceStatsMap = new HashMap<String, Double>();
300
 
3561 rajveer 301
		String resultDocsPath = "/response/lst[@name = 'stats']/lst[@name = 'stats_fields']/lst[@name = '" + priceFacetName + "']";
354 rajveer 302
 
303
 
304
		NodeList nodes = null;
305
		try {
306
			nodes = (NodeList) this.xpath.evaluate(resultDocsPath, this.inputSource, XPathConstants.NODESET);
307
		}
308
		catch (XPathExpressionException xpee) {
309
			return null;
310
		}
311
 
312
		if(nodes.getLength() == 0) {
313
			return null;
314
		}
315
 
316
		NodeList subNodes = nodes.item(0).getChildNodes();
317
 
318
		for(int i=0; i<subNodes.getLength(); i++) {
319
			Node node = subNodes.item(i);
320
 
321
			String parameter = node.getAttributes().getNamedItem("name").getNodeValue();
322
			String value = node.getTextContent();
323
			priceStatsMap.put(parameter, Double.parseDouble(value));	
324
 		}
325
		return priceStatsMap;
326
	}
327
 
328
	public HashMap<String,Integer> getRangeQueryResultMap() {
329
		HashMap<String, Integer> rangeQueryResultMap = new HashMap<String,Integer>();
330
 
331
		String resultDocsPath = "/response/lst[@name = 'facet_counts']/lst[@name = 'facet_queries']/int";
332
 
333
 
334
		NodeList nodes = null;
335
		try {
336
			nodes = (NodeList) this.xpath.evaluate(resultDocsPath, this.inputSource, XPathConstants.NODESET);
337
		}
338
		catch (XPathExpressionException xpee) {
339
			return null;
340
		}
341
 
342
		if(nodes.getLength() == 0) {
343
			return null;
344
		}
345
 
346
 
347
		for(int i=0; i<nodes.getLength(); i++) {
348
			Node node = nodes.item(i);
349
 
350
			String query = node.getAttributes().getNamedItem("name").getNodeValue();
351
			String docCount = node.getTextContent();
352
 
353
			rangeQueryResultMap.put(query,Integer.parseInt(docCount));	
354
 		}
355
		return rangeQueryResultMap;
356
 
357
	}
358
 
545 rajveer 359
	/**
360
	 * 
361
	 */
362
	public long getTotalResults(){
363
		String resultDocsPath = "/response/result";
364
		NodeList nodes = null;
365
		try {
366
			nodes = (NodeList) this.xpath.evaluate(resultDocsPath, this.inputSource, XPathConstants.NODESET);
367
		}
368
		catch (XPathExpressionException xpee) {
369
			return 0;
370
		}
371
 
372
		Node node = nodes.item(0);
373
 
374
		return Long.parseLong(node.getAttributes().getNamedItem("numFound").getNodeValue());
375
 
376
	}
354 rajveer 377
		/**
317 ashish 378
	 * 
379
	 * @return
380
	 */
381
	public long[] getResultEntityIDs() {
382
		String expression = "/response/result/doc/long";
383
 
384
		NodeList nodes = null;
385
		try {
386
			nodes = (NodeList) this.xpath.evaluate(expression, this.inputSource,
387
					XPathConstants.NODESET);
388
		} 
389
		catch(XPathExpressionException xpee) {
390
			return null;
391
		}
392
 
393
		if(nodes.getLength() == 0) {
394
			return null;
395
		}
396
 
397
		long[] values = new long[nodes.getLength()];
398
		for(int i=0; i<nodes.getLength(); i++) {
399
			Node node = nodes.item(i);
400
			String value = node.getTextContent();
401
			values[i] = Long.parseLong(value);
402
 		}
403
 
404
		return values;
405
	}
406
 
407
	/**
408
	 * 
409
	 * @return
410
	 */
411
	public String[] getResultCategoryNames() {
412
		String expression = "/response/lst/lst[@name = 'facet_fields']/";
413
		expression += "lst[@name = 'Category']/int/@name";
414
 
415
		NodeList nodes = null;
416
		try {
417
			nodes = (NodeList) this.xpath.evaluate(expression, 
418
				this.inputSource, XPathConstants.NODESET);
419
		}
420
		catch (XPathExpressionException xpee) {
421
			return null;
422
		}
423
 
424
		if(nodes.getLength() == 0) {
425
			return null;
426
		}
427
 
428
		String[] values = new String[nodes.getLength()];
429
		for(int i=0; i<nodes.getLength(); i++) {
430
			Node node = nodes.item(i);
431
			values[i] = node.getTextContent();
432
 		}
433
 
434
		return values;
435
	}
436
 
437
	/**
438
	 * 
439
	 * @return
440
	 */
441
	public int[] getResultCategoryCounts() {
442
		String expression = "/response/lst/lst[@name = 'facet_fields']/";
443
		expression += "lst[@name = 'Category']/int";
444
 
445
		NodeList nodes = null;
446
		try {
447
			nodes = (NodeList) this.xpath.evaluate(expression, 
448
				this.inputSource, XPathConstants.NODESET);
449
		}
450
		catch (XPathExpressionException xpee) {
451
			return null;
452
		}
453
 
454
		if(nodes.getLength() == 0) {
455
			return null;
456
		}
457
 
458
		int[] values = new int[nodes.getLength()];
459
		for(int i=0; i<nodes.getLength(); i++) {
460
			Node node = nodes.item(i);
461
			values[i] = Integer.parseInt(node.getTextContent());
462
 		}
463
 
464
		return values;
465
	}
466
 
467
	/**
468
	 * 
469
	 * @return
470
	 */
471
	public String[]  getResultEntityNames() {
472
		String expression = "/response/result/doc/str";
473
 
474
		NodeList nodes = null;
475
		try {
476
			nodes = (NodeList) this.xpath.evaluate(expression, this.inputSource,
477
					XPathConstants.NODESET);
478
		} 
479
		catch(XPathExpressionException xpee) {
480
			return null;
481
		}
482
 
483
		if(nodes.getLength() == 0) {
484
			return null;
485
		}
486
 
487
		String[] values = new String[nodes.getLength()];
488
		for(int i=0; i<nodes.getLength(); i++) {
489
			Node node = nodes.item(i);
490
			String value = node.getTextContent();
491
			values[i] = value;
492
 		}
493
 
494
		return values;
495
	}
496
 
497
	/**
498
	 * 
499
	 * @param facetDefinitionID
500
	 * @return
501
	 */
354 rajveer 502
	public String[] getFacetValues(String facetDefinitionID) {
317 ashish 503
		String expression = "/response/lst/lst[@name = 'facet_fields']/";
354 rajveer 504
		expression += "lst[@name = '"+ facetDefinitionID +"']/int/@name";
317 ashish 505
 
506
		NodeList nodes = null;
507
		try {
508
			nodes = (NodeList) this.xpath.evaluate(expression, 
509
				this.inputSource, XPathConstants.NODESET);
510
		}
511
		catch (XPathExpressionException xpee) {
512
			return null;
513
		}
514
 
515
		if(nodes.getLength() == 0) {
516
			return null;
517
		}
518
 
519
		String[] values = new String[nodes.getLength()];
520
		for(int i=0; i<nodes.getLength(); i++) {
521
			Node node = nodes.item(i);
522
			values[i] = node.getTextContent();
545 rajveer 523
		}
317 ashish 524
 
525
		return values;
526
	}
527
 
528
	/**
529
	 * 
530
	 * @param facetDefinitionID
531
	 * @return
532
	 */
354 rajveer 533
	public String[] getFacetCounts(String facetDefinitionID) {
317 ashish 534
		String expression = "/response/lst/lst[@name = 'facet_fields']/";
354 rajveer 535
		expression += "lst[@name = '" + facetDefinitionID + "']/int";
317 ashish 536
 
537
		NodeList nodes = null;
538
		try {
539
			nodes = (NodeList) this.xpath.evaluate(expression, 
540
				this.inputSource, XPathConstants.NODESET);
541
		}
542
		catch (XPathExpressionException xpee) {
543
			return null;
544
		}
545
 
546
		if(nodes.getLength() == 0) {
547
			return null;
548
		}
549
 
550
		String[] values = new String[nodes.getLength()];
551
		for(int i=0; i<nodes.getLength(); i++) {
552
			Node node = nodes.item(i);
553
			values[i] = node.getTextContent();
554
 		}
555
 
556
		return values;
557
	}
545 rajveer 558
 
559
	public static void main(String[] args){
560
		/*
561
    	// Hard coded for now
562
    	String[] facetDefIDs = new String[] {"F_50001", "F_50002", "F_50003", "F_50004", "F_50005", "F_50006", "F_50007", "F_50008", "F_50009"};
563
 
564
    	// Hard-coded for now
565
    	String[] facetLabels = new String[] {
566
	    	"Brand", "Price","Form Factor", "Carry In Pocket", "Cellular Technologies", 
567
	    	"Data Connectivity", "Camera Resolution", "Built-in Memory", 
568
	    	"Talk time"
569
    	};
570
 
571
		 */
572
    	String[] facetDefIDs = new String[] {"Category","F_50002","F_50001",  "F_50006", "F_50007" };
2147 chandransh 573
    	//String[] facetLabels = new String[] {"Category","Price", "Brand", "Data Connectivity", "Camera Resolution"	};
545 rajveer 574
 
575
 
576
    	String[] fqrys = {};
3561 rajveer 577
		SolrSearchService search = new SolrSearchService("nokia", fqrys, facetDefIDs, 0 , 20, null, null, 10000, null, -1);
545 rajveer 578
 
579
    	long[] entityIDs = search.getResultEntityIDs();
580
    	log.info("entityIDs=" + Arrays.toString(entityIDs));
581
 
582
    	String[] entityNames = search.getResultEntityNames();
583
    	log.info("entityNames=" + Arrays.toString(entityNames));
584
    	search.getFacetMap();
585
 
586
    	search.getResultMap();
587
    	search.getRangeQueryResultMap();
588
    	search.getPriceStatsMap();
589
    	search.getTotalResults();
590
       	for (int i=0; i<facetDefIDs.length; i++) {
591
       		search.getFacetCounts(facetDefIDs[i]);
592
       		search.getFacetValues(facetDefIDs[i]);
593
       	}
594
 
595
	}
6866 amit.gupta 596
 
597
 
598
    public static List<String> getAllMatches(String text) {
599
        List<String> matches = new ArrayList<String>();
600
        Matcher m = FACET_PATTERN.matcher(text);
601
        while(m.find()) {
602
            matches.add(m.group(1));
603
        }
604
        return matches;
605
    }
317 ashish 606
}