Subversion Repositories SmartDukaan

Rev

Rev 6866 | Rev 6931 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
317 ashish 1
/**
2
 * 
3
 */
4
package in.shop2020.serving.services;
5
 
6
 
1698 chandransh 7
import in.shop2020.config.ConfigException;
6866 amit.gupta 8
import in.shop2020.serving.utils.Utils;
1698 chandransh 9
import in.shop2020.thrift.clients.config.ConfigClient;
2070 rajveer 10
import in.shop2020.utils.CategoryManager;
1698 chandransh 11
 
6866 amit.gupta 12
import java.util.ArrayList;
354 rajveer 13
import java.util.Arrays;
5729 amit.gupta 14
import java.util.Collections;
354 rajveer 15
import java.util.HashMap;
6866 amit.gupta 16
import java.util.HashSet;
5729 amit.gupta 17
import java.util.Iterator;
18
import java.util.LinkedHashMap;
790 vikas 19
import java.util.LinkedList;
20
import java.util.List;
5729 amit.gupta 21
import java.util.Map;
6866 amit.gupta 22
import java.util.Set;
354 rajveer 23
import java.util.TreeMap;
6866 amit.gupta 24
import java.util.regex.Matcher;
25
import java.util.regex.Pattern;
354 rajveer 26
 
317 ashish 27
import javax.xml.xpath.XPath;
28
import javax.xml.xpath.XPathConstants;
29
import javax.xml.xpath.XPathExpressionException;
30
import javax.xml.xpath.XPathFactory;
31
 
832 rajveer 32
import org.apache.log4j.Logger;
317 ashish 33
import org.w3c.dom.Node;
34
import org.w3c.dom.NodeList;
35
import org.xml.sax.InputSource;
36
 
354 rajveer 37
 
317 ashish 38
/**
545 rajveer 39
 * @author rajveer
317 ashish 40
 *
41
 */
42
public class SolrSearchService {
43
	/**
44
	 * 
45
	 */
2147 chandransh 46
	private static Logger log = Logger.getLogger(Class.class);
545 rajveer 47
 
48
	/**
49
	 * 
50
	 */
1698 chandransh 51
	// Base URL that every search request is appended to. Assigned exactly once by
	// the static initializer of this class: the "solr_url" value from the config
	// client, or a localhost default when that lookup throws ConfigException.
	public static final String SOLR_URL;
317 ashish 52
 
6866 amit.gupta 53
	/**
	 * Matches facet identifiers of the form {@code F_} followed by five digits.
	 * The capturing group sits inside a zero-width lookahead, so group(1)
	 * carries the identifier while the match itself consumes no input.
	 */
	private static final Pattern FACET_PATTERN = Pattern.compile("(?=(F_\\d{5}))");
54
 
5729 amit.gupta 55
	private static final Map<String, List<String>> SORTED_FACET_VALUE_MAP = Collections.unmodifiableMap(
56
			new HashMap<String, List<String>>(){
57
				/**
58
				 * 
59
				 */
60
				private static final long serialVersionUID = 1L;
61
 
62
				{
6896 amit.gupta 63
					put("F_50007", Arrays.asList("Upto 2 Mpx", "2 - 5 Mpx", "5 - 10 Mpx", "10 Mpx and above"));
64
					put("F_50024", Arrays.asList("Upto 10 Mpx", "10 - 12 Mpx", "12 - 15 Mpx", "15 - 18 Mpx", "18 Mpx and above"));
65
					put("F_50025", Arrays.asList("Upto 4x", "4 - 6x", "6 - 10x", "10 - 14x", "14 - 18x", "18x and above"));
66
					put("F_50026", Arrays.asList("Below 2 in.", "2 to 2.9 in.", "3 to 3.9 in.", "4 in. and above"));
67
					put("F_50032", Arrays.asList("Below 3 in.", "3 to 3.9 in.", "4 to 4.9 in.", "5 in. and above"));
68
					put("F_50027", Arrays.asList("Upto 10 Mpx", "10 - 15 Mpx", "15 - 20 Mpx", "20 Mpx and above"));
5729 amit.gupta 69
				}
70
		});
1698 chandransh 71
	// Resolves SOLR_URL once at class-load time. The value comes from the config
	// client; when that lookup fails the failure is logged and a localhost
	// default is used instead.
	static {
		String configuredUrl;
		try {
			configuredUrl = ConfigClient.getClient().get("solr_url");
		} catch (ConfigException cex) {
			log.error("Unable to get the solr URL from the config server. Setting the default value.", cex);
			configuredUrl = "http://localhost:8983/solr/select/";
		}
		SOLR_URL = configuredUrl;
	}
81
 
317 ashish 82
	/**
83
	 * 
84
	 */
85
	// XPath evaluator created in the constructor; every accessor of this class
	// uses it to pull values out of the search response.
	private XPath xpath;
86
 
87
	/**
88
	 * 
89
	 */
90
	// Input source created in the constructor from the assembled request URI.
	// NOTE(review): every accessor hands this same URI-based source to
	// xpath.evaluate, so each call presumably re-fetches the response - confirm.
	private InputSource inputSource;
91
 
5729 amit.gupta 92
	/** Facet name mapped to (facet value mapped to count); assigned by the constructor and by getFacetMap(). */
	Map<String,Map<String,Integer>> facetMap;
354 rajveer 93
 
6866 amit.gupta 94
	/**
	 * The query exactly as passed to the constructor (stored before whitespace
	 * is normalised); removeUnwantedFacets() scans it for facet identifiers.
	 */
	private String 	query;
95
 
790 vikas 96
	/** Result list most recently built by getResultMap(): one entry per doc element of the response. */
	List<String> resultMap;
545 rajveer 97
 
98
	/** Total hit count; refreshed from getTotalResults() whenever getFacetMap() finds facet fields. */
	long numberOfResults=0;
3561 rajveer 99
 
100
	/**
	 * Name of the field used for price stats, price filtering and sorting.
	 * The constructor appends "_" and the source id when the source id is not -1.
	 */
	String priceFacetName = "F_50002";
101
 
317 ashish 102
	/**
103
	 * 
104
	 * @param query
105
	 * @param facetDefinitionIDs
106
	 */
3561 rajveer 107
	public SolrSearchService(String query, String[] facetqueries, String[] facetDefinitionIDs, long start, long rows,  Double minPrice, Double maxPrice, long categoryId, String sortOrder, long sourceId) {
6866 amit.gupta 108
		this.query = query;
354 rajveer 109
 
6866 amit.gupta 110
		List<String> rootFacetsQueried = new ArrayList<String>();
3561 rajveer 111
		if(sourceId != -1){
112
			priceFacetName = priceFacetName + "_" + sourceId;
113
		}
114
 
317 ashish 115
		this.xpath = XPathFactory.newInstance().newXPath();
545 rajveer 116
 
2606 rajveer 117
		query = query.trim().replaceAll("\\s+", " ");
545 rajveer 118
    	log.info("query=" + query);
119
 
317 ashish 120
		String uri = SOLR_URL + "?wt=xml&q=" + query;
121
 
3561 rajveer 122
		uri += "&stats=on&stats.field=" + priceFacetName;
545 rajveer 123
 
124
 
125
		if(minPrice != null || maxPrice != null){
126
			String minString = "0";
127
			String maxString = "*";  
128
			if(minPrice != null){
129
				minString = minPrice.toString();
130
			}
131
			if(maxPrice != null){
132
				maxString = maxPrice.toString();
133
			}
3561 rajveer 134
			uri += "&fq=" + priceFacetName + ":["+  minString + " " + maxString + "]";
354 rajveer 135
		}
136
 
569 rajveer 137
		if(categoryId != 10000){
138
			uri += "&fq=F_50010:\"" + CategoryManager.getCategoryManager().getCategoryLabel(categoryId) + "\"";
139
		}
354 rajveer 140
 
569 rajveer 141
		if(sortOrder != null){
3561 rajveer 142
			//replace the price facet name, so that it can pick price for the source.
143
			sortOrder = sortOrder.replace("F_50002", priceFacetName);
569 rajveer 144
			uri += "&sort=" + sortOrder;
145
		}
545 rajveer 146
 
317 ashish 147
		if(facetqueries != null) {
6866 amit.gupta 148
			//sorting will guarantee all similar facets together so that we can assume or between all similar items without fail.
149
			Arrays.sort(facetqueries);
150
			String fq="";
317 ashish 151
			for(int i=0; i<facetqueries.length; i++) {
6866 amit.gupta 152
				String[] tokens = facetqueries[i].split(":");
153
				if(rootFacetsQueried.contains(tokens[0])) {
154
					uri += " OR ";
155
					if(facetqueries[i].contains(" ")){
156
						uri +=  "\"" + tokens[1] + "\"";
157
					}else{
158
						uri += facetqueries[i];
159
					}
160
 
161
				} else {
162
					if(Arrays.asList(Utils.facetDefIDs).contains(tokens[0])) {
163
						fq = "{!tag=dt" + rootFacetsQueried.size() + "}";
164
						rootFacetsQueried.add(tokens[0]);
165
						if(facetqueries[i].contains(" ") && !(facetqueries[i].contains(" OR "))){
166
							fq +=  tokens[0] + ":\"" + tokens[1] + "\"";
167
						}else{
168
							fq += facetqueries[i] + "";
169
						}
170
					}
171
					uri += "&fq=" + fq;
536 rajveer 172
				}
317 ashish 173
			}
174
		}
6866 amit.gupta 175
		uri += "&fl=ID,Name&facet=true&start=" + start + "&rows=" + rows + "&facet.mincount=1";
2435 rajveer 176
		if(facetDefinitionIDs != null){
177
			for(int i=0; i<facetDefinitionIDs.length; i++) {
6866 amit.gupta 178
				if(rootFacetsQueried.contains(facetDefinitionIDs[i])){
179
					uri += "&facet.field={!ex=dt" + rootFacetsQueried.indexOf(facetDefinitionIDs[i])+ "}"+ facetDefinitionIDs[i]; 
180
				} else {
181
					uri += "&facet.field=" + facetDefinitionIDs[i];
182
				}
2435 rajveer 183
			}
317 ashish 184
		}
3262 rajveer 185
		log.info("uri=" + uri);
317 ashish 186
 
187
		this.inputSource = new InputSource(uri);
517 rajveer 188
 
545 rajveer 189
		this.facetMap = getFacetMap();
354 rajveer 190
	}
191
 
5729 amit.gupta 192
	public Map<String,Map<String,Integer>> removeUnwantedFacets(Map<String,Map<String,Integer>> facetMap, long numberOfResults){
6866 amit.gupta 193
 
194
		Set<String> facetsInQuery = new HashSet<String>(getAllMatches(this.query));
5729 amit.gupta 195
		Map<String,Map<String,Integer>> tempFacets = new TreeMap<String, Map<String,Integer>>(); 
354 rajveer 196
		for(String facet : facetMap.keySet()){
6866 amit.gupta 197
			if(facetMap.get(facet).size() > 0 && !facetsInQuery.contains(facet)){
5729 amit.gupta 198
				Map<String,Integer> tempMap = new LinkedHashMap<String, Integer>();
545 rajveer 199
 
354 rajveer 200
				for(String facetValueName : facetMap.get(facet).keySet()){
6866 amit.gupta 201
					//if(facetMap.get(facet).get(facetValueName) != 0 && facetMap.get(facet).get(facetValueName) != numberOfResults){
545 rajveer 202
						tempMap.put(facetValueName, facetMap.get(facet).get(facetValueName));
6866 amit.gupta 203
					//}
354 rajveer 204
				}
545 rajveer 205
				if(!tempMap.isEmpty()){
206
					tempFacets.put(facet, tempMap);
354 rajveer 207
				}
545 rajveer 208
			}	
354 rajveer 209
		}
6866 amit.gupta 210
		/*if(tempFacets.containsKey("F_50010")){
550 rajveer 211
			tempFacets.remove("F_50011");
6866 amit.gupta 212
		}*/
354 rajveer 213
 
214
		return tempFacets;
215
	}
216
 
5729 amit.gupta 217
	public Map<String,Integer> getFacetDetails(String facetName){
2606 rajveer 218
		if(facetMap != null){
219
			return facetMap.get(facetName);
220
		}else{
221
			return null;
222
		}
354 rajveer 223
	}
224
 
5729 amit.gupta 225
	public Map<String,Map<String,Integer>> getFacetMap() {
226
		facetMap = new TreeMap<String,Map<String,Integer>>();
354 rajveer 227
 
228
		String facetNamePath = "/response/lst/lst[@name = 'facet_fields']/lst";
545 rajveer 229
 
354 rajveer 230
		NodeList nodes = null;
231
		try {
232
			nodes = (NodeList) this.xpath.evaluate(facetNamePath, this.inputSource, XPathConstants.NODESET);
233
		}
234
		catch (XPathExpressionException xpee) {
235
			return null;
236
		}
237
 
238
		if(nodes.getLength() == 0) {
239
			return null;
240
		}
241
 
242
		NodeList subNodes = null;
243
 
244
		for(int i=0; i<nodes.getLength(); i++) {
245
			Node node = nodes.item(i);
2946 chandransh 246
			String facetName = node.getAttributes().getNamedItem("name").getNodeValue();
354 rajveer 247
			subNodes = node.getChildNodes();
5729 amit.gupta 248
			Map<String,Integer> facetValueCountMap = new LinkedHashMap<String,Integer>();
354 rajveer 249
			for(int j=0; j<subNodes.getLength(); j++) {
250
				Node subNode = subNodes.item(j);
251
				facetValueCountMap.put(subNode.getAttributes().getNamedItem("name").getNodeValue(), Integer.parseInt(subNode.getTextContent()));
252
			}
5729 amit.gupta 253
			if(SORTED_FACET_VALUE_MAP.containsKey(facetName)){
254
				List<String> orderedValues = SORTED_FACET_VALUE_MAP.get(facetName);
255
				Map<String, Integer> sortedMap = new LinkedHashMap<String, Integer>();
256
			    for (Iterator<String> it = orderedValues.iterator(); it.hasNext();) {
257
			    	String val = it.next();
258
			        if(facetValueCountMap.containsKey(val)) {
259
			        	sortedMap.put(val, facetValueCountMap.get(val));
260
			        }
261
			    }
262
			    facetMap.put(facetName, sortedMap);
263
			} else {
264
				facetMap.put(facetName, facetValueCountMap);
354 rajveer 265
			}
5729 amit.gupta 266
		}
545 rajveer 267
		this.numberOfResults  = this.getTotalResults();
517 rajveer 268
 
354 rajveer 269
		facetMap = removeUnwantedFacets(facetMap, numberOfResults);
270
		return facetMap;
5729 amit.gupta 271
	}
354 rajveer 272
 
790 vikas 273
	public List<String> getResultMap() {
274
		resultMap = new LinkedList<String>();
354 rajveer 275
 
276
		String resultDocsPath = "/response/result/doc";
277
 
278
 
279
		NodeList nodes = null;
280
		try {
281
			nodes = (NodeList) this.xpath.evaluate(resultDocsPath, this.inputSource, XPathConstants.NODESET);
282
		}
283
		catch (XPathExpressionException xpee) {
284
			return null;
285
		}
286
 
287
		if(nodes.getLength() == 0) {
288
			return null;
289
		}
290
 
291
		for(int i=0; i<nodes.getLength(); i++) {
292
			Node node = nodes.item(i);
293
			String docID = node.getFirstChild().getTextContent();
790 vikas 294
			resultMap.add(docID);	
354 rajveer 295
 		}
296
		return resultMap;
297
	}
298
 
299
	public HashMap<String, Double> getPriceStatsMap() {
300
		HashMap<String, Double> priceStatsMap = new HashMap<String, Double>();
301
 
3561 rajveer 302
		String resultDocsPath = "/response/lst[@name = 'stats']/lst[@name = 'stats_fields']/lst[@name = '" + priceFacetName + "']";
354 rajveer 303
 
304
 
305
		NodeList nodes = null;
306
		try {
307
			nodes = (NodeList) this.xpath.evaluate(resultDocsPath, this.inputSource, XPathConstants.NODESET);
308
		}
309
		catch (XPathExpressionException xpee) {
310
			return null;
311
		}
312
 
313
		if(nodes.getLength() == 0) {
314
			return null;
315
		}
316
 
317
		NodeList subNodes = nodes.item(0).getChildNodes();
318
 
319
		for(int i=0; i<subNodes.getLength(); i++) {
320
			Node node = subNodes.item(i);
321
 
322
			String parameter = node.getAttributes().getNamedItem("name").getNodeValue();
323
			String value = node.getTextContent();
324
			priceStatsMap.put(parameter, Double.parseDouble(value));	
325
 		}
326
		return priceStatsMap;
327
	}
328
 
329
	public HashMap<String,Integer> getRangeQueryResultMap() {
330
		HashMap<String, Integer> rangeQueryResultMap = new HashMap<String,Integer>();
331
 
332
		String resultDocsPath = "/response/lst[@name = 'facet_counts']/lst[@name = 'facet_queries']/int";
333
 
334
 
335
		NodeList nodes = null;
336
		try {
337
			nodes = (NodeList) this.xpath.evaluate(resultDocsPath, this.inputSource, XPathConstants.NODESET);
338
		}
339
		catch (XPathExpressionException xpee) {
340
			return null;
341
		}
342
 
343
		if(nodes.getLength() == 0) {
344
			return null;
345
		}
346
 
347
 
348
		for(int i=0; i<nodes.getLength(); i++) {
349
			Node node = nodes.item(i);
350
 
351
			String query = node.getAttributes().getNamedItem("name").getNodeValue();
352
			String docCount = node.getTextContent();
353
 
354
			rangeQueryResultMap.put(query,Integer.parseInt(docCount));	
355
 		}
356
		return rangeQueryResultMap;
357
 
358
	}
359
 
545 rajveer 360
	/**
361
	 * 
362
	 */
363
	public long getTotalResults(){
364
		String resultDocsPath = "/response/result";
365
		NodeList nodes = null;
366
		try {
367
			nodes = (NodeList) this.xpath.evaluate(resultDocsPath, this.inputSource, XPathConstants.NODESET);
368
		}
369
		catch (XPathExpressionException xpee) {
370
			return 0;
371
		}
372
 
373
		Node node = nodes.item(0);
374
 
375
		return Long.parseLong(node.getAttributes().getNamedItem("numFound").getNodeValue());
376
 
377
	}
354 rajveer 378
		/**
317 ashish 379
	 * 
380
	 * @return
381
	 */
382
	public long[] getResultEntityIDs() {
383
		String expression = "/response/result/doc/long";
384
 
385
		NodeList nodes = null;
386
		try {
387
			nodes = (NodeList) this.xpath.evaluate(expression, this.inputSource,
388
					XPathConstants.NODESET);
389
		} 
390
		catch(XPathExpressionException xpee) {
391
			return null;
392
		}
393
 
394
		if(nodes.getLength() == 0) {
395
			return null;
396
		}
397
 
398
		long[] values = new long[nodes.getLength()];
399
		for(int i=0; i<nodes.getLength(); i++) {
400
			Node node = nodes.item(i);
401
			String value = node.getTextContent();
402
			values[i] = Long.parseLong(value);
403
 		}
404
 
405
		return values;
406
	}
407
 
408
	/**
409
	 * 
410
	 * @return
411
	 */
412
	public String[] getResultCategoryNames() {
413
		String expression = "/response/lst/lst[@name = 'facet_fields']/";
414
		expression += "lst[@name = 'Category']/int/@name";
415
 
416
		NodeList nodes = null;
417
		try {
418
			nodes = (NodeList) this.xpath.evaluate(expression, 
419
				this.inputSource, XPathConstants.NODESET);
420
		}
421
		catch (XPathExpressionException xpee) {
422
			return null;
423
		}
424
 
425
		if(nodes.getLength() == 0) {
426
			return null;
427
		}
428
 
429
		String[] values = new String[nodes.getLength()];
430
		for(int i=0; i<nodes.getLength(); i++) {
431
			Node node = nodes.item(i);
432
			values[i] = node.getTextContent();
433
 		}
434
 
435
		return values;
436
	}
437
 
438
	/**
439
	 * 
440
	 * @return
441
	 */
442
	public int[] getResultCategoryCounts() {
443
		String expression = "/response/lst/lst[@name = 'facet_fields']/";
444
		expression += "lst[@name = 'Category']/int";
445
 
446
		NodeList nodes = null;
447
		try {
448
			nodes = (NodeList) this.xpath.evaluate(expression, 
449
				this.inputSource, XPathConstants.NODESET);
450
		}
451
		catch (XPathExpressionException xpee) {
452
			return null;
453
		}
454
 
455
		if(nodes.getLength() == 0) {
456
			return null;
457
		}
458
 
459
		int[] values = new int[nodes.getLength()];
460
		for(int i=0; i<nodes.getLength(); i++) {
461
			Node node = nodes.item(i);
462
			values[i] = Integer.parseInt(node.getTextContent());
463
 		}
464
 
465
		return values;
466
	}
467
 
468
	/**
469
	 * 
470
	 * @return
471
	 */
472
	public String[]  getResultEntityNames() {
473
		String expression = "/response/result/doc/str";
474
 
475
		NodeList nodes = null;
476
		try {
477
			nodes = (NodeList) this.xpath.evaluate(expression, this.inputSource,
478
					XPathConstants.NODESET);
479
		} 
480
		catch(XPathExpressionException xpee) {
481
			return null;
482
		}
483
 
484
		if(nodes.getLength() == 0) {
485
			return null;
486
		}
487
 
488
		String[] values = new String[nodes.getLength()];
489
		for(int i=0; i<nodes.getLength(); i++) {
490
			Node node = nodes.item(i);
491
			String value = node.getTextContent();
492
			values[i] = value;
493
 		}
494
 
495
		return values;
496
	}
497
 
498
	/**
499
	 * 
500
	 * @param facetDefinitionID
501
	 * @return
502
	 */
354 rajveer 503
	public String[] getFacetValues(String facetDefinitionID) {
317 ashish 504
		String expression = "/response/lst/lst[@name = 'facet_fields']/";
354 rajveer 505
		expression += "lst[@name = '"+ facetDefinitionID +"']/int/@name";
317 ashish 506
 
507
		NodeList nodes = null;
508
		try {
509
			nodes = (NodeList) this.xpath.evaluate(expression, 
510
				this.inputSource, XPathConstants.NODESET);
511
		}
512
		catch (XPathExpressionException xpee) {
513
			return null;
514
		}
515
 
516
		if(nodes.getLength() == 0) {
517
			return null;
518
		}
519
 
520
		String[] values = new String[nodes.getLength()];
521
		for(int i=0; i<nodes.getLength(); i++) {
522
			Node node = nodes.item(i);
523
			values[i] = node.getTextContent();
545 rajveer 524
		}
317 ashish 525
 
526
		return values;
527
	}
528
 
529
	/**
530
	 * 
531
	 * @param facetDefinitionID
532
	 * @return
533
	 */
354 rajveer 534
	public String[] getFacetCounts(String facetDefinitionID) {
317 ashish 535
		String expression = "/response/lst/lst[@name = 'facet_fields']/";
354 rajveer 536
		expression += "lst[@name = '" + facetDefinitionID + "']/int";
317 ashish 537
 
538
		NodeList nodes = null;
539
		try {
540
			nodes = (NodeList) this.xpath.evaluate(expression, 
541
				this.inputSource, XPathConstants.NODESET);
542
		}
543
		catch (XPathExpressionException xpee) {
544
			return null;
545
		}
546
 
547
		if(nodes.getLength() == 0) {
548
			return null;
549
		}
550
 
551
		String[] values = new String[nodes.getLength()];
552
		for(int i=0; i<nodes.getLength(); i++) {
553
			Node node = nodes.item(i);
554
			values[i] = node.getTextContent();
555
 		}
556
 
557
		return values;
558
	}
545 rajveer 559
 
560
	public static void main(String[] args){
561
		/*
562
    	// Hard coded for now
563
    	String[] facetDefIDs = new String[] {"F_50001", "F_50002", "F_50003", "F_50004", "F_50005", "F_50006", "F_50007", "F_50008", "F_50009"};
564
 
565
    	// Hard-coded for now
566
    	String[] facetLabels = new String[] {
567
	    	"Brand", "Price","Form Factor", "Carry In Pocket", "Cellular Technologies", 
568
	    	"Data Connectivity", "Camera Resolution", "Built-in Memory", 
569
	    	"Talk time"
570
    	};
571
 
572
		 */
573
    	String[] facetDefIDs = new String[] {"Category","F_50002","F_50001",  "F_50006", "F_50007" };
2147 chandransh 574
    	//String[] facetLabels = new String[] {"Category","Price", "Brand", "Data Connectivity", "Camera Resolution"	};
545 rajveer 575
 
576
 
577
    	String[] fqrys = {};
3561 rajveer 578
		SolrSearchService search = new SolrSearchService("nokia", fqrys, facetDefIDs, 0 , 20, null, null, 10000, null, -1);
545 rajveer 579
 
580
    	long[] entityIDs = search.getResultEntityIDs();
581
    	log.info("entityIDs=" + Arrays.toString(entityIDs));
582
 
583
    	String[] entityNames = search.getResultEntityNames();
584
    	log.info("entityNames=" + Arrays.toString(entityNames));
585
    	search.getFacetMap();
586
 
587
    	search.getResultMap();
588
    	search.getRangeQueryResultMap();
589
    	search.getPriceStatsMap();
590
    	search.getTotalResults();
591
       	for (int i=0; i<facetDefIDs.length; i++) {
592
       		search.getFacetCounts(facetDefIDs[i]);
593
       		search.getFacetValues(facetDefIDs[i]);
594
       	}
595
 
596
	}
6866 amit.gupta 597
 
598
 
599
    public static List<String> getAllMatches(String text) {
600
        List<String> matches = new ArrayList<String>();
601
        Matcher m = FACET_PATTERN.matcher(text);
602
        while(m.find()) {
603
            matches.add(m.group(1));
604
        }
605
        return matches;
606
    }
317 ashish 607
}