Subversion Repositories SmartDukaan

Rev

Blame | Last modification | View Log | RSS feed

/**
 * 
 */
package in.shop2020.serving.services;


import in.shop2020.config.ConfigException;
import in.shop2020.serving.controllers.SearchController;
import in.shop2020.serving.utils.Utils;
import in.shop2020.thrift.clients.config.ConfigClient;

import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;

import org.apache.commons.collections.ListUtils;
import org.apache.log4j.Logger;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;


/**
 * @author rajveer
 *
 */
public class SolrSearchService {
        /**
         * Logger for this service. It is registered under this class so that its
         * messages are attributed to SolrSearchService rather than to java.lang.Class.
         */
        private static final Logger log = Logger.getLogger(SolrSearchService.class);

        /**
         * Price statistics fetched by the constructor before the price range filter
         * is appended to the request. Stays null when neither a minimum nor a maximum
         * price was supplied, or when the statistics could not be read.
         */
        private HashMap<String, Double> dynamicPriceMap = null;
        
        /**
         * Base URL of the Solr select handler. It is assigned once by the static
         * initializer of this class: the value of the "solr_url" configuration key,
         * or a localhost default when the configuration lookup fails.
         */
        public static final String SOLR_URL;
        
        // Matches every position at which "F_" followed by five digits begins. The
        // facet id is captured in group 1 inside a zero-width lookahead, so the match
        // itself consumes no characters.
        private static final Pattern FACET_PATTERN = Pattern.compile("(?=(F_\\d{5}))");
        
        /**
         * Display order of the values of range-like facets, keyed by facet id.
         * getFacetMap() uses it to emit the values of these facets in the listed
         * order instead of the order in which Solr returned them.
         */
        private static final Map<String, List<String>> SORTED_FACET_VALUE_MAP = buildSortedFacetValueMap();

        /**
         * Assembles the read-only facet id to ordered value list table.
         *
         * @return an unmodifiable map from facet id to its values in display order
         */
        private static Map<String, List<String>> buildSortedFacetValueMap() {
                Map<String, List<String>> ordering = new HashMap<String, List<String>>();
                ordering.put("F_50007", Arrays.asList("Upto 2 Mpx", "2 - 5 Mpx", "5 - 10 Mpx", "10 Mpx and above"));
                ordering.put("F_50024", Arrays.asList("Upto 10 Mpx", "10 - 12 Mpx", "12 - 15 Mpx", "15 - 18 Mpx", "18 Mpx and above"));
                ordering.put("F_50025", Arrays.asList("Upto 4x", "4 - 6x", "6 - 10x", "10 - 14x", "14 - 18x", "18x and above"));
                ordering.put("F_50026", Arrays.asList("Below 2 in.", "2 to 2.9 in.", "3 to 3.9 in.", "4 in. and above"));
                ordering.put("F_50032", Arrays.asList("Below 3 in.", "3 to 3.9 in.", "4 to 4.9 in.", "5 in. and above"));
                ordering.put("F_50027", Arrays.asList("Upto 10 Mpx", "10 - 15 Mpx", "15 - 20 Mpx", "20 Mpx and above"));
                return Collections.unmodifiableMap(ordering);
        }
        /**
         * Category specific facets, keyed by the category label returned by
         * SearchController.getCategoryLabel(long). setFilterableFacets adds these
         * to the root facets when a search is restricted to one such category.
         */
        public static final Map<String, List<String>> CATEGORY_FACET_MAP = buildCategoryFacetMap();

        /**
         * Assembles the read-only category label to facet id list table.
         *
         * @return an unmodifiable map from category label to its extra facet ids
         */
        private static Map<String, List<String>> buildCategoryFacetMap() {
                // Data Connectivity, Camera Resolution, Operating System, Screen Size
                List<String> mobileFacets = Arrays.asList("F_50006", "F_50007", "F_50031", "F_50032");
                // Camera Resolution, Operating System, Screen Size
                List<String> tabletFacets = Arrays.asList("F_50036", "F_50035", "F_50034");
                // Operating System, Processor, Storage, RAM, Screen Size
                List<String> laptopFacets = Arrays.asList("F_50013", "F_50014", "F_50015","F_50017", "F_50033");
                // Resolution, Optical Zoom, Display Size
                List<String> compactCameras = Arrays.asList("F_50024", "F_50025", "F_50026");
                // Display Size
                List<String> dslrCameras = Arrays.asList("F_50026");
                // Capacity, Class
                List<String> memoryCards = Arrays.asList("F_50018", "F_50019");
                // Capacity
                List<String> penDrives = Arrays.asList("F_50020");
                // Capacity, Type, Interface
                List<String> externalHardDisks = Arrays.asList("F_50021", "F_50022", "F_50023");

                Map<String, List<String>> facetsByCategory = new HashMap<String, List<String>>();
                facetsByCategory.put(SearchController.getCategoryLabel(10001L), mobileFacets);
                facetsByCategory.put(SearchController.getCategoryLabel(10006L), mobileFacets);
                facetsByCategory.put(SearchController.getCategoryLabel(10009L), tabletFacets);
                facetsByCategory.put(SearchController.getCategoryLabel(10010L), tabletFacets);
                facetsByCategory.put(SearchController.getCategoryLabel(10013L), memoryCards);
                facetsByCategory.put(SearchController.getCategoryLabel(10017L), penDrives);
                facetsByCategory.put(SearchController.getCategoryLabel(10049L), laptopFacets);
                facetsByCategory.put(SearchController.getCategoryLabel(10050L), laptopFacets);
                facetsByCategory.put(SearchController.getCategoryLabel(10073L), externalHardDisks);
                facetsByCategory.put(SearchController.getCategoryLabel(11002L), compactCameras);
                facetsByCategory.put(SearchController.getCategoryLabel(11003L), dslrCameras);
                return Collections.unmodifiableMap(facetsByCategory);
        }
        // Resolves SOLR_URL once at class load: the configured "solr_url" value when
        // the config lookup succeeds, otherwise the local default below.
        static {
                String configuredUrl;
                try {
                        configuredUrl = ConfigClient.getClient().get("solr_url");
                } catch (ConfigException cex) {
                        log.error("Unable to get the solr URL from the config server. Setting the default value.", cex);
                        configuredUrl = "http://localhost:8983/solr/select/";
                }
                SOLR_URL = configuredUrl;
        }
        
        /**
         * XPath evaluator created in the constructor and used by every accessor
         * to query the Solr XML response.
         */
        private XPath xpath;
        
        /**
         * Source built from the complete Solr request URI at the end of the constructor.
         * NOTE(review): it only carries a system id, so each XPath evaluation presumably
         * fetches and parses the response again - confirm.
         */
        private InputSource inputSource;
        
        // Facet id -> (facet value -> count). Assigned by getFacetMap(); the constructor
        // stores that method's return value here, which can be null.
        Map<String,Map<String,Integer>> facetMap;
        
        // Query string exactly as it was passed to the constructor.
        private String  query;
        
        // List created by the most recent getResultMap() call.
        List<String> resultMap;
        
        // Total hit count recorded by getFacetMap() via getTotalResults().
        long numberOfResults=0;
        
        // Name of the price facet field; the constructor appends "_" + sourceId to it
        // when sourceId is not -1.
        String priceFacetName = "F_50002";
        
        // Facet ids requested from Solr as facet.field, chosen by setFilterableFacets.
        List<String> filtrableFacets;
        
        /**
         * Builds the Solr select URI for the given search and loads the facet
         * counts from the response.
         *
         * @param query        search query; appended to the URI as the q parameter as given
         * @param facetqueries facet filters of the form facetId:value, may be null. The array
         *                     is sorted in place; values of the same facet are OR-ed together.
         *                     Entries without a value part are logged and ignored.
         * @param start        offset of the first result to return
         * @param rows         maximum number of results to return
         * @param minPrice     lower price bound, or null for 0
         * @param maxPrice     upper price bound, or null for no upper bound
         * @param sortOrder    Solr sort clause, or null for none; occurrences of F_50002 are
         *                     replaced by the source specific price facet name
         * @param sourceId     -1 for the default price facet, otherwise the id that is
         *                     appended to the price facet name
         */
        public SolrSearchService(String query, String[] facetqueries, long start, long rows,  Double minPrice, Double maxPrice, String sortOrder, long sourceId) {

                this.query = query;

                List<String> facetsQueried = new ArrayList<String>();
                if(sourceId != -1){
                        priceFacetName = priceFacetName + "_" + sourceId;
                }

                setFilterableFacets(facetqueries);

                this.xpath = XPathFactory.newInstance().newXPath();

                // NOTE(review): the whitespace-normalised query is only logged; the URI
                // below is still built from the raw this.query. Confirm which was intended.
                query = query.trim().replaceAll("\\s+", " ");
                log.info("query=" + query);

                StringBuilder uri = new StringBuilder();
                uri.append(SOLR_URL).append("?wt=xml&q=").append(this.query);
                uri.append("&stats=on&stats.field=").append(priceFacetName);

                if(sortOrder != null){
                        //replace the price facet name, so that it can pick price for the source.
                        sortOrder = sortOrder.replace("F_50002", priceFacetName);
                        uri.append("&sort=").append(sortOrder);
                }

                if(facetqueries != null) {
                        //sorting will guarantee all similar facets together so that we can assume or between all similar items without fail.
                        Arrays.sort(facetqueries);
                        for(String facetquery : facetqueries) {
                                String[] tokens = facetquery.split(":");
                                if(tokens.length < 2) {
                                        // No "facetId:value" pair to filter on.
                                        log.warn("Ignoring facet query without a value: " + facetquery);
                                        continue;
                                }
                                String value = "";
                                try {
                                        value = URLEncoder.encode(tokens[1], "UTF-8");
                                } catch (UnsupportedEncodingException e) {
                                        log.error("Unable to encode the facet value " + tokens[1], e);
                                }
                                if(facetsQueried.contains(tokens[0])) {
                                        // Same facet as the previous entry: extend the fq that was just appended.
                                        uri.append(" OR  ").append(tokens[0]).append(":\"").append(value).append("\"");
                                } else {
                                        // Tag the filter so that the facet counts of this field can exclude it below.
                                        uri.append("&fq={!tag=dt").append(facetsQueried.size()).append("}");
                                        facetsQueried.add(tokens[0]);
                                        uri.append(tokens[0]).append(":\"").append(value).append("\"");
                                }
                        }
                }

                String minString = "0";
                String maxString = "*";
                if(minPrice != null || maxPrice != null){
                        try {
                                // Price statistics of the search before the price range filter is applied.
                                dynamicPriceMap = getPriceStatsMap(new InputSource(uri.toString()));
                        } catch (Exception e){
                                log.error("Unable to fetch the price statistics for uri=" + uri, e);
                        }
                        if(minPrice != null){
                                minString = minPrice.toString();
                        }
                        if(maxPrice != null){
                                maxString = maxPrice.toString();
                        }
                }
                // NOTE(review): the range is sent as "[min max]" without "TO"; kept as is - confirm the Solr version accepts it.
                uri.append("&fq=").append(priceFacetName).append(":[").append(minString).append(" ").append(maxString).append("]");
                uri.append("&fl=ID,Name&facet=true&start=").append(start).append("&rows=").append(rows).append("&facet.mincount=1");
                for(String facetDefinitionID : filtrableFacets) {
                        uri.append("&facet.field=");
                        int tagIndex = facetsQueried.indexOf(facetDefinitionID);
                        if(tagIndex >= 0){
                                uri.append("{!ex=dt").append(tagIndex).append("}");
                        }
                        uri.append(facetDefinitionID);
                }
                log.info("uri=" + uri);

                this.inputSource = new InputSource(uri.toString());

                this.facetMap = getFacetMap();
        }

        /**
         * Chooses the facets that are requested from Solr for this search and stores
         * them in filtrableFacets.
         *
         * The root facets are always included. When the search is restricted to a single
         * category known to CATEGORY_FACET_MAP, through facet F_50011 or F_50010 in either
         * the facet filters or the query itself, the facets of that category are added.
         * Facet filters are inspected before the query, and F_50011 before F_50010.
         *
         * @param facetqueries facet filters of the form facetId:value, may be null
         */
        @SuppressWarnings("unchecked")
        private void setFilterableFacets(String[] facetqueries) {
                List<String> queriedFacets = getAllMatches(this.query);
                String[] categoryFacetIds = { "F_50011", "F_50010" };

                if(facetqueries != null) {
                        List<String> filteredFacets = getAllMatches(Arrays.toString(facetqueries));
                        for(String categoryFacetId : categoryFacetIds) {
                                if(!filteredFacets.contains(categoryFacetId)) {
                                        continue;
                                }
                                String facetVal = firstFilterValue(facetqueries, categoryFacetId);
                                if(facetVal != null && CATEGORY_FACET_MAP.containsKey(facetVal)) {
                                        this.filtrableFacets = ListUtils.sum(Utils.rootfacetDefIDs, CATEGORY_FACET_MAP.get(facetVal));
                                        return;
                                }
                        }
                }

                for(String categoryFacetId : categoryFacetIds) {
                        if(!queriedFacets.contains(categoryFacetId)) {
                                continue;
                        }
                        String[] parts = this.query.split(categoryFacetId + ":");
                        if(parts.length < 2) {
                                // The facet id occurs in the query without a value after it.
                                continue;
                        }
                        String facetVal = parts[1];
                        if(facetVal.contains(" OR ")) {
                                // Several alternatives are queried, so no single category applies.
                                this.filtrableFacets = Utils.rootfacetDefIDs;
                                return;
                        }
                        // The lookup has to use the bare label: strip the query syntax first.
                        facetVal = stripQuerySyntax(facetVal);
                        if(CATEGORY_FACET_MAP.containsKey(facetVal)) {
                                this.filtrableFacets = ListUtils.sum(Utils.rootfacetDefIDs, CATEGORY_FACET_MAP.get(facetVal));
                                return;
                        }
                }
                this.filtrableFacets = Utils.rootfacetDefIDs;
        }

        /**
         * Returns the value part of the first facet filter that mentions the given facet id,
         * or null when no filter mentions it or that filter carries no value.
         */
        private static String firstFilterValue(String[] facetqueries, String facetId) {
                for(String facetQuery : facetqueries) {
                        if(facetQuery.contains(facetId)) {
                                String[] tokens = facetQuery.split(":");
                                return tokens.length > 1 ? tokens[1] : null;
                        }
                }
                return null;
        }

        /**
         * Reduces the text following "facetId:" in a query to a bare label: everything
         * from the first ampersand on is dropped, then quotes and parentheses are removed.
         */
        private static String stripQuerySyntax(String facetVal) {
                int ampersand = facetVal.indexOf('&');
                String head = ampersand < 0 ? facetVal : facetVal.substring(0, ampersand);
                return head.replaceAll("[\"()]", "");
        }

        /**
         * @return the facet ids selected by setFilterableFacets for this search
         */
        public List<String> getFilterableFacets() {
                return filtrableFacets;
        }

        /**
         * Copies the given facets into a new map sorted by facet id, leaving out facets
         * that have no values and facets whose id already occurs in the query.
         *
         * @param facetMap        facet id to (value to count) map to filter
         * @param numberOfResults total hit count; currently not used
         * @return a new sorted map holding insertion-ordered copies of the kept facets
         */
        public Map<String,Map<String,Integer>> removeUnwantedFacets(Map<String,Map<String,Integer>> facetMap, long numberOfResults){
                Set<String> facetsInQuery = new HashSet<String>(getAllMatches(this.query));
                Map<String,Map<String,Integer>> keptFacets = new TreeMap<String, Map<String,Integer>>();

                for(Map.Entry<String, Map<String,Integer>> facetEntry : facetMap.entrySet()){
                        Map<String,Integer> valueCounts = facetEntry.getValue();
                        if(valueCounts.size() == 0 || facetsInQuery.contains(facetEntry.getKey())){
                                continue;
                        }
                        // The copy keeps the value order of the source map.
                        keptFacets.put(facetEntry.getKey(), new LinkedHashMap<String, Integer>(valueCounts));
                }
                return keptFacets;
        }
        
        /**
         * Looks up the value counts of one facet.
         *
         * @param facetName id of the facet
         * @return the value to count map of that facet, or null when no facet map is
         *         available or the facet is not part of it
         */
        public Map<String,Integer> getFacetDetails(String facetName){
                return (facetMap == null) ? null : facetMap.get(facetName);
        }
        
        /**
         * Reads the facet fields of the Solr response into the facetMap field, records
         * the total hit count and drops the facets rejected by removeUnwantedFacets.
         *
         * Facets listed in SORTED_FACET_VALUE_MAP get their values in the order defined
         * there; values that are not listed there are left out for those facets.
         *
         * @return the filtered facet id to (value to count) map, or null when the
         *         response could not be read or contains no facet fields
         */
        public Map<String,Map<String,Integer>> getFacetMap() {
                facetMap = new TreeMap<String,Map<String,Integer>>();

                String facetNamePath = "/response/lst/lst[@name = 'facet_fields']/lst";

                NodeList nodes = null;
                try {
                        nodes = (NodeList) this.xpath.evaluate(facetNamePath, this.inputSource, XPathConstants.NODESET);
                }
                catch (XPathExpressionException xpee) {
                        log.error("Unable to read the facet fields from the Solr response.", xpee);
                        return null;
                }

                if(nodes.getLength() == 0) {
                        return null;
                }

                for(int i=0; i<nodes.getLength(); i++) {
                        Node node = nodes.item(i);
                        String facetName = node.getAttributes().getNamedItem("name").getNodeValue();
                        NodeList subNodes = node.getChildNodes();
                        Map<String,Integer> facetValueCountMap = new LinkedHashMap<String,Integer>();
                        for(int j=0; j<subNodes.getLength(); j++) {
                                Node subNode = subNodes.item(j);
                                if(subNode.getNodeType() != Node.ELEMENT_NODE) {
                                        // Text nodes have no attributes to read a value name from.
                                        continue;
                                }
                                facetValueCountMap.put(subNode.getAttributes().getNamedItem("name").getNodeValue(), Integer.parseInt(subNode.getTextContent()));
                        }
                        if(SORTED_FACET_VALUE_MAP.containsKey(facetName)){
                                Map<String, Integer> sortedMap = new LinkedHashMap<String, Integer>();
                                for(String val : SORTED_FACET_VALUE_MAP.get(facetName)) {
                                        if(facetValueCountMap.containsKey(val)) {
                                                sortedMap.put(val, facetValueCountMap.get(val));
                                        }
                                }
                                facetMap.put(facetName, sortedMap);
                        } else {
                                facetMap.put(facetName, facetValueCountMap);
                        }
                }
                this.numberOfResults  = this.getTotalResults();

                facetMap = removeUnwantedFacets(facetMap, numberOfResults);
                return facetMap;
        }
        
        /**
         * Collects the text of the first child of every result document of the Solr
         * response. The list is also stored in the resultMap field.
         * NOTE(review): the first child is presumably the ID field because the request
         * asks for fl=ID,Name - confirm.
         *
         * @return the collected values in response order, or null when the response
         *         could not be read or contains no documents
         */
        public List<String> getResultMap() {
                resultMap = new LinkedList<String>();

                String resultDocsPath = "/response/result/doc";

                NodeList nodes = null;
                try {
                        nodes = (NodeList) this.xpath.evaluate(resultDocsPath, this.inputSource, XPathConstants.NODESET);
                }
                catch (XPathExpressionException xpee) {
                        log.error("Unable to read the result documents from the Solr response.", xpee);
                        return null;
                }

                if(nodes.getLength() == 0) {
                        return null;
                }

                for(int i=0; i<nodes.getLength(); i++) {
                        Node firstField = nodes.item(i).getFirstChild();
                        if(firstField == null) {
                                // A document without any field has nothing to contribute.
                                continue;
                        }
                        resultMap.add(firstField.getTextContent());
                }
                return resultMap;
        }

        /**
         * Reads the price statistics from the response of this search's own request.
         *
         * @return whatever getPriceStatsMap(InputSource) returns for the source of this search
         */
        public HashMap<String, Double> getPriceStatsMap() {
                InputSource ownSource = this.inputSource;
                return getPriceStatsMap(ownSource);
        }
        
        /**
         * Reads the statistics that Solr reports for the price facet field from the
         * given response.
         *
         * @param inputSource source of the Solr XML response to read
         * @return statistic name to value map, or null when the response could not be
         *         read or holds no statistics for the price facet. Entries whose text is
         *         not a number are left out.
         */
        public HashMap<String, Double> getPriceStatsMap(InputSource inputSource) {
                HashMap<String, Double> priceStatsMap = new HashMap<String, Double>();

                String resultDocsPath = "/response/lst[@name = 'stats']/lst[@name = 'stats_fields']/lst[@name = '" + priceFacetName + "']";

                NodeList nodes = null;
                try {
                        nodes = (NodeList) this.xpath.evaluate(resultDocsPath, inputSource, XPathConstants.NODESET);
                }
                catch (XPathExpressionException xpee) {
                        log.error("Unable to read the price statistics from the Solr response.", xpee);
                        return null;
                }

                if(nodes.getLength() == 0) {
                        return null;
                }

                NodeList subNodes = nodes.item(0).getChildNodes();

                for(int i=0; i<subNodes.getLength(); i++) {
                        Node node = subNodes.item(i);
                        if(node.getNodeType() != Node.ELEMENT_NODE) {
                                continue;
                        }
                        Node nameAttribute = node.getAttributes().getNamedItem("name");
                        if(nameAttribute == null) {
                                continue;
                        }
                        String parameter = nameAttribute.getNodeValue();
                        String value = node.getTextContent();
                        try {
                                priceStatsMap.put(parameter, Double.parseDouble(value));
                        } catch (NumberFormatException nfe) {
                                // Presumably a nested, non-numeric entry of the stats block - TODO confirm.
                                log.debug("Skipping non-numeric price statistic " + parameter);
                        }
                }
                return priceStatsMap;
        }

        /**
         * Reads the facet query counts of the Solr response.
         *
         * @return facet query to document count map, or null when the response could
         *         not be read or contains no facet query counts
         */
        public HashMap<String,Integer> getRangeQueryResultMap() {
                HashMap<String, Integer> rangeQueryResultMap = new HashMap<String,Integer>();

                String resultDocsPath = "/response/lst[@name = 'facet_counts']/lst[@name = 'facet_queries']/int";

                NodeList nodes = null;
                try {
                        nodes = (NodeList) this.xpath.evaluate(resultDocsPath, this.inputSource, XPathConstants.NODESET);
                }
                catch (XPathExpressionException xpee) {
                        log.error("Unable to read the facet query counts from the Solr response.", xpee);
                        return null;
                }

                if(nodes.getLength() == 0) {
                        return null;
                }

                for(int i=0; i<nodes.getLength(); i++) {
                        Node countNode = nodes.item(i);
                        String facetQuery = countNode.getAttributes().getNamedItem("name").getNodeValue();
                        rangeQueryResultMap.put(facetQuery, Integer.parseInt(countNode.getTextContent()));
                }
                return rangeQueryResultMap;
        }

        /**
         * Reads the total number of matching documents from the numFound attribute
         * of the result element of the Solr response.
         *
         * @return the reported hit count, or 0 when the response could not be read or
         *         has no result element carrying a numFound attribute
         */
        public long getTotalResults(){
                String resultDocsPath = "/response/result";
                NodeList nodes = null;
                try {
                        nodes = (NodeList) this.xpath.evaluate(resultDocsPath, this.inputSource, XPathConstants.NODESET);
                }
                catch (XPathExpressionException xpee) {
                        log.error("Unable to read the result count from the Solr response.", xpee);
                        return 0;
                }

                if(nodes == null || nodes.getLength() == 0) {
                        // No result element at all, for example an error response.
                        return 0;
                }

                Node numFound = nodes.item(0).getAttributes().getNamedItem("numFound");
                if(numFound == null) {
                        return 0;
                }
                return Long.parseLong(numFound.getNodeValue());
        }
                /**
         * Reads the long typed fields of all result documents of the Solr response.
         *
         * @return the values in response order, or null when the response could not
         *         be read or contains no such fields
         */
        public long[] getResultEntityIDs() {
                String expression = "/response/result/doc/long";

                NodeList nodes = null;
                try {
                        nodes = (NodeList) this.xpath.evaluate(expression, this.inputSource,
                                        XPathConstants.NODESET);
                }
                catch(XPathExpressionException xpee) {
                        log.error("Unable to read the entity ids from the Solr response.", xpee);
                        return null;
                }

                int count = nodes.getLength();
                if(count == 0) {
                        return null;
                }

                long[] values = new long[count];
                for(int i=0; i<count; i++) {
                        values[i] = Long.parseLong(nodes.item(i).getTextContent());
                }
                return values;
        }
        
        /**
         * Reads the value names of the 'Category' facet field of the Solr response.
         *
         * @return the names in response order, or null when the response could not be
         *         read or the facet has no values
         */
        public String[] getResultCategoryNames() {
                String expression = "/response/lst/lst[@name = 'facet_fields']/";
                expression += "lst[@name = 'Category']/int/@name";

                NodeList nodes = null;
                try {
                        nodes = (NodeList) this.xpath.evaluate(expression,
                                this.inputSource, XPathConstants.NODESET);
                }
                catch (XPathExpressionException xpee) {
                        log.error("Unable to read the category names from the Solr response.", xpee);
                        return null;
                }

                int count = nodes.getLength();
                if(count == 0) {
                        return null;
                }

                String[] values = new String[count];
                for(int i=0; i<count; i++) {
                        values[i] = nodes.item(i).getTextContent();
                }
                return values;
        }
        
        /**
         * Reads the value counts of the 'Category' facet field of the Solr response.
         *
         * @return the counts in response order, or null when the response could not be
         *         read or the facet has no values
         */
        public int[] getResultCategoryCounts() {
                String expression = "/response/lst/lst[@name = 'facet_fields']/";
                expression += "lst[@name = 'Category']/int";

                NodeList nodes = null;
                try {
                        nodes = (NodeList) this.xpath.evaluate(expression,
                                this.inputSource, XPathConstants.NODESET);
                }
                catch (XPathExpressionException xpee) {
                        log.error("Unable to read the category counts from the Solr response.", xpee);
                        return null;
                }

                int count = nodes.getLength();
                if(count == 0) {
                        return null;
                }

                int[] values = new int[count];
                for(int i=0; i<count; i++) {
                        values[i] = Integer.parseInt(nodes.item(i).getTextContent());
                }
                return values;
        }
        
        /**
         * Reads the string typed fields of all result documents of the Solr response.
         *
         * @return the values in response order, or null when the response could not
         *         be read or contains no such fields
         */
        public String[]  getResultEntityNames() {
                String expression = "/response/result/doc/str";

                NodeList nodes = null;
                try {
                        nodes = (NodeList) this.xpath.evaluate(expression, this.inputSource,
                                        XPathConstants.NODESET);
                }
                catch(XPathExpressionException xpee) {
                        log.error("Unable to read the entity names from the Solr response.", xpee);
                        return null;
                }

                int count = nodes.getLength();
                if(count == 0) {
                        return null;
                }

                String[] values = new String[count];
                for(int i=0; i<count; i++) {
                        values[i] = nodes.item(i).getTextContent();
                }
                return values;
        }

        /**
         * Reads the value names of one facet field of the Solr response.
         *
         * @param facetDefinitionID name of the facet field; it is placed inside single
         *                          quotes in the XPath expression as given
         * @return the value names in response order, or null when the response could
         *         not be read or the facet has no values
         */
        public String[] getFacetValues(String facetDefinitionID) {
                String expression = "/response/lst/lst[@name = 'facet_fields']/";
                expression += "lst[@name = '"+ facetDefinitionID +"']/int/@name";

                NodeList nodes = null;
                try {
                        nodes = (NodeList) this.xpath.evaluate(expression,
                                this.inputSource, XPathConstants.NODESET);
                }
                catch (XPathExpressionException xpee) {
                        log.error("Unable to read the values of facet " + facetDefinitionID + " from the Solr response.", xpee);
                        return null;
                }

                int count = nodes.getLength();
                if(count == 0) {
                        return null;
                }

                String[] values = new String[count];
                for(int i=0; i<count; i++) {
                        values[i] = nodes.item(i).getTextContent();
                }
                return values;
        }
        
        /**
         * Reads the value counts of one facet field of the Solr response, as text.
         *
         * @param facetDefinitionID name of the facet field; it is placed inside single
         *                          quotes in the XPath expression as given
         * @return the counts in response order, or null when the response could not be
         *         read or the facet has no values
         */
        public String[] getFacetCounts(String facetDefinitionID) {
                String expression = "/response/lst/lst[@name = 'facet_fields']/";
                expression += "lst[@name = '" + facetDefinitionID + "']/int";

                NodeList nodes = null;
                try {
                        nodes = (NodeList) this.xpath.evaluate(expression,
                                this.inputSource, XPathConstants.NODESET);
                }
                catch (XPathExpressionException xpee) {
                        log.error("Unable to read the counts of facet " + facetDefinitionID + " from the Solr response.", xpee);
                        return null;
                }

                int count = nodes.getLength();
                if(count == 0) {
                        return null;
                }

                String[] values = new String[count];
                for(int i=0; i<count; i++) {
                        values[i] = nodes.item(i).getTextContent();
                }
                return values;
        }

        /**
         * Manual smoke run: searches for "nokia" without any filter, logs the entity
         * ids and names and then calls every remaining accessor once.
         *
         * @param args not used
         */
        public static void main(String[] args){
                // Facet fields whose counts and values are requested at the end of the run.
                String[] probedFacets = {"Category","F_50002","F_50001",  "F_50006", "F_50007" };

                SolrSearchService search = new SolrSearchService("nokia", new String[0], 0 , 20, null, null, null, -1);

                log.info("entityIDs=" + Arrays.toString(search.getResultEntityIDs()));
                log.info("entityNames=" + Arrays.toString(search.getResultEntityNames()));

                search.getFacetMap();
                search.getResultMap();
                search.getRangeQueryResultMap();
                search.getPriceStatsMap(new InputSource());
                search.getTotalResults();

                for (String facetId : probedFacets) {
                        search.getFacetCounts(facetId);
                        search.getFacetValues(facetId);
                }
        }
        

    /**
     * Finds every facet id, "F_" followed by five digits, that occurs in the text.
     *
     * @param text text to scan
     * @return the facet ids in order of occurrence; repeated ids are reported repeatedly
     */
    public static List<String> getAllMatches(String text) {
        Matcher facetMatcher = FACET_PATTERN.matcher(text);
        List<String> facetIds = new ArrayList<String>();
        for (boolean found = facetMatcher.find(); found; found = facetMatcher.find()) {
            facetIds.add(facetMatcher.group(1));
        }
        return facetIds;
    }
    
    /**
     * @return the price statistics fetched by the constructor before the price
     *         filter was applied; null when none were fetched
     */
    public Map<String, Double> getDynamicPriceMap() {
        return dynamicPriceMap;
    }
    
}