Blame | Last modification | View Log | RSS feed
package in.shop2020.web;import java.io.IOException;import java.util.ArrayList;import java.util.Date;import java.util.HashMap;import java.util.List;import java.util.Map;import javax.servlet.ServletException;import javax.servlet.http.HttpServlet;import javax.servlet.http.HttpServletRequest;import javax.servlet.http.HttpServletResponse;import org.json.JSONArray;import org.json.JSONException;import org.json.JSONObject;import com.google.appengine.api.datastore.DatastoreService;import com.google.appengine.api.datastore.DatastoreServiceFactory;import com.google.appengine.api.datastore.Entity;import com.google.appengine.api.datastore.FetchOptions;import com.google.appengine.api.datastore.Key;import com.google.appengine.api.datastore.KeyFactory;import com.google.appengine.api.datastore.PreparedQuery;import com.google.appengine.api.datastore.Query;import com.google.appengine.api.datastore.Query.FilterOperator;import com.google.appengine.api.datastore.Query.SortDirection;public class PriceComparisonServlet extends HttpServlet {private static final long serialVersionUID = 1L;private final String ENTITY_KIND_JOB = "ScrapeJob";private final String ENTITY_KIND_PHONE_PRICE = "PhonePrice";private DatastoreService ds = DatastoreServiceFactory.getDatastoreService();public enum JobStatus {UNASSIGNED(0), ASSIGNED(1), COMPLETED(2);private int code;private JobStatus(int code) {this.code = code;}public int getCode() {return this.code;}}private Entity createScrapeJob(String url, String source) {Entity scrapeJob = new Entity(ENTITY_KIND_JOB);scrapeJob.setProperty("url", url);scrapeJob.setProperty("source", source);scrapeJob.setProperty("status", JobStatus.UNASSIGNED.getCode());scrapeJob.setProperty("assigneeId", null);scrapeJob.setProperty("enqueuedOn", null);return scrapeJob;}private void assignScrapeJob(Entity scrapeJob, String assigneeId) {System.out.println("Assignee Id: " + assigneeId);scrapeJob.setProperty("status", JobStatus.ASSIGNED.getCode());scrapeJob.setProperty("assigneeId", assigneeId);ds.put(scrapeJob);}private void enqueueScrapeJob(Entity scrapeJob) {scrapeJob.setProperty("enqueuedOn", new Date());ds.put(scrapeJob);}private Entity assignJobToScraper(String assigneeId, String lastScrapedSource) {Query query = new Query(ENTITY_KIND_JOB);query.addFilter("status", FilterOperator.EQUAL, JobStatus.UNASSIGNED.getCode());query.addSort("enqueuedOn", SortDirection.ASCENDING);PreparedQuery pq = ds.prepare(query);FetchOptions fo = FetchOptions.Builder.withLimit(1);Entity job;if (pq.countEntities(fo) > 0) {job = pq.asList(fo).get(0);if (assigneeId == null) {System.out.println("Assignee Id is null");String newAssigneeId = Long.toString(job.getKey().getId());System.out.println("New Job Id & Assignee Id: " + newAssigneeId);assignScrapeJob(job, newAssigneeId);} else {System.out.println("Assignee Id is " + assigneeId);assignScrapeJob(job, assigneeId);}} else {job = null;}System.out.println("Job assigned:" + job);return job;}private Map<String, String> toMap(Entity job) {Map<String, String> jobMap = new HashMap<String, String>();if (job != null) {jobMap.put("id", Long.toString(job.getKey().getId()));jobMap.put("source", (String) job.getProperty("source"));jobMap.put("assigneeId", (String) job.getProperty("assigneeId"));jobMap.put("url", (String) job.getProperty("url"));} else {jobMap.put("id", null);jobMap.put("source", null);jobMap.put("assigneeId", null);jobMap.put("url", null);}return jobMap;}private int getTimeToWait() {return 2 + (int) (Math.random() * 2);}private String getPhoneNameKey(String phoneName, String source) {int bracketStartsAt = phoneName.indexOf('(');if(bracketStartsAt > -1) {phoneName = phoneName.substring(0, bracketStartsAt);}String phoneNameKey = phoneName.replaceAll("[^a-zA-Z0-9]", "");return phoneNameKey.toLowerCase();}private void removeJobFromQueue(String jobId) {Key k = KeyFactory.createKey(ENTITY_KIND_JOB, Long.parseLong(jobId));ds.delete(k);}private void savePhonePrices(JSONArray phonePrices, String source) {System.out.println("Saving Price Data");List<Entity> priceEntities = new ArrayList<Entity>();for (int i = 0; i < phonePrices.length(); i ++) {try {JSONObject phone = phonePrices.getJSONObject(i);Entity priceEntity = new Entity(ENTITY_KIND_PHONE_PRICE);priceEntity.setProperty("source", source);priceEntity.setProperty("name", phone.getString("name"));priceEntity.setProperty("price", phone.getString("price"));priceEntity.setProperty("in_stock", phone.getString("in_stock"));priceEntity.setProperty("url", phone.getString("product_url"));priceEntities.add(priceEntity);} catch (JSONException e) {e.printStackTrace();}}ds.put(priceEntities);}private void initJobQueue() {Entity job1 = createScrapeJob("http://www.flipkart.com/mobiles/all/", "flipkart");enqueueScrapeJob(job1);Entity job2 = createScrapeJob("http://www.letsbuy.com/mobile-phones-mobiles-c-254_88?perpage=192", "letsbuy");enqueueScrapeJob(job2);Entity job3 = createScrapeJob("http://www.infibeam.com/Mobiles/search", "infibeam");enqueueScrapeJob(job3);Entity job4 = createScrapeJob("http://www.homeshop18.com/gsm-handsets/category:3027/", "homeshop18");enqueueScrapeJob(job4);Entity job5 = createScrapeJob("http://www.flipkart.com/mobiles/tablet-20278", "flipkart");enqueueScrapeJob(job5);Entity job6 = createScrapeJob("http://www.letsbuy.com/mobile-phones-tablets-c-254_393?perpage=192", "letsbuy");enqueueScrapeJob(job6);Entity job7 = createScrapeJob("http://www.homeshop18.com/ipads-2f-tablets/category:8937/", "homeshop18");enqueueScrapeJob(job7);// Entity job8 = createScrapeJob("http://www.adexmart.com/modules/coremanager/modules/filtersearch/filtersearch.json.php?act=filter&ident=16&page=1&perpage=1000&orderby=newest&orderway=desc", "adexmart");// enqueueScrapeJob(job8);}public void doGet(HttpServletRequest req, HttpServletResponse resp) {String cmd = req.getParameter("cmd");System.out.println("Command recieved: " + cmd);Query q = new Query(ENTITY_KIND_PHONE_PRICE);PreparedQuery pq = ds.prepare(q);if (cmd != null) {if (cmd.equals("delall")) {System.out.println("Deleting all pricing");for (Entity pricing: pq.asIterable()) {ds.delete(pricing.getKey());}System.out.println("Deleting all jobs");q = new Query(ENTITY_KIND_JOB);pq = ds.prepare(q);for (Entity job: pq.asIterable()) {ds.delete(job.getKey());}} else if(cmd.equals("init")) {System.out.println("Initializing the job queue");initJobQueue();} else if (cmd.equals("getjson")) {List<Map<String, String>> entities = new ArrayList<Map<String,String>>();Map<String, String> details;for (Entity pricing: pq.asIterable()) {details = new HashMap<String, String>();details.put("source", (String) pricing.getProperty("source"));details.put("name", (String) pricing.getProperty("name"));details.put("price", (String) pricing.getProperty("price"));details.put("in_stock", (String) pricing.getProperty("in_stock"));details.put("url", (String) pricing.getProperty("url"));entities.add(details);}resp.setContentType("application/json");try {resp.getWriter().print(new JSONArray(entities));} catch (IOException e) {e.printStackTrace();}}}}public void doPost(HttpServletRequest req, HttpServletResponse resp) {String clientId = (String) req.getParameter("id");String jobId = (String) req.getParameter("job_id");String source = (String) req.getParameter("source");String phonePrices = (String) req.getParameter("phone_prices");String nextUrl = (String) req.getParameter("next_url");Entity job;System.out.println(clientId + " " + jobId + " " + source + " " + phonePrices);if(clientId.equals("None") || jobId.equals("None") || source.equals("None") || phonePrices.equals("None")) {System.out.println("Assigning first job");job = assignJobToScraper(null, null);} else {try {JSONObject phonePricesJSONObj = new JSONObject("{priceData: " + phonePrices + "}");// JSONArray priceJSONArray = new JSONArray(phonePrices);// System.out.println(priceJSONArray);savePhonePrices(phonePricesJSONObj.getJSONArray("priceData"), source);// savePhonePrices(priceJSONArray, source);removeJobFromQueue(jobId); // Removing executed job from the queue} catch (JSONException e) {e.printStackTrace();}System.out.println("Assigning next job");job = assignJobToScraper(clientId, source);}if(!nextUrl.equals("None") && !source.equals("None")) {Entity newJob = createScrapeJob(nextUrl, source);enqueueScrapeJob(newJob);}System.out.println("Assigned Job & all attributes set:" + job);Map<String, String> scraperJobMap = toMap(job);scraperJobMap.put("timetowait", Integer.toString(getTimeToWait()));System.out.println("final Map:" + scraperJobMap);try {resp.getWriter().print(new JSONObject(scraperJobMap));} catch (IOException e) {e.printStackTrace();}}}