Rev 51 | Blame | Last modification | View Log | RSS feed
/****/package in.shop2020.metamodel.util;import in.shop2020.metamodel.core.Bullet;import in.shop2020.metamodel.core.CompositeDataObject;import in.shop2020.metamodel.core.Entity;import in.shop2020.metamodel.core.EnumDataObject;import in.shop2020.metamodel.core.Feature;import in.shop2020.metamodel.core.FreeformContent;import in.shop2020.metamodel.core.PrimitiveDataObject;import in.shop2020.metamodel.definitions.BulletDefinition;import in.shop2020.metamodel.definitions.Catalog;import in.shop2020.metamodel.definitions.CompositeDefinition;import in.shop2020.metamodel.definitions.CompositePartDefinition;import in.shop2020.metamodel.definitions.DatatypeDefinition;import in.shop2020.metamodel.definitions.DefinitionsContainer;import in.shop2020.metamodel.definitions.EditorialImportance;import in.shop2020.metamodel.definitions.EntityContainer;import in.shop2020.metamodel.definitions.EnumDefinition;import in.shop2020.metamodel.definitions.FeatureDefinition;import in.shop2020.metamodel.definitions.SlideDefinition;import in.shop2020.metamodel.definitions.Unit;import in.shop2020.util.Utils;import java.util.ArrayList;import java.util.HashMap;import java.util.List;import java.util.Map;import org.apache.commons.lang.ArrayUtils;import org.apache.commons.lang.StringUtils;import org.apache.poi.hslf.HSLFSlideShow;import org.apache.poi.hslf.extractor.PowerPointExtractor;import org.apache.poi.hslf.model.Slide;import org.apache.poi.hslf.model.TextRun;import org.apache.poi.hslf.usermodel.RichTextRun;import org.apache.poi.hslf.usermodel.SlideShow;/*** Command line utility to convert MS PPT into shop2020 content model objects.* It can be used to** - Validate PPT file* - Import PPT file* - Show Java objects from already imported PPT*** HSLF - Horrible slide show format (POI API term)** @author naveen**/public class HSLF {// LOCAL/*** Path to where PPT files will be picked*/public static final String CONTENT_SRC_PPT_PATH ="/home/naveen/workspace/eclipse/content/ppt/";/** Validation toolpublic 
static final String CONTENT_SRC_PPT_PATH = "./ppt/";*//*** Path where exported HTML files are kept*/public static final String CONTENT_SRC_HTML_PATH ="/home/naveen/workspace/eclipse/content/html/";/****/public static final String CONTENT_SRC_XML_PATH ="/home/naveen/workspace/eclipse/content/xml/";/****/public static final int BLUE = -16776961;/****/public static final int PURPLE = -6749953;/****/public static final int GREEN = -16711936;/****/public static final int ORANGE = -26368;// LOCAL/****/public static final String CONTENT_DB_PATH ="/home/naveen/workspace/eclipse/db/entities/";/** Validation toolpublic static final String CONTENT_DB_PATH ="./db/entities/";*//** WEBpublic static final String CONTENT_DB_PATH ="/var/lib/tomcat6/webapps/shop2020/db/entities/";*//****/private List<String> slideNames = new ArrayList<String>();/****/private Map<String, List<String>> slideNameChildrenSlideNames =new HashMap<String, List<String>>();/****/private Map<String, List<String>> slideNameFeatureNames =new HashMap<String, List<String>>();/****/private Map<String, Long> slideNameSlideDefID =new HashMap<String, Long>();/****/private Map<String, Long> slideNameFeatureNameFeatureDefID =new HashMap<String, Long>();/****/private Map<String, List<String>> slideNameFeatureNameBulletTexts =new HashMap<String, List<String>>();/****/private Map<String, List<Bullet>> slideNameFeatureNameBullets =new HashMap<String, List<Bullet>>();/****/private Map<String, List<String>> containerFCCs =new HashMap<String, List<String>>();/****/private long categoryID;/****/private String srcFile;/****/private String dbFile;/****/private String introductionTitle;/****/private String introductionText;/*** Usage: HSLF [validate|import|show] [{Entity ID}|{Category ID} {PPT file name}]** @param args*/public static void main(String[] args) throws Exception {String[] commands = new String[] {"validate", "import", "show"};String usage = "Usage: HSLF ["+ StringUtils.join(commands, "|") +"] [{Entity ID}|{Category 
ID} {PPT file name}]\n";if(args.length < 2) {System.out.println(usage);System.exit(-1);}String inputCommand = args[0];if(!ArrayUtils.contains(commands, inputCommand)) {System.out.println(usage);System.exit(-1);}if (inputCommand.equals("validate")) {String inputCategoryID = args[1];if(args.length < 3) {System.out.println(usage);System.exit(-1);}String inputFilename = args[2];long categoryID = new Long(inputCategoryID).longValue();HSLF hslf = new HSLF(categoryID, inputFilename);hslf.validateEntity();System.exit(0);}if (inputCommand.equals("import")) {String inputCategoryID = args[1];if(args.length < 3) {System.out.println(usage);System.exit(-1);}String inputFilename = args[2];long categoryID = new Long(inputCategoryID).longValue();HSLF hslf = new HSLF(categoryID, inputFilename);hslf.importEntity();System.exit(0);}if (inputCommand.equals("show")) {String entityID = args[1];HSLF hslf = new HSLF();hslf.showEntity(new Long(entityID).longValue());System.exit(0);}}/****/public HSLF() {}/**** @param categoryID* @param fileName*/public HSLF(long categoryID, String fileName) {this.categoryID = categoryID;this.srcFile = CONTENT_SRC_PPT_PATH + fileName + ".ppt";this.dbFile = CONTENT_DB_PATH + "entities" + ".ser";}/**** @param entityID* @throws Exception*/public void showEntity(long entityID) throws Exception {EntityContainer entContainer =Catalog.getInstance().getEntityContainer();/*Entity entity = entContainer.getEntity(entityID);Utils.info("Entity=" + entity);*/ExpandedEntity expandedEntity =entContainer.getExpandedEntity(entityID);Utils.info("expandedEntity=" + expandedEntity);}/**** @throws Exception*/public void validateEntity() throws Exception {HSLFSlideShow hslfSS = new HSLFSlideShow(this.srcFile);SlideShow ss = new SlideShow(hslfSS);Slide[] ssarr = ss.getSlides();// Introduction pagethis.processIntroductionSlide(ssarr[0]);// Rest of the content pagefor(int i=1; i < ssarr.length; i++) {TextRun[] trs = ssarr[i].getTextRuns();Utils.info("trs.length=" + trs.length);/* 
DEBUG ONLYfor(int j=0;j<trs.length;j++) {Utils.info("trs["+ j +"].getRawText=" + trs[j].getRawText());Utils.info("trs[" + j + "].getRichTextRuns().length=" +trs[j].getRichTextRuns().length);}*/String title = StringUtils.lowerCase(StringUtils.strip(StringUtils.trim(ssarr[i].getTitle())));this.slideNames.add(title);Utils.info("title=" + title);// Hack to get to TextRun where bullets are!// Assume second textrun will have all the contentTextRun contentTextRun = trs[1];RichTextRun[] richTextRuns = contentTextRun.getRichTextRuns();//Utils.info("richTextRuns.length=" + richTextRuns.length);String rawContent = contentTextRun.getRawText();Utils.info("rawContent=" + rawContent);// richTextRuns.length == 1if(title.equalsIgnoreCase(StringUtils.strip(rawContent)) &&trs.length > 2) {contentTextRun = trs[2];richTextRuns = contentTextRun.getRichTextRuns();}Utils.info("contentTextRun.getRawText=" +contentTextRun.getRawText());int startIndex = 0;int indent = 1;Map<String, Object[]> info = getContents(richTextRuns, startIndex,indent);Utils.info(info);//System.exit(0);drilldown(richTextRuns, info, indent, title);}//System.exit(0);Utils.info("this.slideNames=" + this.slideNames);Utils.info("this.slideNameChildrenSlideNames=" +this.slideNameChildrenSlideNames);Utils.info("this.slideNameFeatureNames=" +this.slideNameFeatureNames);Utils.info("this.slideNameFeatureNameBulletTexts=" +this.slideNameFeatureNameBulletTexts);Utils.info("this.containerFCCs=" + this.containerFCCs);// Start validation and import// Validate slidesthis.processSlides();// Collect Slide Definition IDsthis.collectSlideDefinitionIDs();Utils.info("this.slideNameSlideDefID=" +this.slideNameSlideDefID);// Validate featuresfor(String slideName : this.slideNames) {this.processFeatures(slideName, slideName);}// Collect Feature Definition IDsthis.collectAllFeatureDefinitionIDs();Utils.info("this.slideNameFeatureNameFeatureDefID=" +this.slideNameFeatureNameFeatureDefID);DefinitionsContainer defs 
=Catalog.getInstance().getDefinitionsContainer();// Validate bulletsfor(String slideName : this.slideNames) {Long slideDefID = this.slideNameSlideDefID.get(slideName);SlideDefinition slideDef =defs.getSlideDefinition(slideDefID.longValue());this.processBullets(slideName, slideDef.getLabel());}Utils.info("this.slideNameFeatureNameBullets=" +this.slideNameFeatureNameBullets);}/**** @param slideName* @param displayTextPrefix* @throws Exception*/private void processBullets(String slideName, String displayTextPrefix)throws Exception {Utils.info("Process bullets for " + slideName);DefinitionsContainer defs =Catalog.getInstance().getDefinitionsContainer();List<String> featureLabels =this.slideNameFeatureNames.get(slideName);// Feature-less slideif(featureLabels != null) {for(String featureLabel : featureLabels) {String key = slideName + "_" + featureLabel;Utils.info("key=" + key);List<String> bulletTexts =this.slideNameFeatureNameBulletTexts.get(key);// Bullet-less featureif(bulletTexts == null) {continue;}Long featureDefID =this.slideNameFeatureNameFeatureDefID.get(key);FeatureDefinition featureDef =defs.getFeatureDefinition(featureDefID.longValue());String crumb = displayTextPrefix + " > " +featureDef.getLabel();String prefix = slideName + "_" + featureLabel;for(String bulletText : bulletTexts) {Bullet bullet =this.processBulletText(bulletText, featureDef, crumb,prefix);// Can't be imported has errors, skipif(bullet == null) {continue;}List<Bullet> bullets =this.slideNameFeatureNameBullets.get(key);if(bullets == null) {bullets = new ArrayList<Bullet>();this.slideNameFeatureNameBullets.put(key, bullets);}bullets.add(bullet);}}}// Process bullets of children slidesList<String> childrenSlideNames =this.slideNameChildrenSlideNames.get(slideName);Utils.info("childrenSlideNames=" + childrenSlideNames);if(childrenSlideNames != null) {for(String childSlideName : childrenSlideNames) {String fullSlideName = slideName + "_" + childSlideName;Utils.info("fullSlideName=" + 
fullSlideName);Long slideDefID =this.slideNameSlideDefID.get(fullSlideName);Utils.info("slideDefID=" + slideDefID);if(slideDefID == null) {Utils.severe("Unexpected slide " + fullSlideName);continue;}SlideDefinition childSlideDef =defs.getSlideDefinition(slideDefID.longValue());String newDisplayTextPrefix = displayTextPrefix + " > " +childSlideDef.getLabel();this.processBullets(fullSlideName, newDisplayTextPrefix);}}}/**** @throws Exception*/public void importEntity() throws Exception {// Parse and Validatethis.validateEntity();// Construct Content ModelEntity entity = this.contructEntityObject();Utils.info("entity="+entity);EntityContainer entContainer =Catalog.getInstance().getEntityContainer();entContainer.addEntity(entity);// RE-VISIT// Store it backDBUtils.store(entContainer.getEntities(), this.dbFile);// Store the index separatelyString entitiesbycategoryDBFile = HSLF.CONTENT_DB_PATH +"entitiesbycategory" + ".ser";DBUtils.store(entContainer.getEntitiesbyCategory(),entitiesbycategoryDBFile);}/**** @param hslfSlide*/private void processIntroductionSlide(Slide hslfSlide) {TextRun[] trs = hslfSlide.getTextRuns();StringBuffer sbTitle = new StringBuffer();StringBuffer sbText = new StringBuffer();for(int i=0;i<trs.length;i++) {RichTextRun[] rtrs = trs[i].getRichTextRuns();for(int j=0;j<rtrs.length;j++) {int rgb = rtrs[j].getFontColor().getRGB();if(rgb == HSLF.ORANGE) {sbTitle.append(rtrs[j].getRawText());}else if(rgb == HSLF.PURPLE) {sbText.append(rtrs[j].getRawText());}}}this.introductionTitle = sbTitle.toString();this.introductionText = sbText.toString();Utils.info("this.introductionTitle=" + this.introductionTitle);Utils.info("this.introductionText=" + this.introductionText);}/**** @return Entity* @throws Exception*/private Entity contructEntityObject() throws Exception {SequenceGenerator sg = SequenceGenerator.getInstance();long entityID = sg.getNextSequence(SequenceGenerator.ENTITY);Entity entity = new Entity(entityID, this.categoryID);String brandModel[] = 
StringUtils.split(this.introductionTitle, " ");if(brandModel.length == 1) {entity.setBrand(StringUtils.strip(brandModel[0]));}else if(brandModel.length == 2) {entity.setBrand(StringUtils.strip(brandModel[0]));entity.setModelName(StringUtils.strip(brandModel[1]));}else if(brandModel.length == 3) {entity.setBrand(StringUtils.strip(brandModel[0]));entity.setModelNumber(StringUtils.strip(brandModel[1]));entity.setModelName(StringUtils.strip(brandModel[2]));}else if(brandModel.length == 4) {entity.setBrand(StringUtils.strip(brandModel[0]) + " " +StringUtils.strip(brandModel[1]));entity.setModelNumber(StringUtils.strip(brandModel[2]));entity.setModelName(StringUtils.strip(brandModel[3]));}// Add introduction slideDefinitionsContainer defs =Catalog.getInstance().getDefinitionsContainer();List<SlideDefinition> introSlideDefs =defs.getSlideDefinitions(this.categoryID, "Introduction");// It will only be onelong introSlideDefID = introSlideDefs.get(0).getID();Utils.info("introSlideDefID=" + introSlideDefID);in.shop2020.metamodel.core.Slide introSlide =new in.shop2020.metamodel.core.Slide(introSlideDefID);introSlide.setFreeformContent(new FreeformContent(this.introductionText));Utils.info("introSlide=" + introSlide);entity.addSlide(introSlide);// Add rest of the slidesfor(String slideName : this.slideNames) {in.shop2020.metamodel.core.Slide slide =this.constructSlideObject(slideName);entity.addSlide(slide);}return entity;}/**** @param slideName* @return*/private in.shop2020.metamodel.core.Slide constructSlideObject(String slideName) {Utils.info("Constructing slide object for " + slideName);List<String> featureNames =this.slideNameFeatureNames.get(slideName);Long slideDefID = this.slideNameSlideDefID.get(slideName);if(slideDefID == null) {Utils.severe("Unexpected slide " + slideName);return null;}in.shop2020.metamodel.core.Slide slide =new in.shop2020.metamodel.core.Slide(slideDefID.longValue());// Only if there are featuresif(featureNames != null) {List<Feature> features = new 
ArrayList<Feature>();for(String featureName : featureNames) {String key = slideName + "_" + featureName;Long featureDefID =this.slideNameFeatureNameFeatureDefID.get(key);List<Bullet> bullets =this.slideNameFeatureNameBullets.get(key);Feature feature = new Feature(featureDefID.longValue());feature.setBullets(bullets);// Free-form contentList<String> featureFFCs = this.containerFCCs.get(key);FreeformContent featureFFC =new FreeformContent(StringUtils.join(featureFFCs, "|"));feature.setFreeformContent(featureFFC);features.add(feature);}slide.setFeatures(features);}// Add children slidesList<String> childrenSlideNames =this.slideNameChildrenSlideNames.get(slideName);if(childrenSlideNames != null) {for(String childSlideName : childrenSlideNames) {in.shop2020.metamodel.core.Slide childSlide =this.constructSlideObject(slideName + "_" + childSlideName);if(childSlide != null) {slide.addChild(childSlide);}}}// Add free-form content is collect aboveList<String> slideFFCs = this.containerFCCs.get(slideName);Utils.info("slideName=" + slideName +" slideFFCs=" + slideFFCs);if(slideFFCs != null) {FreeformContent slideFFC =new FreeformContent(StringUtils.join(slideFFCs, "|"));slide.setFreeformContent(slideFFC);}return slide;}/**** @throws Exception*/private void collectAllFeatureDefinitionIDs() throws Exception {for(String slideName : this.slideNames) {this.collectFeatureDefinitionIDs(slideName);}}/**** @param slideName* @throws Exception*/private void collectFeatureDefinitionIDs(String slideName)throws Exception {Utils.info("collectFeatureDefinitionIDs for " + slideName);DefinitionsContainer defs =Catalog.getInstance().getDefinitionsContainer();Long slideDefID = this.slideNameSlideDefID.get(slideName);Utils.info("slideName=" + slideName);if(slideDefID == null) {Utils.severe("Unexpected slide " + slideName);return;}List<String> featureLabels =this.slideNameFeatureNames.get(slideName);Utils.info("featureLabels=" + featureLabels);// Feature-less slideif(featureLabels != null && 
!featureLabels.isEmpty()) {for(String featureLabel : featureLabels) {Utils.info("defs=" + defs);Utils.info("slideDefID=" + slideDefID);Utils.info("featureLabel=" + featureLabel);FeatureDefinition featureDef =defs.getFeatureDefinition(slideDefID, featureLabel);String key = slideName + "_" + featureLabel;if(featureDef == null) {Utils.severe("Unexpected feature " + key);continue;}this.slideNameFeatureNameFeatureDefID.put(key,new Long(featureDef.getID()));}}List<String> childrenSlideNames =this.slideNameChildrenSlideNames.get(slideName);if(childrenSlideNames != null && !childrenSlideNames.isEmpty()) {for(String childSlideName : childrenSlideNames) {String fullSlideName = slideName + "_" + childSlideName;this.collectFeatureDefinitionIDs(fullSlideName);}}}/**** @throws Exception*/private void collectSlideDefinitionIDs() throws Exception {for(String slideName : this.slideNames) {SlideDefinition slideDef = this.getSlideDefinition(slideName);//Utils.info("slideDef=" + slideDef);if(slideDef == null) {continue;}Long slideID = slideDef.getID();this.slideNameSlideDefID.put(slideName, slideID);if(slideDef.hasChildren()) {this.collectChidrenSlideDefinitionIDs(slideName, slideDef);}}}/**** @param parentSlideName* @param paretnSlideDef* @throws Exception*/private void collectChidrenSlideDefinitionIDs(String parentSlideName,SlideDefinition parentSlideDef) throws Exception {Utils.info("collectChidrenSlideDefinitionIDs for " + parentSlideName);// May not be needed as validations have already filtered correct// children slide namesList<String> childrenSlideNames =this.slideNameChildrenSlideNames.get(parentSlideName);Utils.info("childrenSlideNames=" + childrenSlideNames);List<Long> childrenSlideDefinitionIDs =parentSlideDef.getChildrenSlideDefinitionIDs();Utils.info("childrenSlideDefinitionIDs=" + childrenSlideDefinitionIDs);DefinitionsContainer defs =Catalog.getInstance().getDefinitionsContainer();for(Long childSlideDefID : childrenSlideDefinitionIDs) {SlideDefinition childSlideDef 
=defs.getSlideDefinition(childSlideDefID.longValue());String label = childSlideDef.getLabel();Utils.info("label="+label);if(childrenSlideNames.contains(StringUtils.lowerCase(label))) {String key = parentSlideName + "_" +StringUtils.lowerCase(label);this.slideNameSlideDefID.put(key, childSlideDefID);if(childSlideDef.hasChildren()) {this.collectChidrenSlideDefinitionIDs(key, childSlideDef);}}}}/**** @param slideName* @return* @throws Exception*/private SlideDefinition getSlideDefinition(String slideName)throws Exception {DefinitionsContainer defs =Catalog.getInstance().getDefinitionsContainer();List<SlideDefinition> slideDefs =defs.getSlideDefinitions(this.categoryID, slideName);if(slideDefs == null) {return null;}if (slideDefs.isEmpty()) {return null;}// RE-VISIT// Pick the firstSlideDefinition slideDef = slideDefs.get(0);//Utils.info("slideDef=" + slideDef);return slideDef;}/**** @param slideName* @param displayTextPrefix* @throws Exception*/private void processFeatures(String slideName, String displayTextPrefix)throws Exception {Utils.info("Processing features for " + slideName);if(this.slideNameSlideDefID.get(slideName) == null) {Utils.severe("Unexpected slide \"" + displayTextPrefix + "\"");return;}long slideDefID = this.slideNameSlideDefID.get(slideName).longValue();Utils.info("slideID=" + slideDefID);List<String> candidateFeatures =this.slideNameFeatureNames.get(slideName);Utils.info("slideName=" + slideName);Utils.info("candidateFeatures=" + candidateFeatures);// Get all feature definitions for the slideDefinitionsContainer defs =Catalog.getInstance().getDefinitionsContainer();SlideDefinition slideDef = defs.getSlideDefinition(slideDefID);Utils.info("slideDef=" + slideDef);List<FeatureDefinition> featureDefs =defs.getFeatureDefinitions(slideDefID);Utils.info("featureDefs=" + featureDefs);// Feature less slideif(featureDefs == null || featureDefs.isEmpty()) {Utils.info("Feature-less slide");return;}List<String> validFeatureLabels = new 
ArrayList<String>();for(int i=0; i<featureDefs.size(); i++) {validFeatureLabels.add(StringUtils.lowerCase(featureDefs.get(i).getLabel()));}Utils.info("validFeatureLabels=" + validFeatureLabels);// Missing features checkif(candidateFeatures == null || candidateFeatures.isEmpty()) {Utils.info("No features found on the slide");candidateFeatures = new ArrayList<String>();}// Discard if feature is not one of the validsList<String> processedFeatureLabels = new ArrayList<String>();for(String featureLabel : candidateFeatures) {if(validFeatureLabels.contains(featureLabel)) {processedFeatureLabels.add(featureLabel);}else {Utils.severe("Invalid feature \"" + displayTextPrefix +" > " + featureLabel + "\"");}}Utils.info("processedFeatureLabels=" + processedFeatureLabels);// For all further processingthis.slideNameFeatureNames.put(slideName, processedFeatureLabels);// Fetch all mandatory featuresList<FeatureDefinition> mandatoryFeatureDefs =defs.getFeatureDefinitions(slideDefID,EditorialImportance.MANDATORY);Utils.info("mandatoryFeatureDefs=" + mandatoryFeatureDefs);// Severe error if mandatory features are not includedfor(FeatureDefinition featureDef : mandatoryFeatureDefs) {if(!processedFeatureLabels.contains(StringUtils.lowerCase(featureDef.getLabel()))) {Utils.severe("Mandatory feature \"" + displayTextPrefix+ " > " + featureDef.getLabel() + "\" is missing");}}// Fetch all recommended featuresList<FeatureDefinition> recommendedFeatureDefs =defs.getFeatureDefinitions(slideDef.getID(),EditorialImportance.RECOMMENDED);Utils.info("recommendedFeatureDefs=" + recommendedFeatureDefs);// Warn if recommended features are not includedfor(FeatureDefinition featureDef : recommendedFeatureDefs) {if(!processedFeatureLabels.contains(StringUtils.lowerCase(featureDef.getLabel()))) {Utils.warning("Recommended feature \"" + displayTextPrefix +" > " + featureDef.getLabel() +"\" is missing");}}// Process features of children slidesList<String> childrenSlideNames 
=this.slideNameChildrenSlideNames.get(slideName);if(childrenSlideNames != null && !childrenSlideNames.isEmpty()) {for(String childSlideName : childrenSlideNames) {String fullChildSlideName = slideName + "_" + childSlideName;String newDisplayTextPrefix = displayTextPrefix + " > " +childSlideName;this.processFeatures(fullChildSlideName, newDisplayTextPrefix);}}}/*** Filter and validate parent slide names** @throws Exception*/private void processSlides() throws Exception {// 1 Retrieve meta-data// 1.1 Retrieve all valid slide names in the content modelDefinitionsContainer defs =Catalog.getInstance().getDefinitionsContainer();List<SlideDefinition> slideDefs = defs.getSlideDefinitions(this.categoryID);List<String> validSlideNames = new ArrayList<String>();for (SlideDefinition slideDef : slideDefs) {// To avoid Introduction slideif(!slideDef.getLabel().isEmpty()) {validSlideNames.add(StringUtils.lowerCase(slideDef.getLabel()));}}Utils.info("validSlideNames=" + validSlideNames.toString());// 2 Rules// 2.1 Discard if slide is not one of the validsList<String> processedSlideNames = new ArrayList<String>();for(String slideName : this.slideNames) {if(validSlideNames.contains(slideName)) {processedSlideNames.add(slideName);}else if(slideName.equals("resource urls") ||slideName.equals("reference urls")) {// ignorecontinue;}else {Utils.severe("Invalid slide \"" + slideName + "\"");}}Utils.info("processedSlideNames=" + processedSlideNames.toString());// For all further processing use processed slide names onlythis.slideNames = processedSlideNames;// 3 Retrieve "Mandatory" slide names for the categoryList<SlideDefinition> mandatorySlideDefs =defs.getSlides(this.categoryID, EditorialImportance.MANDATORY);Utils.info("mandatorySlideDefs=" +mandatorySlideDefs.toString());// 3.1 All mandatory slides exist - Severefor(SlideDefinition mandatorySlideDef : mandatorySlideDefs) {// Avoid introduction slideif(mandatorySlideDef.getLabel().isEmpty()) 
{continue;}if(mandatorySlideDef.getLabel().equals("Introduction")) {continue;}if(!this.slideNames.contains(StringUtils.lowerCase(mandatorySlideDef.getLabel()))) {Utils.severe("Mandatory slide \"" +mandatorySlideDef.getLabel() + "\" is missing");}}// 4 Retrieve "Recommended" slide names for the categoryList<SlideDefinition> recommendedSlideDefs =defs.getSlides(this.categoryID, EditorialImportance.RECOMMENDED);Utils.info("recommendedSlideDefs=" + recommendedSlideDefs.toString());// 4.1 All recommended slides exist - Warnfor(SlideDefinition recommendedSlideDef : recommendedSlideDefs) {if(!this.slideNames.contains(StringUtils.lowerCase(recommendedSlideDef.getLabel()))) {Utils.warning("Recommended slide \"" +recommendedSlideDef.getLabel() + "\" is missing");}}// Process children slidesfor(String parentSlideName : this.slideNames) {Utils.info("Process children slides for parentSlideName=" +parentSlideName);SlideDefinition parentSlideDef =this.getSlideDefinition(parentSlideName);// ignore for nowif(parentSlideDef == null) {continue;}this.processChildrenSlides(parentSlideDef, parentSlideName,parentSlideDef.getLabel());}Utils.info("slideNameChildrenSlideNames=" +slideNameChildrenSlideNames);}/**** @param slideName* @param parentSlideDefinition* @param crumb* @throws Exception*/private void processChildrenSlides(SlideDefinition parentSlideDef,String crumb, String displayTextPrefix) throws Exception {List<Long> childrenSlideDefinitionIDs =parentSlideDef.getChildrenSlideDefinitionIDs();if(childrenSlideDefinitionIDs == null ||childrenSlideDefinitionIDs.isEmpty()) {return;}DefinitionsContainer defs =Catalog.getInstance().getDefinitionsContainer();String parentSlideName = parentSlideDef.getLabel();Utils.info("parentSlideName=" + parentSlideName + " has " +childrenSlideDefinitionIDs + " children slides to process");Utils.info("crumb=" + crumb);List<String> childrenSlideNames =this.slideNameChildrenSlideNames.get(crumb);Utils.info("childrenSlideNames=" + childrenSlideNames);// 
Collect allowed children slide labelsList<String> validChildrenSlideNames = new ArrayList<String>();Map<String, SlideDefinition> childSlideNameSlideDefinition =new HashMap<String, SlideDefinition>();for(Long childSlideDefID : childrenSlideDefinitionIDs) {Utils.info("childSlideDefID=" + childSlideDefID);SlideDefinition childSlideDef =defs.getSlideDefinition(childSlideDefID.longValue());Utils.info("childSlideDef=" + childSlideDef);String displaytext = displayTextPrefix + " > " +childSlideDef.getLabel();// All children slides defined are mandatoryString childSlideName =StringUtils.lowerCase(childSlideDef.getLabel());Utils.info("childSlideName=" + childSlideName);if(childrenSlideNames == null ||!childrenSlideNames.contains(childSlideName)) {Utils.severe("Missing child slide \"" + displaytext + "\"");continue;}validChildrenSlideNames.add(childSlideName);childSlideNameSlideDefinition.put(childSlideName, childSlideDef);}this.slideNameChildrenSlideNames.put(crumb, validChildrenSlideNames);Utils.info("validChildrenSlideNames=" + validChildrenSlideNames);for(String childSlideName : childSlideNameSlideDefinition.keySet()) {SlideDefinition slideDef =childSlideNameSlideDefinition.get(childSlideName);String newcrumb = crumb + "_" +StringUtils.lowerCase(childSlideName);String newDisplayTextPrefix = displayTextPrefix + " > " +slideDef.getLabel();this.processChildrenSlides(slideDef, newcrumb,newDisplayTextPrefix);}}/**** @param bulletText* @param featureDef* @param crumb* @param prefix* @return* @throws Exception*/private Bullet processBulletText(String bulletText,FeatureDefinition featureDef, String crumb, String prefix)throws Exception {Utils.info("featureDef.getLabel=" + featureDef.getLabel());Utils.info("bulletText=" + bulletText);BulletDefinition bulletDef = featureDef.getBulletDefinition();Utils.info("bulletDefinition=" + bulletDef);DefinitionsContainer defs =Catalog.getInstance().getDefinitionsContainer();// If unit is definedUnit unitDef = null;if(bulletDef.getUnitID() != 
0L) {long unitID = bulletDef.getUnitID();unitDef = defs.getUnit(unitID);Utils.info("unitDef=" + unitDef);}long datatypeDefID = bulletDef.getDatatypeDefinitionID();DatatypeDefinition datatypeDef =defs.getDatatypeDefinition(datatypeDefID);Utils.info("datatypeDef=" + datatypeDef);// If primitiveboolean isEnum = false;boolean isComposite = false;boolean isPrimitive = false;if(datatypeDef instanceof EnumDefinition) {isEnum = true;}else if(datatypeDef instanceof CompositeDefinition) {isComposite = true;}else {isPrimitive = true;}Utils.info("isEnum=" + isEnum + " isComposite=" + isComposite +" isPrimitive=" + isPrimitive);// if no unit is defined for this bullet whole is treated as valueString bulletValue = bulletText;Bullet bullet = null;// If unit is definedif(bulletDef.getUnitID() != 0L) {// Validate unitString[] parts = StringUtils.split(bulletText, " ");if(parts.length < 2) {Utils.severe("Unit is missing, \"" + crumb + "\" = " +bulletText);return null;}else if(parts.length > 2) {Utils.severe("Invalid value, \"" + crumb + "\" = " +bulletText);return null;}bulletValue = parts[0];String unitValue = parts[1];Utils.info("unitValue="+unitValue);if(!(unitValue.equalsIgnoreCase(unitDef.getShortForm()) ||unitValue.equalsIgnoreCase(unitDef.getFullForm()))) {Utils.severe("Invalid unit, \"" + crumb + "\" = " +bulletText);}}// Validate bullet valueUtils.info("bulletValue=" + bulletValue);if(isPrimitive) {if(!this.validatePrimitive(bulletValue, datatypeDef, crumb)) {return null;}bullet = new Bullet(new PrimitiveDataObject(bulletValue));}// Enum and fixedelse if(isEnum && !bulletDef.isLearned()) {long enumValueID = defs.getEnumValueID(datatypeDef.getID(),bulletValue);Utils.info("enumValueID=" + enumValueID);// Treat it to be free-formif(enumValueID == -1L) {Utils.severe("Not one of the valid enum values, \"" +crumb + "\" = " + bulletValue);return null;}EnumDataObject enumDataObject = new EnumDataObject(enumValueID);bullet = new Bullet(enumDataObject);}// Compositeelse 
if(isComposite) {
            CompositeDefinition compositeDef =
                (CompositeDefinition)datatypeDef;
            String separator = compositeDef.getSeparator();
            String[] compositeParts =
                StringUtils.split(bulletValue, separator);
            List<CompositePartDefinition> compositePartDefs =
                compositeDef.getCompositePartDefinitions();

            // Validate number of parts
            if(compositeParts.length != compositePartDefs.size()) {
                Utils.severe("Invalid value, " + crumb + " = " +
                    bulletValue);
                return null;
            }

            // Remove spurious whitespaces
            boolean validPart = true;
            for(int j=0;j<compositeParts.length;j++) {
                compositeParts[j] = StringUtils.strip(compositeParts[j]);
                Utils.info("compositeParts["+ j + "]=" +
                    compositeParts[j]);

                // Validate each part
                // Each part can be enum or composite in itself
                // We will stick to primitive for now
                long partDatatypeDefID =
                    compositePartDefs.get(j).getDatatypeDefinitionID();
                DatatypeDefinition partDatatypeDef =
                    defs.getDatatypeDefinition(partDatatypeDefID);
                Utils.info("partDatatypeDef=" + partDatatypeDef);
                if(!this.validatePrimitive(compositeParts[j],
                    partDatatypeDef, crumb)) {
                    validPart = false;
                    break;
                }
            }

            if(!validPart) {
                return null;
            }

            CompositeDataObject compositeDataObject =
                new CompositeDataObject();
            for(int j=0;j<compositeParts.length;j++) {
                compositeDataObject.addPrimitiveDataObject(
                    new PrimitiveDataObject(compositeParts[j]));
            }
            bullet = new Bullet(compositeDataObject);
        }

        // Free-form content at bullet level
        String key = prefix + "_" + bulletText;
        List<String> bulletFFCs = this.containerFCCs.get(key);
        bullet.setFreeformContent(new FreeformContent(
            StringUtils.join(bulletFFCs, "|")));
        return bullet;
    }

    /**
     * Checks that a raw bullet value is well-formed for a primitive datatype.
     * The datatype is selected by the name of {@code datatypeDef}:
     * <ul>
     * <li>{@code integer} - must parse with {@link Integer#parseInt(String)}</li>
     * <li>{@code decimal} - must parse with {@link Float#parseFloat(String)}</li>
     * <li>{@code hours_mins} - e.g. "2 hours" or "2 hours 40 mins"</li>
     * <li>{@code days_hours} - e.g. "9 days" or "9 days 14 hours"</li>
     * </ul>
     * Any other datatype name is accepted without checks. Every rejection is
     * reported through {@code Utils.severe}.
     *
     * @param bulletValue raw text to validate
     * @param datatypeDef definition whose name selects the validation rule
     * @param crumb label used in the error message to identify the value
     * @return {@code true} if the value is acceptable, {@code false} otherwise
     */
    private boolean validatePrimitive(String bulletValue,
            DatatypeDefinition datatypeDef, String crumb) {
        String dt = datatypeDef.getName();

        // integer
        if(dt.equals("integer")) {
            try {
                Integer.parseInt(bulletValue);
            }
            catch(NumberFormatException nfe) {
                Utils.severe("Invalid integer value, \"" + crumb +
                    "\" = " + bulletValue);
                return false;
            }
        }
        // decimal
        else if(dt.equals("decimal")) {
            try {
                Float.parseFloat(bulletValue);
            }
            catch(NumberFormatException nfe) {
                Utils.severe("Invalid decimal value, \"" + crumb +
                    "\" = " + bulletValue);
                return false;
            }
        }
        // hours_mins e.g. 2 hours 40 mins
        else if(dt.equals("hours_mins")) {
            return this.validateCountUnitPairs(bulletValue, crumb,
                "hours", "mins");
        }
        // days_hours e.g. 9 days 14 hours
        else if(dt.equals("days_hours")) {
            // The previous inline check rejected values whose second unit
            // WAS "hours" (missing negation); the shared helper requires it.
            return this.validateCountUnitPairs(bulletValue, crumb,
                "days", "hours");
        }

        return true;
    }

    /**
     * Validates a space separated value of the form
     * {@code <int> <firstUnit>} or
     * {@code <int> <firstUnit> <int> <secondUnit>}. Unit words are compared
     * case-insensitively. A failure is reported once through
     * {@code Utils.severe}.
     *
     * @param bulletValue raw text to validate
     * @param crumb label used in the error message to identify the value
     * @param firstUnit unit word required after the first number
     * @param secondUnit unit word required after the second number, if present
     * @return {@code true} if the value matches one of the two forms
     */
    private boolean validateCountUnitPairs(String bulletValue, String crumb,
            String firstUnit, String secondUnit) {
        String[] parts = StringUtils.split(bulletValue, " ");
        String[] units = {firstUnit, secondUnit};

        // StringUtils.split returns null for null input; treat as invalid
        boolean valid = parts != null &&
            (parts.length == 2 || parts.length == 4);

        // Parts alternate: count, unit [, count, unit]
        for(int p = 0; valid && p < parts.length; p += 2) {
            try {
                Integer.parseInt(StringUtils.strip(parts[p]));
            }
            catch(NumberFormatException nfe) {
                valid = false;
                break;
            }
            valid = units[p / 2].equalsIgnoreCase(
                StringUtils.strip(parts[p + 1]));
        }

        if(!valid) {
            Utils.severe("Invalid value, \"" + crumb +
                "\" = " + bulletValue);
        }
        return valid;
    }

    /**
     * Recursively walks the content found by {@link #getContents} and records
     * it in this object's lookup maps, keyed by {@code prefix}:
     * feature labels in {@code slideNameFeatureNames}, bullet labels in
     * {@code slideNameFeatureNameBulletTexts}, children slide labels in
     * {@code slideNameChildrenSlideNames} and free-form texts in
     * {@code containerFCCs}.
     * <p>
     * For features and children slides the recursion uses the lower-cased
     * {@code prefix + "_" + label} as the new prefix; for bullets the prefix
     * is passed down unchanged. Feature and children slide labels are
     * lower-cased in place in the lists held by {@code info}.
     *
     * @param richTextRuns all rich text runs being processed
     * @param info map with keys "features", "bullets", "childrenSlides" and
     *        "ffc"; each value is either {@code null} or a two element array
     *        holding a label list and an index list
     * @param indent indent level of the entries described by {@code info}
     * @param prefix key under which the entries of this level are stored
     */
    @SuppressWarnings("unchecked")
    private void drilldown(RichTextRun[] richTextRuns,
            Map<String, Object[]> info, int indent, String prefix) {
        Utils.info("drilldown");

        Object[] featuresInfo = info.get("features");
        if(featuresInfo != null) {
            List<String> featureLabels = (List<String>) featuresInfo[0];
            List<Integer> featureIndices = (List<Integer>) featuresInfo[1];
            Utils.info("featureLabels=" + featureLabels);
            Utils.info("featureIndices=" + featureIndices);

            Utils.info("processing features");
            for(int i=0;i<featureLabels.size();i++) {
                String featureLabel = featureLabels.get(i);
                Utils.info("featureLabel="+featureLabel);
                featureLabels.set(i, StringUtils.lowerCase(
                    StringUtils.strip(StringUtils.trim(featureLabel))));

                // Content of an entry starts right after its recorded index
                int contentStartIndex = featureIndices.get(i) + 1;
                int contentIndent = indent + 1;
                Map<String, Object[]> info1 =
                    getContents(richTextRuns, contentStartIndex,
                        contentIndent);

                String newprefix = StringUtils.lowerCase(prefix + "_" +
                    featureLabel);
                drilldown(richTextRuns, info1, contentIndent, newprefix);
            }

            if(featureLabels.size() > 0) {
                Utils.info("prefix=" + prefix);
                Utils.info("featureLabels=" + featureLabels);
                this.slideNameFeatureNames.put(prefix, featureLabels);
            }
        }

        Object[] bulletsInfo = info.get("bullets");
        if(bulletsInfo != null) {
            List<String> bulletLabels = (List<String>) bulletsInfo[0];
            List<Integer> bulletIndices = (List<Integer>) bulletsInfo[1];
            Utils.info("bulletLabels=" + bulletLabels);
            Utils.info("bulletIndices=" + bulletIndices);

            Utils.info("processing bullets");
            for(int i=0;i<bulletLabels.size();i++) {
                String bulletLabel = bulletLabels.get(i);
                Utils.info("bulletLabel="+bulletLabel);

                int contentStartIndex = bulletIndices.get(i) + 1;
                int contentIndent = indent + 1;
                Map<String, Object[]> info1 = getContents(richTextRuns,
                    contentStartIndex, contentIndent);

                // Bullets do not extend the prefix
                drilldown(richTextRuns, info1, contentIndent, prefix);
            }

            if(bulletLabels.size() > 0) {
                this.slideNameFeatureNameBulletTexts.put(prefix, bulletLabels);
            }
        }

        Object[] childrenSlidesInfo = info.get("childrenSlides");
        if(childrenSlidesInfo != null) {
            List<String> childrenSlideLabels =
                (List<String>) childrenSlidesInfo[0];
            List<Integer> childrenSlideIndices =
                (List<Integer>) childrenSlidesInfo[1];
            Utils.info("childrenSlideLabels=" +
                childrenSlideLabels);
            Utils.info("childrenSlideIndices=" +
                childrenSlideIndices);

            Utils.info("processing children slides");
            for(int i=0;i<childrenSlideLabels.size();i++) {
                String slideLabel = childrenSlideLabels.get(i);
                Utils.info("slideLabel="+slideLabel);
                childrenSlideLabels.set(i, StringUtils.lowerCase(slideLabel));

                int contentStartIndex = childrenSlideIndices.get(i) + 1;
                int contentIndent = indent + 1;
                Map<String, Object[]> info1 = getContents(richTextRuns,
                    contentStartIndex, contentIndent);

                String newprefix = StringUtils.lowerCase(prefix + "_" +
                    slideLabel);
                drilldown(richTextRuns, info1, contentIndent, newprefix);
            }

            if(childrenSlideLabels.size() > 0) {
                this.slideNameChildrenSlideNames.put(prefix,
                    childrenSlideLabels);
            }
        }

        Object[] ffcInfo = info.get("ffc");
        if(ffcInfo != null) {
            List<String> ffcTexts = (List<String>) ffcInfo[0];
            List<Integer> ffcIndices = (List<Integer>) ffcInfo[1];
            Utils.info("ffcTexts=" + ffcTexts);
            Utils.info("ffcIndices=" + ffcIndices);

            Utils.info("processing ffc");
            if(ffcTexts.size() > 0) {
                this.containerFCCs.put(prefix, ffcTexts);
            }
        }
    }

    /**
     * Collects children slides, features, bullets and free-form content that
     * are found from {@code startIndex} on at the given indent level.
     *
     * @param richTextRuns all rich text runs being processed
     * @param startIndex index of the first run to inspect
     * @param indent indent level the entries must have
     * @return map with the keys "childrenSlides", "features", "bullets" and
     *         "ffc"; each value is the result of the matching lookup and is
     *         {@code null} when {@code startIndex} is past the last run
     */
    private Map<String, Object[]> getContents(RichTextRun[] richTextRuns,
            int startIndex, int indent) {
        Map<String, Object[]> info = new HashMap<String, Object[]>();

        Object[] childrenSlidesInfo = getChildrenSlides(richTextRuns,
            startIndex, indent);
        info.put("childrenSlides", childrenSlidesInfo);
        if(childrenSlidesInfo != null) {
            Utils.info("childrenSlidesInfo[0]=" +
                childrenSlidesInfo[0]);
            Utils.info("childrenSlidesInfo[1]=" +
                childrenSlidesInfo[1]);
        }

        Object[] featuresInfo = getFeatures(richTextRuns,
            startIndex, indent);
        info.put("features", featuresInfo);
        if(featuresInfo != null) {
            Utils.info("featuresInfo[0]=" + featuresInfo[0]);
            Utils.info("featuresInfo[1]=" + featuresInfo[1]);
        }

        Object[] bulletsInfo = getBulletInfo(richTextRuns,
            startIndex, indent);
        info.put("bullets", bulletsInfo);
        if(bulletsInfo != null) {
            Utils.info("bulletsInfo[0]=" + bulletsInfo[0]);
            Utils.info("bulletsInfo[1]=" + bulletsInfo[1]);
        }

        Object[] ffcInfo = getFreeformContent(richTextRuns,
            startIndex, indent);
        info.put("ffc", ffcInfo);
        if(ffcInfo != null) {
            Utils.info("ffcInfo[0]=" + ffcInfo[0]);
            Utils.info("ffcInfo[1]=" + ffcInfo[1]);
        }

        return info;
    }

    /**
     * Looks up bullet entries, i.e. runs coloured {@code HSLF.BLUE}.
     *
     * @param richTextRuns all rich text runs being processed
     * @param startIndex index of the first run to inspect
     * @param indent indent level the entries must have
     * @return see {@link #getInfo}
     */
    private Object[] getBulletInfo(RichTextRun[] richTextRuns,
            int startIndex, int indent) {
        // -16776961 - Blue
        Object[] bulletsInfo = this.getInfo(richTextRuns, startIndex, indent,
            HSLF.BLUE, true);
        return bulletsInfo;
    }

    /**
     * Looks up free-form content entries, i.e. runs coloured
     * {@code HSLF.PURPLE}.
     *
     * @param richTextRuns all rich text runs being processed
     * @param startIndex index of the first run to inspect
     * @param indent indent level the entries must have
     * @return see {@link #getInfo}
     */
    private Object[] getFreeformContent(RichTextRun[] richTextRuns,
            int startIndex, int indent) {
        // -6749953 - Purple
        Object[] ffcInfo = this.getInfo(richTextRuns, startIndex,
            indent, HSLF.PURPLE, true);
        return ffcInfo;
    }

    /**
     * Looks up feature entries, i.e. runs coloured {@code HSLF.GREEN}.
     *
     * @param richTextRuns all rich text runs being processed
     * @param startIndex index of the first run to inspect
     * @param indent indent level the entries must have
     * @return see {@link #getInfo}
     */
    private Object[] getFeatures(RichTextRun[] richTextRuns,
            int startIndex, int indent) {
        // -16711936 - Green
        Object[] featuresInfo = this.getInfo(richTextRuns, startIndex,
            indent, HSLF.GREEN, true);
        return featuresInfo;
    }

    /**
     * Looks up children slide entries, i.e. runs coloured
     * {@code HSLF.ORANGE}.
     *
     * @param richTextRuns all rich text runs being processed
     * @param startIndex index of the first run to inspect
     * @param indent indent level the entries must have
     * @return see {@link #getInfo}
     */
    private Object[] getChildrenSlides(RichTextRun[] richTextRuns,
            int startIndex, int indent) {
        // -26368 - Orange
        Object[] childrenSlidesInfo = this.getInfo(richTextRuns, startIndex,
            indent, HSLF.ORANGE, true);
        return childrenSlidesInfo;
    }

    /**
     * Scans the runs from {@code startIndex} on and collects the label of
     * every run that sits at {@code indent} and whose bullet colour or font
     * colour equals {@code rgb}. Scanning stops at the last run or as soon as
     * the following run has an indent level below {@code indent}.
     *
     * @param rts all rich text runs being processed
     * @param startIndex index of the first run to inspect
     * @param indent indent level a run must have to be considered
     * @param rgb colour value a run's bullet or font colour must equal
     * @param debug whether to log trace output through {@code Utils.info}
     * @return {@code null} if {@code startIndex} is past the last run,
     *         otherwise a two element array: a {@code List<String>} of labels
     *         and a {@code List<Integer>} with the index at which each
     *         label's text ended (see {@link #getValidLabel})
     */
    private Object[] getInfo(RichTextRun[] rts, int startIndex,
            int indent, int rgb, boolean debug) {
        if(startIndex >= rts.length) {
            return null;
        }

        List<String> labels = new ArrayList<String>();
        List<Integer> indices = new ArrayList<Integer>();

        int i = startIndex;
        while(true) {
            if(debug) {
                Utils.info("rts["+i+"].getRawText=" +
                    rts[i].getRawText());
                Utils.info("rts["+i+"].isBullet=" + rts[i].isBullet());
                Utils.info("rts["+i+"].getIndentLevel=" +
                    rts[i].getIndentLevel());
                Utils.info("rts["+i+"].getBulletColor().getRGB=" +
                    rts[i].getBulletColor().getRGB());
                Utils.info("rts["+i+"].getFontColor().getRGB=" +
                    rts[i].getFontColor().getRGB());
                Utils.info("rgb=" + rgb);
                Utils.info("indent=" + indent);
                Utils.info("rts.length=" + rts.length);
            }

            // Deliberately not requiring rts[i].isBullet(): a match on either
            // the bullet colour or the font colour is sufficient
            if(rts[i].getIndentLevel() == indent &&
                (rts[i].getBulletColor().getRGB() == rgb ||
                    rts[i].getFontColor().getRGB() == rgb)) {
                if(debug) {
                    Utils.info("Criteria of color, indent etc. is met");
                }
                // Skips ahead to where the label's text ended
                i = this.getValidLabel(i, rts, labels, indices, debug);
                if(debug) {
                    Utils.info("new i="+ i);
                }
            }

            if(debug) {
                Utils.info("labels=" + labels);
                Utils.info("indices=" + indices);
                Utils.info("(i >= (rts.length-1))" + (i >= (rts.length-1)));
            }

            if(i >= (rts.length-1)) {
                break;
            }

            // Next run belongs to an outer level; this level is finished
            if(rts[i+1].getIndentLevel() < indent) {
                break;
            }

            i++;
        }

        return new Object[] {labels, indices};
    }

    /**
     * Builds one label by concatenating the raw text of consecutive runs,
     * starting at {@code index} and stopping at the first run whose raw text
     * is exactly "\r" or at the end of {@code rts}. If the stripped text is
     * not empty, the stop index is appended to {@code indices} and the label,
     * with hard spaces (character 160) replaced by normal spaces and stripped
     * again, is appended to {@code labels}.
     *
     * @param index index of the first run of the label
     * @param rts all rich text runs being processed
     * @param labels receives the label, if any
     * @param indices receives the index at which the label's text ended
     * @param debug whether to log trace output through {@code Utils.info}
     * @return index at which scanning stopped
     */
    private int getValidLabel(int index, RichTextRun[] rts,
            List<String> labels, List<Integer> indices, boolean debug){
        StringBuilder sb = new StringBuilder();
        while(index < rts.length) {
            String text = rts[index].getRawText();
            if(text.equals("\r")) {
                break;
            }
            sb.append(text);
            index++;
        }

        if(debug) {
            Utils.info("before label=]"+sb.toString()+"[");
        }
        String label = StringUtils.strip(StringUtils.trim(sb.toString()));
        if(debug) {
            Utils.info("after label=]"+label+"[");
        }

        // NOTE(review): the emptiness check runs before hard spaces are
        // replaced, so a text made only of hard spaces presumably ends up as
        // an empty label - confirm whether that is intended.
        if(!label.isEmpty()) {
            indices.add(Integer.valueOf(index));

            // Remove hardspace character \xa0
            char hardspace = (char)160;
            char softspace = (char)32;
            label = StringUtils.strip(StringUtils.replaceChars(label,
                hardspace, softspace));
            labels.add(label);
        }

        return index;
    }

    /**
     * TEST ONLY
     * <p>
     * Prints, for every rich text run of every text run of every slide in the
     * given file, its raw text, bullet and underline flags, indent level,
     * bullet colour, font colour and font name to standard output.
     *
     * @param filename path of the PPT file to dump
     * @throws Exception if the file cannot be read as a slide show
     */
    public static void showAllRichText(String filename) throws Exception {
        HSLFSlideShow hslfSS = new HSLFSlideShow(filename);
        SlideShow ss = new SlideShow(hslfSS);
        Slide[] ssarr = ss.getSlides();
        for(int i=0; i < ssarr.length; i++) {
            TextRun[] trarr = ssarr[i].getTextRuns();
            for(int j=0; j < trarr.length; j++) {
                System.out.println("\ntrarr[" + j + "].getRawText=" +
                    trarr[j].getRawText());

                RichTextRun[] rtrarr = trarr[j].getRichTextRuns();
                for(int k=0; k < rtrarr.length; k++) {
                    String rawText = rtrarr[k].getRawText();
                    System.out.println("\nrtrarr[" + k + "].getRawText=" +
                        rawText);
                    System.out.println("rtrarr[" + k + "].isBullet=" +
                        rtrarr[k].isBullet());
                    System.out.println("rtrarr[" + k + "].isUnderlined=" +
                        rtrarr[k].isUnderlined());

                    int indent = rtrarr[k].getIndentLevel();
                    System.out.println("indent=" + indent);

                    int rgb = rtrarr[k].getBulletColor().getRGB();
                    System.out.println("rgb=" + rgb);

                    int fontrgb = rtrarr[k].getFontColor().getRGB();
                    System.out.println("font rgb=" + fontrgb);

                    String fontname = rtrarr[k].getFontName();
                    System.out.println("font name=" + fontname);
                }
            }
        }
    }

    /**
     * TEST ONLY
     * <p>
     * Prints the text extracted from the given file by
     * {@code PowerPointExtractor} to standard output.
     *
     * @param filename path of the PPT file to dump
     * @throws Exception if the file cannot be read
     */
    public static void showAllText(String filename) throws Exception {
        PowerPointExtractor ppe = new PowerPointExtractor(filename);
        System.out.println(ppe.getText());
    }
}