// Rev 39 | Blame | Last modification | View Log | RSS feed
/**
 *
 */
package in.shop2020.metamodel.util;

import in.shop2020.metamodel.core.Bullet;
import in.shop2020.metamodel.core.CompositeDataObject;
import in.shop2020.metamodel.core.Entity;
import in.shop2020.metamodel.core.EnumDataObject;
import in.shop2020.metamodel.core.Feature;
import in.shop2020.metamodel.core.FreeformContent;
import in.shop2020.metamodel.core.PrimitiveDataObject;
import in.shop2020.metamodel.definitions.BulletDefinition;
import in.shop2020.metamodel.definitions.Catalog;
import in.shop2020.metamodel.definitions.CompositeDefinition;
import in.shop2020.metamodel.definitions.CompositePartDefinition;
import in.shop2020.metamodel.definitions.DatatypeDefinition;
import in.shop2020.metamodel.definitions.DefinitionsContainer;
import in.shop2020.metamodel.definitions.EditorialImportance;
import in.shop2020.metamodel.definitions.EntityContainer;
import in.shop2020.metamodel.definitions.EnumDefinition;
import in.shop2020.metamodel.definitions.FeatureDefinition;
import in.shop2020.metamodel.definitions.SlideDefinition;
import in.shop2020.metamodel.definitions.Unit;
import in.shop2020.util.Utils;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.poi.hslf.HSLFSlideShow;
import org.apache.poi.hslf.extractor.PowerPointExtractor;
import org.apache.poi.hslf.model.Slide;
import org.apache.poi.hslf.model.TextRun;
import org.apache.poi.hslf.usermodel.RichTextRun;
import org.apache.poi.hslf.usermodel.SlideShow;

/**
 * Command line tool that reads a colour-coded PowerPoint (.ppt) file,
 * validates its slides, features and bullets against the definitions held
 * by the {@link Catalog} and optionally imports the result as an
 * {@link Entity}.
 *
 * <p>Text runs are classified by their bullet/font colour: see
 * {@link #ORANGE}, {@link #GREEN}, {@link #BLUE} and {@link #PURPLE}.
 *
 * @author naveen
 */
public class HSLF {

    public static final String CONTENT_SRC_PPT_PATH =
        "/home/naveen/workspace/eclipse/content/ppt/";

    public static final String CONTENT_SRC_HTML_PATH =
        "/home/naveen/workspace/eclipse/content/html/";

    public static final String CONTENT_SRC_XML_PATH =
        "/home/naveen/workspace/eclipse/content/xml/";

    /** RGB value used to recognise bullet lines. */
    public static final int BLUE = -16776961;

    /** RGB value used to recognise free-form content (and the introduction text). */
    public static final int PURPLE = -6749953;

    /** RGB value used to recognise feature lines. */
    public static final int GREEN = -16711936;

    /** RGB value used to recognise children slides (and the introduction title). */
    public static final int ORANGE = -26368;

    // LOCAL
    public static final String CONTENT_DB_PATH =
        "/home/naveen/workspace/eclipse/db/entities/";

    /* WEB
    public static final String CONTENT_DB_PATH =
        "/var/lib/tomcat6/webapps/shop2020/db/entities/";
    */

    /** Hard (non-breaking) space, \xa0. */
    private static final char HARD_SPACE = (char) 160;

    /** Ordinary space it is replaced with. */
    private static final char SOFT_SPACE = (char) 32;

    /** Lower-cased slide titles, in presentation order. */
    private List<String> slideNames = new ArrayList<String>();

    /** Prefix -> lower-cased labels of the children slides found below it. */
    private Map<String, List<String>> slideNameChildrenSlideNames =
        new HashMap<String, List<String>>();

    /** Prefix (slide name) -> lower-cased feature labels found below it. */
    private Map<String, List<String>> slideNameFeatureNames =
        new HashMap<String, List<String>>();

    /** Slide name -> slide definition ID. */
    private Map<String, Long> slideNameSlideDefID =
        new HashMap<String, Long>();

    /** "slideName_featureName" -> feature definition ID. */
    private Map<String, Long> slideNameFeatureNameFeatureDefID =
        new HashMap<String, Long>();

    /** Prefix ("slideName_featureName") -> raw bullet texts. */
    private Map<String, List<String>> slideNameFeatureNameBulletTexts =
        new HashMap<String, List<String>>();

    /** "slideName_featureName" -> validated bullets. */
    private Map<String, List<Bullet>> slideNameFeatureNameBullets =
        new HashMap<String, List<Bullet>>();

    /** Prefix -> free-form content texts collected for that container. */
    private Map<String, List<String>> containerFCCs =
        new HashMap<String, List<String>>();

    private long categoryID;
    private String srcFile;
    private String dbFile;
    private String introductionTitle;
    private String introductionText;

    /**
     * Entry point. Supported commands:
     * <ul>
     * <li>{@code validate {Category ID} {PPT file name}}</li>
     * <li>{@code import {Category ID} {PPT file name}}</li>
     * <li>{@code show {Entity ID}}</li>
     * </ul>
     * Prints the usage and exits with -1 when the arguments are incomplete
     * or the command is unknown; exits with 0 after a command has run.
     *
     * @param args command followed by its arguments
     * @throws Exception whatever the executed command throws, including
     *         {@link NumberFormatException} for a non-numeric ID
     */
    public static void main(String[] args) throws Exception {
        String[] commands = new String[] {"validate", "import", "show"};

        String usage = "Usage: HSLF ["+ StringUtils.join(commands, "|") +"] [{Entity ID}|{Category ID} {PPT file name}]\n";

        if (args.length < 2) {
            System.out.println(usage);
            System.exit(-1);
        }

        String inputCommand = args[0];
        if (!ArrayUtils.contains(commands, inputCommand)) {
            System.out.println(usage);
            System.exit(-1);
        }

        boolean validateOnly = inputCommand.equals("validate");
        if (validateOnly || inputCommand.equals("import")) {
            // Both commands need a category ID and a file name
            if (args.length < 3) {
                System.out.println(usage);
                System.exit(-1);
            }

            long categoryID = Long.parseLong(args[1]);
            HSLF hslf = new HSLF(categoryID, args[2]);
            if (validateOnly) {
                hslf.validateEntity();
            }
            else {
                hslf.importEntity();
            }
            System.exit(0);
        }

        if (inputCommand.equals("show")) {
            HSLF hslf = new HSLF();
            hslf.showEntity(Long.parseLong(args[1]));
            System.exit(0);
        }
    }

    /**
     * Creates an instance without source file; only {@link #showEntity(long)}
     * is usable on it.
     */
    public HSLF() {
    }

    /**
     * @param categoryID category the presentation is validated against
     * @param fileName PPT file name, without directory and without the
     *        ".ppt" extension
     */
    public HSLF(long categoryID, String fileName) {
        this.categoryID = categoryID;
        this.srcFile = CONTENT_SRC_PPT_PATH + fileName + ".ppt";
        this.dbFile = CONTENT_DB_PATH + "entities" + ".ser";
    }

    /**
     * Logs the entity stored under the given ID.
     *
     * @param entityID ID of the entity to look up in the entity container
     * @throws Exception if the catalog cannot provide the entity
     */
    public void showEntity(long entityID) throws Exception {
        EntityContainer entContainer =
            Catalog.getInstance().getEntityContainer();

        Entity entity = entContainer.getEntity(entityID);
        Utils.logger.info("Entity=" + entity);
    }

    /**
     * Parses the source presentation and validates its slides, features and
     * bullets. The first slide is treated as the introduction page, every
     * further slide as a content page. Validation problems are logged; the
     * bullets that pass are kept for a later import.
     *
     * @throws IllegalStateException if the presentation has no slides
     * @throws Exception if the file cannot be read or a definition lookup
     *         fails
     */
    public void validateEntity() throws Exception {
        HSLFSlideShow hslfSS = new HSLFSlideShow(this.srcFile);
        SlideShow ss = new SlideShow(hslfSS);
        Slide[] ssarr = ss.getSlides();

        if (ssarr.length == 0) {
            throw new IllegalStateException(
                "No slides found in " + this.srcFile);
        }

        // Introduction page
        this.processIntroductionSlide(ssarr[0]);

        // Rest of the content page
        for (int i = 1; i < ssarr.length; i++) {
            TextRun[] trs = ssarr[i].getTextRuns();

            String title = StringUtils.lowerCase(StringUtils.strip(
                StringUtils.trim(ssarr[i].getTitle())));

            System.out.println(title);
            this.slideNames.add(title);

            // Title-only slide, there is no text run that could hold content
            if (trs.length < 2) {
                Utils.logger.warning("Slide \"" + title +
                    "\" has no content text run");
                continue;
            }

            // Hack to get to TextRun where bullets are!
            TextRun contentTextRun = trs[1];
            RichTextRun[] richTextRuns = contentTextRun.getRichTextRuns();
            if (richTextRuns.length == 1 && trs.length > 2) {
                contentTextRun = trs[2];
                richTextRuns = contentTextRun.getRichTextRuns();
            }

            int startIndex = 0;
            int indent = 1;
            Map<String, Object[]> info = getContents(richTextRuns, startIndex,
                indent);

            System.out.println(info);
            drilldown(richTextRuns, info, indent, title);
        }

        System.out.println("this.slideNames=" + this.slideNames);
        System.out.println("this.slideNameChildrenSlideNames=" +
            this.slideNameChildrenSlideNames);
        System.out.println("this.slideNameFeatureNames=" +
            this.slideNameFeatureNames);
        System.out.println("this.slideNameFeatureNameBulletTexts=" +
            this.slideNameFeatureNameBulletTexts);
        System.out.println("this.containerFCCs=" + this.containerFCCs);

        // Start validation and import

        // Validate slides
        this.processSlides();

        // Collect Slide Definition IDs
        this.collectSlideDefinitionIDs();
        Utils.logger.info("this.slideNameSlideDefID=" +
            this.slideNameSlideDefID);

        // Validate features
        for (String slideName : this.slideNames) {
            this.processFeatures(slideName);
        }

        // Collect Feature Definition IDs
        this.collectFeatureDefinitionIDs();
        Utils.logger.info("this.slideNameFeatureNameFeatureDefID=" +
            this.slideNameFeatureNameFeatureDefID);

        // Validate bullets
        this.validateBullets();
        Utils.logger.info("this.slideNameFeatureNameBullets=" +
            this.slideNameFeatureNameBullets);
    }

    /**
     * Validates every collected bullet text and stores the bullets that pass
     * under their "slideName_featureName" key.
     *
     * @throws Exception if a definition lookup fails
     */
    private void validateBullets() throws Exception {
        DefinitionsContainer defs =
            Catalog.getInstance().getDefinitionsContainer();

        for (String slideName : this.slideNames) {
            Long slideDefID = this.slideNameSlideDefID.get(slideName);
            SlideDefinition slideDef =
                defs.getSlideDefinition(slideDefID.longValue());

            List<String> featureLabels =
                this.slideNameFeatureNames.get(slideName);

            // Feature-less slide
            if (featureLabels == null) {
                continue;
            }

            for (String featureLabel : featureLabels) {
                String key = slideName + "_" + featureLabel;

                List<String> bulletTexts =
                    this.slideNameFeatureNameBulletTexts.get(key);

                // Bullet-less feature
                if (bulletTexts == null) {
                    continue;
                }

                Long featureDefID =
                    this.slideNameFeatureNameFeatureDefID.get(key);

                FeatureDefinition featureDef =
                    defs.getFeatureDefinition(featureDefID.longValue());

                String crumb = slideDef.getLabel() + " > " +
                    featureDef.getLabel();

                for (String bulletText : bulletTexts) {
                    Bullet bullet = this.processBulletText(bulletText,
                        featureDef, crumb, key);

                    // Can't be imported has errors, skip
                    if (bullet == null) {
                        continue;
                    }

                    List<Bullet> bullets =
                        this.slideNameFeatureNameBullets.get(key);

                    if (bullets == null) {
                        bullets = new ArrayList<Bullet>();
                        this.slideNameFeatureNameBullets.put(key, bullets);
                    }

                    bullets.add(bullet);
                }
            }
        }
    }

    /**
     * Validates the presentation, builds an {@link Entity} from it, adds it
     * to the entity container and stores both the entities and the
     * entities-by-category index on disk.
     *
     * @throws Exception if validation, construction or storing fails
     */
    public void importEntity() throws Exception {
        // Parse and Validate
        this.validateEntity();

        // Construct Content Model
        Entity entity = this.contructEntityObject();
        Utils.logger.info("entity=" + entity);

        EntityContainer entContainer =
            Catalog.getInstance().getEntityContainer();

        entContainer.addEntity(entity);

        // RE-VISIT
        // Store it back
        DBUtils.store(entContainer.getEntities(), this.dbFile);

        // Store the index separately
        String entitiesbycategoryDBFile = HSLF.CONTENT_DB_PATH +
            "entitiesbycategory" + ".ser";

        DBUtils.store(entContainer.getEntitiesbyCategory(),
            entitiesbycategoryDBFile);
    }

    /**
     * Extracts the introduction title (orange font) and introduction text
     * (purple font) from the first slide; runs in any other colour are
     * ignored.
     *
     * @param hslfSlide the introduction slide
     */
    private void processIntroductionSlide(Slide hslfSlide) {
        StringBuilder sbTitle = new StringBuilder();
        StringBuilder sbText = new StringBuilder();

        for (TextRun textRun : hslfSlide.getTextRuns()) {
            for (RichTextRun richTextRun : textRun.getRichTextRuns()) {
                int rgb = richTextRun.getFontColor().getRGB();
                if (rgb == HSLF.ORANGE) {
                    sbTitle.append(richTextRun.getRawText());
                }
                else if (rgb == HSLF.PURPLE) {
                    sbText.append(richTextRun.getRawText());
                }
            }
        }

        this.introductionTitle = sbTitle.toString();
        this.introductionText = sbText.toString();

        Utils.logger.info("this.introductionTitle=" + this.introductionTitle);
        Utils.logger.info("this.introductionText=" + this.introductionText);
    }

    /**
     * Builds the entity from the data collected by {@link #validateEntity()}.
     * Brand, model number and model name are derived from the space separated
     * tokens of the introduction title (1 to 4 tokens are understood).
     * Slides without features are not added to the entity.
     *
     * @return Entity
     * @throws Exception if a sequence or definition lookup fails
     */
    private Entity contructEntityObject() throws Exception {
        SequenceGenerator sg = SequenceGenerator.getInstance();
        long entityID = sg.getNextSequence(SequenceGenerator.ENTITY);

        Entity entity = new Entity(entityID, this.categoryID);

        String[] brandModel = StringUtils.split(this.introductionTitle, " ");
        if (brandModel.length == 1) {
            entity.setBrand(StringUtils.strip(brandModel[0]));
        }
        else if (brandModel.length == 2) {
            entity.setBrand(StringUtils.strip(brandModel[0]));
            entity.setModelName(StringUtils.strip(brandModel[1]));
        }
        else if (brandModel.length == 3) {
            entity.setBrand(StringUtils.strip(brandModel[0]));
            entity.setModelNumber(StringUtils.strip(brandModel[1]));
            entity.setModelName(StringUtils.strip(brandModel[2]));
        }
        else if (brandModel.length == 4) {
            entity.setBrand(StringUtils.strip(brandModel[0]) + " " +
                StringUtils.strip(brandModel[1]));
            entity.setModelNumber(StringUtils.strip(brandModel[2]));
            entity.setModelName(StringUtils.strip(brandModel[3]));
        }
        else {
            // Entity is still created, but without brand/model information
            Utils.logger.warning("Unable to derive brand and model from " +
                "introduction title \"" + this.introductionTitle + "\"");
        }

        // Add introduction slide
        DefinitionsContainer defs =
            Catalog.getInstance().getDefinitionsContainer();

        List<SlideDefinition> introSlideDefs =
            defs.getSlideDefinitions(this.categoryID, "Introduction");

        // It will only be one
        long introSlideDefID = introSlideDefs.get(0).getID();
        Utils.logger.info("introSlideDefID=" + introSlideDefID);

        in.shop2020.metamodel.core.Slide introSlide =
            new in.shop2020.metamodel.core.Slide(introSlideDefID);

        introSlide.setFreeformContent(
            new FreeformContent(this.introductionText));
        Utils.logger.info("introSlide=" + introSlide);

        entity.addSlide(introSlide);

        // Add rest of the slides
        for (String slideName : this.slideNames) {
            List<String> featureNames =
                this.slideNameFeatureNames.get(slideName);

            Long slideDefID = this.slideNameSlideDefID.get(slideName);

            in.shop2020.metamodel.core.Slide slide =
                new in.shop2020.metamodel.core.Slide(slideDefID.longValue());

            // Only if there are features
            if (featureNames == null) {
                continue;
            }

            List<Feature> features = new ArrayList<Feature>();
            for (String featureName : featureNames) {
                String key = slideName + "_" + featureName;

                Long featureDefID =
                    this.slideNameFeatureNameFeatureDefID.get(key);

                List<Bullet> bullets =
                    this.slideNameFeatureNameBullets.get(key);

                Feature feature = new Feature(featureDefID.longValue());
                feature.setBullets(bullets);

                // Free-form content
                List<String> featureFFCs = this.containerFCCs.get(key);
                FreeformContent featureFFC =
                    new FreeformContent(StringUtils.join(featureFFCs, "|"));

                feature.setFreeformContent(featureFFC);

                features.add(feature);
            }

            slide.setFeatures(features);

            // Add the free-form content collected for the slide itself
            List<String> slideFFCs = this.containerFCCs.get(slideName);
            Utils.logger.info("slideName=" + slideName +
                " slideFFCs=" + slideFFCs);

            FreeformContent slideFFC =
                new FreeformContent(StringUtils.join(slideFFCs, "|"));

            slide.setFreeformContent(slideFFC);

            entity.addSlide(slide);
        }

        return entity;
    }

    /**
     * Looks up the feature definition of every (slide, feature) pair and
     * remembers its ID under the "slideName_featureName" key.
     *
     * @throws Exception if a definition lookup fails
     */
    private void collectFeatureDefinitionIDs() throws Exception {
        DefinitionsContainer defs =
            Catalog.getInstance().getDefinitionsContainer();

        for (String slideName : this.slideNames) {
            Long slideDefID = this.slideNameSlideDefID.get(slideName);
            Utils.logger.info("slideName=" + slideName);

            List<String> featureLabels =
                this.slideNameFeatureNames.get(slideName);
            Utils.logger.info("featureLabels=" + featureLabels);

            // Feature-less slide
            if (featureLabels == null) {
                continue;
            }

            for (String featureLabel : featureLabels) {
                FeatureDefinition featureDef =
                    defs.getFeatureDefinition(slideDefID, featureLabel);

                String key = slideName + "_" + featureLabel;
                this.slideNameFeatureNameFeatureDefID.put(key,
                    Long.valueOf(featureDef.getID()));
            }
        }
    }

    /**
     * Looks up the slide definition of every slide name and remembers its ID.
     * Slide names without a definition are left out of the map.
     *
     * @throws Exception if a definition lookup fails
     */
    private void collectSlideDefinitionIDs() throws Exception {
        DefinitionsContainer defs =
            Catalog.getInstance().getDefinitionsContainer();

        for (String slideName : this.slideNames) {
            List<SlideDefinition> slideDefs =
                defs.getSlideDefinitions(this.categoryID, slideName);

            if (slideDefs == null || slideDefs.isEmpty()) {
                continue;
            }

            // RE-VISIT
            // Pick the first
            SlideDefinition slideDef = slideDefs.get(0);

            Long slideID = slideDef.getID();
            this.slideNameSlideDefID.put(slideName, slideID);
        }
    }

    /**
     * Filters the candidate features of a slide down to the ones defined for
     * it and reports missing mandatory (severe) and recommended (warning)
     * features.
     *
     * @param slideName lower-cased slide name
     * @throws Exception if a definition lookup fails
     */
    private void processFeatures(String slideName) throws Exception {
        long slideDefID = this.slideNameSlideDefID.get(slideName).longValue();
        Utils.logger.info("slideID=" + slideDefID);

        List<String> candidateFeatures =
            this.slideNameFeatureNames.get(slideName);

        Utils.logger.info("slideName=" + slideName);
        Utils.logger.info("candidateFeatures=" + candidateFeatures);

        // Feature less slide
        if (candidateFeatures == null) {
            return;
        }

        // Get all feature definitions for the slide
        DefinitionsContainer defs =
            Catalog.getInstance().getDefinitionsContainer();

        SlideDefinition slideDef = defs.getSlideDefinition(slideDefID);
        Utils.logger.info("slideDef=" + slideDef);

        List<FeatureDefinition> featureDefs =
            defs.getFeatureDefinitions(slideDefID);
        Utils.logger.info("featureDefs=" + featureDefs);

        List<String> validFeatureLabels = new ArrayList<String>();
        for (FeatureDefinition featureDef : featureDefs) {
            validFeatureLabels.add(
                StringUtils.lowerCase(featureDef.getLabel()));
        }
        Utils.logger.info("validFeatureLabels=" + validFeatureLabels);

        // Discard if feature is not one of the valids
        List<String> processedFeatureLabels = new ArrayList<String>();
        for (String featureLabel : candidateFeatures) {
            if (validFeatureLabels.contains(featureLabel)) {
                processedFeatureLabels.add(featureLabel);
            }
            else {
                Utils.logger.severe("Invalid feature \"" + slideDef.getLabel() +
                    " > " + featureLabel + "\"");
            }
        }

        Utils.logger.info("processedFeatureLabels=" + processedFeatureLabels);

        // For all further processing
        this.slideNameFeatureNames.put(slideName, processedFeatureLabels);

        // Fetch all mandatory features
        List<FeatureDefinition> mandatoryFeatureDefs =
            defs.getFeatureDefinitions(slideDefID,
                EditorialImportance.MANDATORY);
        Utils.logger.info("mandatoryFeatureDefs=" + mandatoryFeatureDefs);

        // Severe error if mandatory features are not included
        for (FeatureDefinition featureDef : mandatoryFeatureDefs) {
            if (!processedFeatureLabels.contains(
                    StringUtils.lowerCase(featureDef.getLabel()))) {
                Utils.logger.severe("Mandatory feature \"" + slideDef.getLabel()
                    + " > " + featureDef.getLabel() + "\" is missing");
            }
        }

        // Fetch all recommended features
        List<FeatureDefinition> recommendedFeatureDefs =
            defs.getFeatureDefinitions(slideDef.getID(),
                EditorialImportance.RECOMMENDED);
        Utils.logger.info("recommendedFeatureDefs=" + recommendedFeatureDefs);

        // Warn if recommended features are not included
        for (FeatureDefinition featureDef : recommendedFeatureDefs) {
            if (!processedFeatureLabels.contains(
                    StringUtils.lowerCase(featureDef.getLabel()))) {
                Utils.logger.warning("Recommended feature \"" +
                    slideDef.getLabel() + " > " + featureDef.getLabel() +
                    "\" is missing");
            }
        }
    }

    /**
     * Filter and validate parent slide names. Unknown slides are reported and
     * dropped ("resource urls" and "reference urls" are dropped silently);
     * missing mandatory slides are reported as severe, missing recommended
     * slides as warnings.
     *
     * @throws Exception if a definition lookup fails
     */
    private void processSlides() throws Exception {
        // 1 Retrieve meta-data
        // 1.1 Retrieve all valid slide names in the content model
        DefinitionsContainer defs =
            Catalog.getInstance().getDefinitionsContainer();

        List<SlideDefinition> slideDefs =
            defs.getSlideDefinitions(this.categoryID);

        List<String> validSlideNames = new ArrayList<String>();
        for (SlideDefinition slideDef : slideDefs) {
            // To avoid Introduction slide
            if (!slideDef.getLabel().isEmpty()) {
                validSlideNames.add(
                    StringUtils.lowerCase(slideDef.getLabel()));
            }
        }
        Utils.logger.info("validSlideNames=" + validSlideNames.toString());

        // 2 Rules
        // 2.1 Discard if slide is not one of the valids
        List<String> processedSlideNames = new ArrayList<String>();
        for (String slideName : this.slideNames) {
            if (validSlideNames.contains(slideName)) {
                processedSlideNames.add(slideName);
            }
            // Constant-first equals: an untitled slide yields a null name
            else if ("resource urls".equals(slideName) ||
                    "reference urls".equals(slideName)) {
                // ignore
                continue;
            }
            else {
                Utils.logger.severe("Invalid slide \"" + slideName + "\"");
            }
        }

        Utils.logger.info("processedSlideNames=" +
            processedSlideNames.toString());

        // For all further processing use processed slide names only
        this.slideNames = processedSlideNames;

        // 3 Retrieve "Mandatory" slide names for the category
        List<SlideDefinition> mandatorySlideDefs =
            defs.getSlides(this.categoryID, EditorialImportance.MANDATORY);
        Utils.logger.info("mandatorySlideDefs=" +
            mandatorySlideDefs.toString());

        // 3.1 All mandatory slides exist - Severe
        for (SlideDefinition mandatorySlideDef : mandatorySlideDefs) {
            String label = mandatorySlideDef.getLabel();

            // Avoid introduction slide
            if (label.isEmpty() || label.equals("Introduction")) {
                continue;
            }

            if (!this.slideNames.contains(StringUtils.lowerCase(label))) {
                Utils.logger.severe("Mandatory slide \"" +
                    mandatorySlideDef.getLabel() + "\" is missing");
            }
        }

        // 4 Retrieve "Recommended" slide names for the category
        List<SlideDefinition> recommendedSlideDefs =
            defs.getSlides(this.categoryID, EditorialImportance.RECOMMENDED);
        Utils.logger.info("recommendedSlideDefs=" +
            recommendedSlideDefs.toString());

        // 4.1 All recommended slides exist - Warn
        for (SlideDefinition recommendedSlideDef : recommendedSlideDefs) {
            if (!this.slideNames.contains(
                    StringUtils.lowerCase(recommendedSlideDef.getLabel()))) {
                Utils.logger.warning("Recommended slide \"" +
                    recommendedSlideDef.getLabel() + "\" is missing");
            }
        }
    }

    /**
     * Validates one bullet text against the bullet definition of its feature
     * and converts it into a {@link Bullet}.
     *
     * @param bulletText raw bullet text as found in the presentation
     * @param featureDef definition of the feature the bullet belongs to
     * @param crumb "slide > feature" label used in log messages
     * @param prefix key prefix used to look up bullet level free-form content
     * @return Bullet, or null if the text is invalid or cannot be converted
     * @throws Exception if a definition lookup fails
     */
    private Bullet processBulletText(String bulletText,
            FeatureDefinition featureDef, String crumb, String prefix)
            throws Exception {
        Utils.logger.info("featureDef.getLabel=" + featureDef.getLabel());
        Utils.logger.info("bulletText=" + bulletText);

        BulletDefinition bulletDef = featureDef.getBulletDefinition();
        Utils.logger.info("bulletDefinition=" + bulletDef);

        DefinitionsContainer defs =
            Catalog.getInstance().getDefinitionsContainer();

        // If unit is defined
        boolean hasUnit = bulletDef.getUnitID() != 0L;
        Unit unitDef = null;
        if (hasUnit) {
            unitDef = defs.getUnit(bulletDef.getUnitID());
            Utils.logger.info("unitDef=" + unitDef);
        }

        long datatypeDefID = bulletDef.getDatatypeDefinitionID();
        DatatypeDefinition datatypeDef =
            defs.getDatatypeDefinition(datatypeDefID);
        Utils.logger.info("datatypeDef=" + datatypeDef);

        // Anything that is neither enum nor composite is primitive
        boolean isEnum = datatypeDef instanceof EnumDefinition;
        boolean isComposite = !isEnum &&
            datatypeDef instanceof CompositeDefinition;
        boolean isPrimitive = !isEnum && !isComposite;

        Utils.logger.info("isEnum=" + isEnum + " isComposite=" + isComposite +
            " isPrimitive=" + isPrimitive);

        // if no unit is defined for this bullet whole is treated as value
        String bulletValue = bulletText;

        if (hasUnit) {
            // Validate unit
            String[] parts = StringUtils.split(bulletText, " ");
            if (parts.length < 2) {
                Utils.logger.severe("Unit is missing, \"" + crumb + "\" = " +
                    bulletText);
                return null;
            }
            else if (parts.length > 2) {
                Utils.logger.severe("Invalid value, \"" + crumb + "\" = " +
                    bulletText);
                return null;
            }

            bulletValue = parts[0];
            String unitValue = parts[1];
            Utils.logger.info("unitValue=" + unitValue);

            // NOTE(review): an invalid unit is only logged, the bullet is
            // still processed - confirm whether it should be rejected like
            // the other unit errors above.
            if (!(unitValue.equalsIgnoreCase(unitDef.getShortForm()) ||
                    unitValue.equalsIgnoreCase(unitDef.getFullForm()))) {
                Utils.logger.severe("Invalid unit, \"" + crumb + "\" = " +
                    bulletText);
            }
        }

        // Validate bullet value
        Utils.logger.info("bulletValue=" + bulletValue);

        Bullet bullet = null;
        if (isPrimitive) {
            if (!this.validatePrimitive(bulletValue, datatypeDef, crumb)) {
                return null;
            }

            bullet = new Bullet(new PrimitiveDataObject(bulletValue));
        }
        // Enum and fixed
        else if (isEnum && !bulletDef.isLearned()) {
            long enumValueID = defs.getEnumValueID(datatypeDef.getID(),
                bulletValue);
            Utils.logger.info("enumValueID=" + enumValueID);

            if (enumValueID == -1L) {
                Utils.logger.severe("Not one of the valid enum values, \"" +
                    crumb + "\" = " + bulletValue);
                return null;
            }

            bullet = new Bullet(new EnumDataObject(enumValueID));
        }
        // Composite
        else if (isComposite) {
            CompositeDefinition compositeDef =
                (CompositeDefinition) datatypeDef;

            String separator = compositeDef.getSeparator();

            String[] compositeParts =
                StringUtils.split(bulletValue, separator);

            List<CompositePartDefinition> compositePartDefs =
                compositeDef.getCompositePartDefinitions();

            // Validate number of parts
            if (compositeParts.length != compositePartDefs.size()) {
                Utils.logger.severe("Invalid value, " + crumb + " = " +
                    bulletValue);
                return null;
            }

            for (int j = 0; j < compositeParts.length; j++) {
                // Remove spurious whitespaces
                compositeParts[j] = StringUtils.strip(compositeParts[j]);
                Utils.logger.info("compositeParts["+ j + "]=" +
                    compositeParts[j]);

                // Validate each part
                // Each part can be enum or composite in itself
                // We will stick to primitive for now
                long partDatatypeDefID =
                    compositePartDefs.get(j).getDatatypeDefinitionID();

                DatatypeDefinition partDatatypeDef =
                    defs.getDatatypeDefinition(partDatatypeDefID);
                Utils.logger.info("partDatatypeDef=" + partDatatypeDef);

                if (!this.validatePrimitive(compositeParts[j],
                        partDatatypeDef, crumb)) {
                    return null;
                }
            }

            CompositeDataObject compositeDataObject =
                new CompositeDataObject();

            for (int j = 0; j < compositeParts.length; j++) {
                compositeDataObject.addPrimitiveDataObject(
                    new PrimitiveDataObject(compositeParts[j]));
            }

            bullet = new Bullet(compositeDataObject);
        }

        // No branch above builds a bullet for a learned enum; report it
        // instead of failing with a NullPointerException below.
        if (bullet == null) {
            Utils.logger.severe("Learned enum values are not supported, \"" +
                crumb + "\" = " + bulletValue);
            return null;
        }

        // Free-form content at bullet level
        String key = prefix + "_" + bulletText;
        List<String> bulletFFCs = this.containerFCCs.get(key);

        bullet.setFreeformContent(new FreeformContent(
            StringUtils.join(bulletFFCs, "|")));

        return bullet;
    }

    /**
     * Validates a value against a primitive datatype. The datatypes
     * "integer", "decimal", "hours_mins" and "days_hours" are checked; a
     * value of any other datatype is accepted as is.
     *
     * @param bulletValue value to validate
     * @param datatypeDef datatype the value has to conform to
     * @param crumb "slide > feature" label used in log messages
     * @return true if the value is valid
     */
    private boolean validatePrimitive(String bulletValue,
            DatatypeDefinition datatypeDef, String crumb) {
        String dt = datatypeDef.getName();

        // integer
        if (dt.equals("integer")) {
            try {
                Integer.parseInt(bulletValue);
            }
            catch (NumberFormatException nfe) {
                Utils.logger.severe("Invalid integer value, \"" + crumb +
                    "\" = " + bulletValue);
                return false;
            }
        }
        // decimal
        else if (dt.equals("decimal")) {
            try {
                Float.parseFloat(bulletValue);
            }
            catch (NumberFormatException nfe) {
                Utils.logger.severe("Invalid decimal value, \"" + crumb +
                    "\" = " + bulletValue);
                return false;
            }
        }
        // hours_mins e.g. 2 hours 40 mins
        else if (dt.equals("hours_mins")) {
            return this.validateDuration(bulletValue, crumb, "hours", "mins");
        }
        // days_hours e.g. 9 days 14 hours
        else if (dt.equals("days_hours")) {
            return this.validateDuration(bulletValue, crumb, "days", "hours");
        }

        return true;
    }

    /**
     * Validates a duration of the form "{int} {majorUnit}" or
     * "{int} {majorUnit} {int} {minorUnit}". Units are compared ignoring
     * case.
     *
     * @param bulletValue value to validate
     * @param crumb "slide > feature" label used in log messages
     * @param majorUnit unit expected as second token
     * @param minorUnit unit expected as fourth token, if present
     * @return true if the value is valid
     */
    private boolean validateDuration(String bulletValue, String crumb,
            String majorUnit, String minorUnit) {
        String[] parts = StringUtils.split(bulletValue, " ");

        boolean valid = (parts.length == 2 || parts.length == 4)
            && isInteger(parts[0])
            && parts[1].equalsIgnoreCase(majorUnit);

        if (valid && parts.length == 4) {
            // The trailing unit is the fourth token (parts[3])
            valid = isInteger(parts[2])
                && parts[3].equalsIgnoreCase(minorUnit);
        }

        if (!valid) {
            Utils.logger.severe("Invalid value, \"" + crumb +
                "\" = " + bulletValue);
        }

        return valid;
    }

    /**
     * @param text text to check
     * @return true if the stripped text can be parsed as an int
     */
    private static boolean isInteger(String text) {
        try {
            Integer.parseInt(StringUtils.strip(text));
            return true;
        }
        catch (NumberFormatException nfe) {
            return false;
        }
    }

    /**
     * Recursively walks the content found at one indent level and records
     * features, bullets, children slides and free-form content under the
     * given prefix.
     *
     * @param richTextRuns all rich text runs of the slide content
     * @param info result of {@link #getContents} for the current level
     * @param indent indent level {@code info} was collected at
     * @param prefix key under which the findings are stored
     */
    @SuppressWarnings("unchecked")
    private void drilldown(RichTextRun[] richTextRuns,
            Map<String, Object[]> info, int indent, String prefix) {
        System.out.println("drilldown");

        int contentIndent = indent + 1;

        Object[] featuresInfo = info.get("features");
        if (featuresInfo != null) {
            List<String> featureLabels = (List<String>) featuresInfo[0];
            List<Integer> featureIndices = (List<Integer>) featuresInfo[1];
            System.out.println("featureLabels=" + featureLabels);
            System.out.println("featureIndices=" + featureIndices);

            System.out.println("processing features");
            for (int i = 0; i < featureLabels.size(); i++) {
                String featureLabel = featureLabels.get(i);
                System.out.println("featureLabel=" + featureLabel);

                featureLabels.set(i, StringUtils.lowerCase(
                    StringUtils.strip(StringUtils.trim(featureLabel))));

                int contentStartIndex = featureIndices.get(i) + 1;

                Map<String, Object[]> info1 =
                    getContents(richTextRuns, contentStartIndex, contentIndent);

                String newprefix = StringUtils.lowerCase(prefix + "_" +
                    featureLabel);

                drilldown(richTextRuns, info1, contentIndent, newprefix);
            }

            if (featureLabels.size() > 0) {
                this.slideNameFeatureNames.put(prefix, featureLabels);
            }
        }

        Object[] bulletsInfo = info.get("bullets");
        if (bulletsInfo != null) {
            List<String> bulletLabels = (List<String>) bulletsInfo[0];
            List<Integer> bulletIndices = (List<Integer>) bulletsInfo[1];
            System.out.println("bulletLabels=" + bulletLabels);
            System.out.println("bulletIndices=" + bulletIndices);

            System.out.println("processing bullets");
            for (int i = 0; i < bulletLabels.size(); i++) {
                String bulletLabel = bulletLabels.get(i);
                System.out.println("bulletLabel=" + bulletLabel);

                int contentStartIndex = bulletIndices.get(i) + 1;

                Map<String, Object[]> info1 = getContents(richTextRuns,
                    contentStartIndex, contentIndent);

                // NOTE(review): content below a bullet is stored under the
                // unchanged prefix, while processBulletText looks bullet
                // level free-form content up under prefix + "_" + bulletText
                // - confirm which of the two is intended.
                drilldown(richTextRuns, info1, contentIndent, prefix);
            }

            if (bulletLabels.size() > 0) {
                this.slideNameFeatureNameBulletTexts.put(prefix, bulletLabels);
            }
        }

        Object[] childrenSlidesInfo = info.get("childrenSlides");
        if (childrenSlidesInfo != null) {
            List<String> childrenSlideLabels =
                (List<String>) childrenSlidesInfo[0];

            List<Integer> childrenSlideIndices =
                (List<Integer>) childrenSlidesInfo[1];

            System.out.println("childrenSlideLabels=" +
                childrenSlideLabels);
            System.out.println("childrenSlideIndices=" +
                childrenSlideIndices);

            System.out.println("processing children slides");
            for (int i = 0; i < childrenSlideLabels.size(); i++) {
                String slideLabel = childrenSlideLabels.get(i);
                System.out.println("slideLabel=" + slideLabel);

                childrenSlideLabels.set(i, StringUtils.lowerCase(slideLabel));

                int contentStartIndex = childrenSlideIndices.get(i) + 1;

                Map<String, Object[]> info1 = getContents(richTextRuns,
                    contentStartIndex, contentIndent);

                String newprefix = StringUtils.lowerCase(prefix + "_" +
                    slideLabel);

                drilldown(richTextRuns, info1, contentIndent, newprefix);
            }

            if (childrenSlideLabels.size() > 0) {
                this.slideNameChildrenSlideNames.put(prefix,
                    childrenSlideLabels);
            }
        }

        Object[] ffcInfo = info.get("ffc");
        if (ffcInfo != null) {
            List<String> ffcTexts = (List<String>) ffcInfo[0];
            List<Integer> ffcIndices = (List<Integer>) ffcInfo[1];
            System.out.println("ffcTexts=" + ffcTexts);
            System.out.println("ffcIndices=" + ffcIndices);

            System.out.println("processing ffc");
            if (ffcTexts.size() > 0) {
                this.containerFCCs.put(prefix, ffcTexts);
            }
        }
    }

    /**
     * Collects children slides, features, bullets and free-form content at
     * the given indent level, starting at the given run.
     *
     * @param richTextRuns all rich text runs of the slide content
     * @param startIndex index of the first run to look at
     * @param indent indent level to collect
     * @return map with the keys "childrenSlides", "features", "bullets" and
     *         "ffc"; each value is a {labels, indices} pair or null
     */
    private Map<String, Object[]> getContents(RichTextRun[] richTextRuns,
            int startIndex, int indent) {
        Map<String, Object[]> info = new HashMap<String, Object[]>();

        Object[] childrenSlidesInfo = getChildrenSlides(richTextRuns,
            startIndex, indent);
        info.put("childrenSlides", childrenSlidesInfo);

        if (childrenSlidesInfo != null) {
            System.out.println("childrenSlidesInfo[0]=" +
                childrenSlidesInfo[0]);
            System.out.println("childrenSlidesInfo[1]=" +
                childrenSlidesInfo[1]);
        }

        Object[] featuresInfo = getFeatures(richTextRuns,
            startIndex, indent);
        info.put("features", featuresInfo);

        if (featuresInfo != null) {
            System.out.println("featuresInfo[0]=" + featuresInfo[0]);
            System.out.println("featuresInfo[1]=" + featuresInfo[1]);
        }

        Object[] bulletsInfo = getBulletInfo(richTextRuns,
            startIndex, indent);
        info.put("bullets", bulletsInfo);

        if (bulletsInfo != null) {
            System.out.println("bulletsInfo[0]=" + bulletsInfo[0]);
            System.out.println("bulletsInfo[1]=" + bulletsInfo[1]);
        }

        Object[] ffcInfo = getFreeformContent(richTextRuns,
            startIndex, indent);
        info.put("ffc", ffcInfo);

        if (ffcInfo != null) {
            System.out.println("ffcInfo[0]=" + ffcInfo[0]);
            System.out.println("ffcInfo[1]=" + ffcInfo[1]);
        }

        return info;
    }

    /**
     * Collects the blue (bullet) lines.
     *
     * @param richTextRuns all rich text runs of the slide content
     * @param startIndex index of the first run to look at
     * @param indent indent level to collect
     * @return {labels, indices} pair or null, see {@link #getInfo}
     */
    private Object[] getBulletInfo(RichTextRun[] richTextRuns,
            int startIndex, int indent) {
        return this.getInfo(richTextRuns, startIndex, indent,
            HSLF.BLUE, true);
    }

    /**
     * Collects the purple (free-form content) lines.
     *
     * @param richTextRuns all rich text runs of the slide content
     * @param startIndex index of the first run to look at
     * @param indent indent level to collect
     * @return {labels, indices} pair or null, see {@link #getInfo}
     */
    private Object[] getFreeformContent(RichTextRun[] richTextRuns,
            int startIndex, int indent) {
        return this.getInfo(richTextRuns, startIndex, indent,
            HSLF.PURPLE, true);
    }

    /**
     * Collects the green (feature) lines.
     *
     * @param richTextRuns all rich text runs of the slide content
     * @param startIndex index of the first run to look at
     * @param indent indent level to collect
     * @return {labels, indices} pair or null, see {@link #getInfo}
     */
    private Object[] getFeatures(RichTextRun[] richTextRuns,
            int startIndex, int indent) {
        return this.getInfo(richTextRuns, startIndex, indent,
            HSLF.GREEN, true);
    }

    /**
     * Collects the orange (children slide) lines.
     *
     * @param richTextRuns all rich text runs of the slide content
     * @param startIndex index of the first run to look at
     * @param indent indent level to collect
     * @return {labels, indices} pair or null, see {@link #getInfo}
     */
    private Object[] getChildrenSlides(RichTextRun[] richTextRuns,
            int startIndex, int indent) {
        return this.getInfo(richTextRuns, startIndex, indent,
            HSLF.ORANGE, true);
    }

    /**
     * Scans the runs from {@code startIndex} on and collects the labels whose
     * first run sits at the given indent level and has the given bullet or
     * font colour. Scanning stops at the end of the runs or in front of the
     * first run with a smaller indent level.
     *
     * @param rts all rich text runs of the slide content
     * @param startIndex index of the first run to look at
     * @param indent indent level to collect
     * @param rgb colour to collect
     * @param debug whether to print details of every visited run
     * @return array holding the list of labels and the list of the indices
     *         at which each label ended, or null if {@code startIndex} is
     *         past the last run
     */
    private Object[] getInfo(RichTextRun[] rts, int startIndex,
            int indent, int rgb, boolean debug) {
        List<String> labels = new ArrayList<String>();
        List<Integer> indices = new ArrayList<Integer>();

        if (startIndex >= rts.length) {
            return null;
        }

        int i = startIndex;
        while (true) {
            if (debug) {
                System.out.println("rts["+i+"].getRawText=" +
                    rts[i].getRawText());
                System.out.println("rts["+i+"].isBullet=" + rts[i].isBullet());
                System.out.println("rts["+i+"].getIndentLevel=" +
                    rts[i].getIndentLevel());
                System.out.println("rts["+i+"].getBulletColor().getRGB=" +
                    rts[i].getBulletColor().getRGB());
                System.out.println("rts["+i+"].getFontColor().getRGB=" +
                    rts[i].getFontColor().getRGB());
                System.out.println("rgb=" + rgb);
                System.out.println("indent=" + indent);
                System.out.println("rts.length=" + rts.length);
            }

            if (rts[i].getIndentLevel() == indent &&
                    (rts[i].getBulletColor().getRGB() == rgb ||
                     rts[i].getFontColor().getRGB() == rgb)) {
                System.out.println("Criteria of color, indent etc. is met");

                // Continue behind the runs consumed by the label
                i = this.getValidLabel(i, rts, labels, indices, debug);
                if (debug) {
                    System.out.println("new i="+ i);
                }
            }

            System.out.println("labels=" + labels);
            System.out.println("indices=" + indices);
            System.out.println("(i >= (rts.length-1))" + (i >= (rts.length-1)));

            if (i >= (rts.length-1)) {
                break;
            }

            if (rts[i+1].getIndentLevel() < indent) {
                break;
            }

            i++;
        }

        return new Object[] {labels, indices};
    }

    /**
     * Joins the runs from {@code index} up to (not including) the next run
     * that consists of a single carriage return, or up to the last run, into
     * one label. Hard spaces are replaced by ordinary spaces and surrounding
     * whitespace is removed; a label that is empty afterwards is not
     * recorded.
     *
     * @param index index of the first run of the label
     * @param rts all rich text runs of the slide content
     * @param labels list the label is added to
     * @param indices list the end index of the label is added to
     * @param debug whether to print the label before and after trimming
     * @return index of the terminating carriage return run, or
     *         {@code rts.length} if there was none
     */
    private int getValidLabel(int index, RichTextRun[] rts,
            List<String> labels, List<Integer> indices, boolean debug) {
        StringBuilder sb = new StringBuilder();
        while (index <= rts.length - 1) {
            String text = rts[index].getRawText();
            if (text.equals("\r")) {
                break;
            }

            sb.append(text);
            index++;
        }

        if (debug) {
            System.out.println("before label=]"+sb.toString()+"[");
        }

        String label = StringUtils.strip(StringUtils.trim(sb.toString()));

        if (debug) {
            System.out.println("after label=]"+label+"[");
        }

        // Remove hardspace character \xa0 before the emptiness check, so
        // that a label made of hard spaces only is not recorded
        label = StringUtils.strip(StringUtils.replaceChars(label,
            HARD_SPACE, SOFT_SPACE));

        if (!label.isEmpty()) {
            indices.add(Integer.valueOf(index));
            labels.add(label);
        }

        return index;
    }

    /**
     * TEST ONLY. Prints raw text, bullet flag, underline flag, indent level,
     * bullet colour, font colour and font name of every rich text run of
     * every slide.
     *
     * @param filename path of the PPT file
     * @throws Exception if the file cannot be read
     */
    public static void showAllRichText(String filename) throws Exception {
        HSLFSlideShow hslfSS = new HSLFSlideShow(filename);
        SlideShow ss = new SlideShow(hslfSS);

        Slide[] ssarr = ss.getSlides();
        for (int i = 0; i < ssarr.length; i++) {
            TextRun[] trarr = ssarr[i].getTextRuns();

            for (int j = 0; j < trarr.length; j++) {
                System.out.println("\ntrarr[" + j + "].getRawText=" +
                    trarr[j].getRawText());

                RichTextRun[] rtrarr = trarr[j].getRichTextRuns();

                for (int k = 0; k < rtrarr.length; k++) {
                    String rawText = rtrarr[k].getRawText();
                    System.out.println("\nrtrarr[" + k + "].getRawText=" +
                        rawText);
                    System.out.println("rtrarr[" + k + "].isBullet=" +
                        rtrarr[k].isBullet());
                    System.out.println("rtrarr[" + k + "].isUnderlined=" +
                        rtrarr[k].isUnderlined());

                    int indent = rtrarr[k].getIndentLevel();
                    System.out.println("indent=" + indent);

                    int rgb = rtrarr[k].getBulletColor().getRGB();
                    System.out.println("rgb=" + rgb);

                    int fontrgb = rtrarr[k].getFontColor().getRGB();
                    System.out.println("font rgb=" + fontrgb);

                    String fontname = rtrarr[k].getFontName();
                    System.out.println("font name=" + fontname);
                }
            }
        }
    }

    /**
     * TEST ONLY. Prints the text the PowerPoint extractor finds in the file.
     *
     * @param filename path of the PPT file
     * @throws Exception if the file cannot be read
     */
    public static void showAllText(String filename) throws Exception {
        PowerPointExtractor ppe = new PowerPointExtractor(filename);
        System.out.println(ppe.getText());
    }
}