/** * OpenKM, Open Document Management System (http://www.openkm.com) * Copyright (c) 2006-2011 Paco Avila & Josep Llort * * No bytes were intentionally harmed during the development of this application. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ package com.openkm.kea.metadata; import java.io.BufferedInputStream; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.ObjectInputStream; import java.util.Date; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.openkm.kea.filter.KEAFilter; import com.openkm.kea.stemmers.SremovalStemmer; import com.openkm.kea.stemmers.Stemmer; import com.openkm.kea.stopwords.Stopwords; /** * KEAFilterBank * * @author jllort * */ public class KEAFilterBank { private static Logger log = LoggerFactory.getLogger(KEAFilterBank.class); private static KEAFilterBank instance; private KEAFilter filter; /** * KEAFilterBank * * @return * @throws MetadataExtractionException */ public static synchronized KEAFilterBank getInstance() throws MetadataExtractionException { if (instance == null) { instance = new KEAFilterBank(); } return instance; } /** * KEAFilterBank * * @throws MetadataExtractionException */ private KEAFilterBank() throws MetadataExtractionException { Date start = new Date(); String modelPath = WorkspaceHelper.KEA_MODEL_PATH; String vocabularyPath = WorkspaceHelper.RDF_SKOS_VOVABULARY_PATH; int numPhrases = 5; String className = WorkspaceHelper.KEA_STOPWORDS_CLASSNAME; Stopwords stopwords = null; if (className != null) { try { Class<?> clazz = Class.forName(className); stopwords = (Stopwords) clazz.newInstance(); } catch (Exception e) { log.error("Error creating class instance", e); } } filter = buildFilter(modelPath, vocabularyPath, "skos", WorkspaceHelper.KEA_LANGUAGE, new SremovalStemmer(), stopwords, numPhrases); Date stop = new Date(); long time = (stop.getTime() - start.getTime()); log.info("KEA filters built in " + time + "ms"); } /** * getFilter * * @return * @throws MetadataExtractionException */ public static KEAFilter getFilter() throws MetadataExtractionException { return getInstance().filter; } /** * buildFilter * * @param modelPath * @param vocabularyPath * @param vocabularyFormat * @param language * @param stemmer * @param stopwords * @param numPhrases * * @return * @throws MetadataExtractionException */ private KEAFilter buildFilter(String modelPath, String vocabularyPath, String vocabularyFormat, String language, Stemmer stemmer, Stopwords stopwords, int numPhrases) throws MetadataExtractionException { KEAFilter newFilter = null; try { BufferedInputStream bis = new BufferedInputStream(new FileInputStream(modelPath)); ObjectInputStream ois = new ObjectInputStream(bis); newFilter = (KEAFilter) ois.readObject(); newFilter.setVocabulary(vocabularyPath); newFilter.setVocabularyFormat(vocabularyFormat); newFilter.setDocumentLanguage(language); newFilter.setStemmer(stemmer); newFilter.setStopwords(stopwords); newFilter.loadThesaurus(stemmer,stopwords); newFilter.setNumPhrases(numPhrases); return newFilter; } catch (FileNotFoundException e) { log.error("Unable to find KEA model file"); log.error(e.getMessage(), e); throw new MetadataExtractionException("Subject Extraction failed (see trace for details."); } catch (IOException e) { log.error("Cannot read KEA model from stream"); log.error(e.getMessage(), e); throw new MetadataExtractionException("Subject Extraction failed (see trace for source."); } catch (ClassNotFoundException e) { log.error("Class cast- KEA model.",e); log.error(e.getMessage(), e); throw new MetadataExtractionException("Subject Extraction failed (see trace for source."); } catch (Throwable e) { log.error("Unexpected error with model"); log.error(e.getMessage(), e); throw new MetadataExtractionException("Subject Extraction failed (see trace for source."); } } }