/* * #! * Ontopia Classify * #- * Copyright (C) 2001 - 2013 The Ontopia Project * #- * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * !# */ package net.ontopia.topicmaps.classify; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.FileWriter; import java.io.InputStreamReader; import java.io.IOException; import java.util.Collection; import java.util.HashSet; import net.ontopia.utils.OntopiaRuntimeException; /** * INTERNAL: */ public class BlackList implements TermAnalyzerIF { protected Collection<String> stopList; protected double stopFactor = 0.0002d; protected File file; protected Collection<String> added = new HashSet<String>(); protected long lastModified; BlackList(File _file) { this.file = _file; load(); } private void load() { if (this.file.exists()) { try { BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(this.file))); try { this.stopList = new HashSet<String>(); String line = null; while ((line = reader.readLine()) != null) { // downcase before adding to list stopList.add(line.trim().toLowerCase()); } this.lastModified = file.lastModified(); } finally { reader.close(); } } catch (IOException e) { throw new OntopiaRuntimeException(e); } } else { this.stopList = new HashSet<String>(); } } public synchronized void addStopWord(String term) { added.add(term); stopList.add(term); } public synchronized void save() { if (!added.isEmpty()) { try { boolean reload = (file.lastModified() > lastModified); // make sure directories exists this.file.getParentFile().mkdirs(); // write black list to disk FileWriter writer = new FileWriter(file.getPath(), true); try { for (String term : added) { writer.write(term); writer.write('\n'); } } finally { added.clear(); writer.close(); } if (reload) load(); } catch (IOException e) { throw new OntopiaRuntimeException(e); } } } public void setStopFactor(double stopFactor) { this.stopFactor = stopFactor; } public synchronized boolean isStopWord(String word) { return stopList.contains(word); } public void analyzeTerm(Term term) { if (isStopWord(term.getStem())) term.multiplyScore(stopFactor, "blacklisted"); } public void startAnalysis(TermDatabase tdb) { } public void endAnalysis() { } }