/* * #! * Ontopia Classify * #- * Copyright (C) 2001 - 2013 The Ontopia Project * #- * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * !# */ package net.ontopia.topicmaps.classify; /** * INTERNAL: */ public class DistanceAnalyzer extends AbstractDocumentAnalyzer { protected double high; protected int termCount; public DistanceAnalyzer() { super(2); } public void startDocument(Document doc) { super.startDocument(doc); if (this.iteration == 1) this.high = 0d; else this.termCount = 0; } public void endDocument(Document doc) { // calculate high if (this.iteration == 1) this.high = Math.log(termCount); } public void analyzeToken(TextBlock parent, Token token, int index) { // ignore non variant tokens if (token.getType() != Token.TYPE_VARIANT) return; // count term this.termCount++; // adjust score by distance from start if (this.iteration > 1) { Term term = ((Variant)token).getTerm(); double score = term.getScore(); // if no existing score add distance score if (score > 0d) term.addScore(high - Math.log(termCount), "distance adjustment"); } } }