/* * Copyright 2015 Themistoklis Mavridis <themis.mavridis@issel.ee.auth.gr>. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.thesmartweb.swebrank; /** * Class for Normalized Web Distance or Normalized Google distance calculation * http://arxiv.org/abs/0905.4039 * http://en.wikipedia.org/wiki/Normalized_Google_distance * @author Themis Mavridis */ public class NWD_Analysis { /** * Method that calculates the Normalized Web Distance (or former Normalized Google Distance) score * @param term1 the first term * @param term2 the second term * @param config_path the path were to find the api keys of the Search Engine we would like to use * @return the NWD or NGD score (closest to zero means closer resemblance) */ public double NWD_score(String term1, String term2, String config_path) { System.out.println("into ngd score"); Long M = 10000000000L; //802080446201L (2007) double freqx = logResults(term1, config_path); double freqy = logResults(term2, config_path); System.out.println("into taking results"); String xy = term1.concat("+").concat(term2); double freqxy = logResults(xy, config_path); if (freqx == Double.NEGATIVE_INFINITY || freqy == Double.NEGATIVE_INFINITY) { //deal with zero results = infinite logarithms return 1; //return 1 by definition } else { double num = Math.max(freqx, freqy) - freqxy; double den = Math.log10(M) - Math.min(freqx, freqy); double formula = num / den; return formula; } } /** * Method that calculates the Normalized Web Distance (or former Normalized Google Distance) score given the first terms frequency * @param term1 the first term * @param term2 the second term * @param config_path the path were to find the api keys of the Search Engine we would like to use * @param freqx the log10 of the amount of results of the first term * @return the NWD or NGD score (closest to zero means closer resemblance) */ public double NWD_score(String term1, String term2, String config_path, double freqx) { System.out.println("into ngd score"); Long M = 10000000000L; //802080446201L (2007) //double freqx = logResults(term1, config_path); double freqy = logResults(term2, config_path); System.out.println("into taking results"); String xy = term1.concat("+").concat(term2); double freqxy = logResults(xy, config_path); if (freqx == Double.NEGATIVE_INFINITY || freqy == Double.NEGATIVE_INFINITY) { //deal with zero results = infinite logarithms return 1; //return 1 by definition } else { double num = Math.max(freqx, freqy) - freqxy; double den = Math.log10(M) - Math.min(freqx, freqy); double formula = num / den; return formula; } } /** * Method that get the log of the number of results that correspond to a specific query in a search engine * @param term the term to get results number for * @param config_path the path were to find the api keys of the Search Engine we would like to use * @return the log of the of the number of results that correspond to a specific query in a search engine */ public double logResults(String term, String config_path) { //GoogleResults gr=new GoogleResults(); //YahooResults yr = new YahooResults(); //long sc = yr.Get_Results_Number(term); BingResults bg=new BingResults(); long sc=bg.Get_Results_Number(term, config_path); System.out.println(term+":"+sc); return Math.log10(sc); } }