/* * Copyright 2015 Themistoklis Mavridis <themis.mavridis@issel.ee.auth.gr>. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.thesmartweb.swebrank; import java.util.*; /** * Class regarding Visibility score http://www.advancedwebranking.com/user-guide/html/en/ch08s06.html * @author Themistoklis Mavridis */ public class VisibilityScore { /** * Method to get all the links in a single array from all the search engines (duplicates removed) * @param links1 the links of one engine * @param links2 the links of another engine * @param links3 the links of another engine * @return an array with all the links */ public String[] perform(String[] links1,String[] links2,String[] links3){ //gets as input the links from the engines and gets them to lists in order to combine them List<String> link_list1=Arrays.asList(links1); List<String> link_list2=Arrays.asList(links2); List<String> link_list3=Arrays.asList(links3); List<String> link_list_total=new ArrayList<>(); link_list_total.addAll(link_list1); link_list_total.addAll(link_list2); link_list_total.addAll(link_list3); //********remove duplicate words************ //Create a HashSet which allows no duplicates HashSet<String> hashSet = new HashSet<>(link_list_total); //Assign the HashSet to a new ArrayList ArrayList<String> arrayList = new ArrayList<>(hashSet); arrayList.removeAll(Collections.singleton(null)); String[] links = arrayList.toArray(new String[arrayList.size()]); return links; } /** * Method to compute the visibility score and rank the links according to it * @param links All the links in an array * @param links1 the links of one engine * @param links2 the links of another engine * @param links3 the links of another engine * @param top_visible the amount of links to keep in the result * @return the links sorted according to their visibility score */ public String[] visibility_score(String[] links,String[] links1,String[] links2,String[] links3,int top_visible){ Integer[] scores=new Integer[links.length]; for(int i=0;i<0;i++){scores[i]=0;} //we compare every link of the merged engine with the links of each engine and when we found it in an engine we give the link a score of the place we found it //as a result links with small score are better in the rank and links with high score worse for(int j=0;j<links.length;j++){ for (int i=0;i<links1.length;i++){ if(links[j]!=null&&links1[i]!=null){ if(links[j].equalsIgnoreCase(links1[i])){ scores[j]=i; } } } for (int i=0;i<links2.length;i++){ if(links[j]!=null&&links2[i]!=null){ if(links[j].equalsIgnoreCase(links2[i])){ scores[j]=i; } } } for (int i=0;i<links1.length;i++){ if(links[j]!=null&&links3[i]!=null){ if(links[j].equalsIgnoreCase(links3[i])){ scores[j]=i; } } } } List<Integer> scores_list=Arrays.asList(scores); //create a hashmap in order to map the scores with the indexes IdentityHashMap<Integer, Integer> originalIndices = new IdentityHashMap<>(); //copy the original scores list for(int i2=0; i2<scores_list.size(); i2++) {originalIndices.put(scores_list.get(i2), i2);} //sort the scores List<Integer> sorted_scores = new ArrayList<>(); sorted_scores.addAll(scores_list); sorted_scores.removeAll(Collections.singleton(null)); Collections.sort(sorted_scores); //to get the top int draw_counter=0; for(int i=top_visible;i<sorted_scores.size();i++){ Integer score_previous=sorted_scores.get(i-1); Integer score_current=sorted_scores.get(i); if(score_current.compareTo(score_previous)==0){draw_counter++;} } int[] origIndex=new int[top_visible+draw_counter]; for(int i=0; i<origIndex.length; i++) { Integer score = sorted_scores.get(i); // Lookup original index efficiently origIndex[i] = originalIndices.get(score); } int size_links_out=0; if(top_visible<origIndex.length){ size_links_out=top_visible; } else{ size_links_out=origIndex.length; } String[] links_out=new String[size_links_out]; List<Integer> noDup = new ArrayList<>(); noDup.add(origIndex[0]); for (int c = 1; c < origIndex.length-1; c++){ if(!noDup.contains(origIndex[c])){ noDup.add(origIndex[c]); } } int[] noDupArray = new int[noDup.size()]; for(int i = 0; i < noDup.size(); i++){noDupArray[i] = noDup.get(i);} if(top_visible<noDupArray.length){size_links_out=top_visible;} else{size_links_out=noDupArray.length;} for(int j=0;j<size_links_out;j++){ links_out[j]=links[noDupArray[j]]; } return links_out; } }