/* * Concept profile generation tool suite * Copyright (C) 2015 Biosemantics Group, Erasmus University Medical Center, * Rotterdam, The Netherlands * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published * by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/> */ package org.erasmusmc.rmi.peregrine.client; import java.util.HashMap; import java.util.List; import java.util.Map; import org.erasmusmc.peregrine.ResultConcept; import org.erasmusmc.peregrine.ResultTerm; import org.erasmusmc.peregrine.Tokenizer; public class RMIKnewcoPeregrineTest { public static void main(String[] args) throws Exception { RMIPeregrine peregrine = new RMIPeregrine("mojojojo.biosemantics.org", 1011, "RMIPeregrineServerService"); //RMIPeregrine peregrine = new RMIPeregrine("69.64.173.228", 1099, "KnewcoPeregrine"); peregrine.setDisambiguate(false); //peregrine.setDisambiguate(true); for(int i=0; i<1;i++) { String input = "KLK3"; peregrine.index( input ); System.out.println( XML(input, peregrine) ); //List<ResultTerm> resultTerms = peregrine.resultTerms; List<ResultConcept> resultConcepts = peregrine.resultConcepts; //Tokenizer tokenizer = peregrine.tokenizer; if (i % 100 == 0){ for(ResultConcept result: resultConcepts) { System.out.println(result.conceptId); } System.out.println("***"+i); for(ResultTerm result: peregrine.resultTerms) { System.out.println(result.term.termId); for(String str: peregrine.tokenizer.tokens) { System.out.println(str); } } } } } public static String XML(String input, RMIPeregrine peregrine){ String result = ""; Integer NrOfTerms = 0; Integer EndOfSentence = 0; Double MaxNrOfTerms = 0.0; Double Rank = 0.0; Map<ResultTerm, Integer> term2clid = new HashMap<ResultTerm, Integer>(); if ( peregrine == null ){ return result; } for (Integer i = 0; i < peregrine.resultTerms.size(); i++){ term2clid.put(peregrine.resultTerms.get(i), i); } Tokenizer tokenizer = peregrine.tokenizer; result += "<?xml version=\"1.0\" encoding=\"ISO-8859-1\" ?>"; result += "<fingerprint>"; result += "<concepts count=\""+peregrine.resultConcepts.size()+ "\" clusters=\""+ peregrine.resultTerms.size()+"\"/>"; Integer LineCount = peregrine.tokenizer.endOfSentence.size(); result += "<lineslist count=\"" + LineCount + "\">"; Integer StartPos = 0; Integer LineLength = 0; for (Integer i = 0 ; i < LineCount ; i++ ){ EndOfSentence = peregrine.tokenizer.endOfSentence.get(i); if ( EndOfSentence > 0 ){ LineLength = peregrine.tokenizer.endpositions.get(EndOfSentence-1) - StartPos + 1; int e = input.length(); if ( (StartPos+LineLength) < e ){ char ch = input.charAt(StartPos + LineLength); while ( ( (StartPos+LineLength) < e ) && ch != '.' && (int)ch != 10 && ch != '!' && ch != '?' && ch != ';' ){ LineLength++; ch = input.charAt(StartPos + LineLength); } } } else { LineLength = 0; } result += "<line startpos=\"" + (StartPos+1) + "\" length=\"" + LineLength + "\"/>"; StartPos = StartPos + LineLength + 1; } result += "</lineslist>"; for (ResultConcept concept : peregrine.resultConcepts){ NrOfTerms = concept.terms.size(); if ( NrOfTerms > MaxNrOfTerms ){ MaxNrOfTerms = NrOfTerms.doubleValue(); } } for (ResultConcept concept : peregrine.resultConcepts){ Rank = concept.terms.size()/MaxNrOfTerms; result += "<concept id=\""+concept.conceptId+"\" rank=\""+Rank+"\" freq=\""+concept.terms.size()+"\">"; for (ResultTerm term : concept.terms){ int clid = term2clid.get(term); for (int word : term.words){ result += "<word clid=\""+Integer.toString(clid+1)+ "\" pos=\""+ Integer.toString(tokenizer.startpositions.get(word)+1)+ "\" len=\""+ tokenizer.tokens.get(word).length()+ "\">"+tokenizer.tokens.get(word)+"</word>"; } } result += "</concept>"; } result += "</fingerprint>"; return result; } }