/* * Concept profile generation tool suite * Copyright (C) 2015 Biosemantics Group, Erasmus University Medical Center, * Rotterdam, The Netherlands * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published * by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/> */ package org.erasmusmc.peregrine; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import org.erasmusmc.ontology.Concept; import org.erasmusmc.ontology.ConceptVector; import org.erasmusmc.ontology.Ontology; import org.erasmusmc.userinterface.components.Occurrence; /** Converts the output of a Peregrine indexation into various different formats.*/ public class PeregrineOutputConverter { //This method translates the index results into an array of Occurences: public static Occurrence[] resultOccurrences(AbstractPeregrine peregrine){ int count = 0; Ontology ontology = peregrine.ontology; for (ResultConcept concept : peregrine.resultConcepts){ for (ResultTerm term : concept.terms){ count += term.words.length; } } Occurrence[] result = new Occurrence[count]; count = 0; Occurrence occurrence; for (ResultConcept concept : peregrine.resultConcepts){ for (ResultTerm term : concept.terms){ for (int word : term.words){ occurrence = new Occurrence(); occurrence.concept = ontology.getConcept(concept.conceptId); //Remove this!!!! //if (occurrence.concept == null) occurrence.concept = new Concept(1); occurrence.cui = occurrence.concept.getID(); occurrence.start = peregrine.tokenizer.startpositions.get(word); occurrence.end = peregrine.tokenizer.endpositions.get(word); result[count] = occurrence; count++; } } } return result; } //This method translates the index results into a simple text representation: public static List<String> simpleText(ConceptPeregrine peregrine){ List<String> result = new ArrayList<String>(peregrine.resultConcepts.size()); Ontology ontology = peregrine.ontology; for (ResultConcept concept : peregrine.resultConcepts){ result.add(concept.terms.size()+"\t"+concept.conceptId+"\t"+ontology.getConcept(concept.terms.get(0).term.conceptId[0]).getName()); } return result; } //Converts the index results into an XML format public static List<String> XML(ConceptPeregrine peregrine){ List<String> result = new ArrayList<String>(); Map<ResultTerm, Integer> term2clid = new HashMap<ResultTerm, Integer>(); for (Integer i = 0; i < peregrine.resultTerms.size(); i++) term2clid.put(peregrine.resultTerms.get(i), i); Ontology ontology = peregrine.ontology; Tokenizer tokenizer = peregrine.tokenizer; result.add("<?xml version=\"1.0\" encoding=\"ISO-8859-1\" ?>"); if (ontology == null || ontology.getName() == "") result.add("<fingerprint>"); else result.add("<fingerprint thesaurus=\""+ontology.getName()+"\">"); result.add(" <concepts count=\""+peregrine.resultConcepts.size()+ "\" clusters=\""+ peregrine.resultTerms.size()+"\"/>"); for (ResultConcept concept : peregrine.resultConcepts){ result.add(" <concept id=\""+concept.conceptId+"\" rank=\""+concept.terms.size()+"\">"); if (ontology != null){ Concept conceptstore = ontology.getConcept(concept.conceptId); String name = ""; if (conceptstore.getName().equals("")){ if (conceptstore.getTerms().size() != 0) name = conceptstore.getTerms().get(0).text; }else name = conceptstore.getName(); if (!name.equals("")) result.add(" <name>"+name+"</name>"); } for (ResultTerm term : concept.terms){ int clid = term2clid.get(term); for (int word : term.words){ result.add(" <word clid=\""+Integer.toString(clid+1)+ "\" pos=\""+ Integer.toString(tokenizer.startpositions.get(word)+1)+ "\" len=\""+ tokenizer.tokens.get(word).length()+ "\">"+tokenizer.tokens.get(word)+"</word>"); } } result.add(" </concept>"); } result.add("</fingerprint>"); return result; } //Converts the index results into a simple XML format public static String simpleXML(ConceptPeregrine peregrine){ StringBuffer result = new StringBuffer(); //result.add("<?xml version=\"1.0\" encoding=\"ISO-8859-1\" ?>"); result.append("<fingerprint thesaurus=\""+peregrine.ontologyName+"\">"); for (ResultConcept concept : peregrine.resultConcepts){ result.append("<concept id=\""+concept.conceptId+"\" rank=\""+concept.terms.size()+"\"/>"); } result.append("</fingerprint>"); return result.toString(); } //Converts the index results into a RobFingerprint for STORING in the Groundhog public static ConceptVector convertResult2ConceptVector(AbstractPeregrine peregrine, Ontology ontology){ ConceptVector conceptVector = new ConceptVector(ontology); for (ResultConcept concept : peregrine.resultConcepts){ conceptVector.add(concept.conceptId,concept.terms.size()); } return conceptVector; } }