package com.vistatec.ocelot.tm.okapi; import java.io.IOException; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import net.sf.okapi.common.resource.TextFragment; import net.sf.okapi.tm.pensieve.common.TmHit; import com.google.inject.Inject; import com.vistatec.ocelot.config.ConfigService; import com.vistatec.ocelot.config.xml.TmManagement; import com.vistatec.ocelot.segment.model.SegmentAtom; import com.vistatec.ocelot.tm.TmMatch; import com.vistatec.ocelot.tm.TmPenalizer; import com.vistatec.ocelot.tm.TmService; /** * Use Okapi Pensieve to search the Lucene index. */ public class OkapiTmService implements TmService { private final OkapiTmManager manager; private final TmPenalizer penalizer; private final ConfigService cfgService; @Inject public OkapiTmService(OkapiTmManager manager, TmPenalizer penalizer, ConfigService cfgService) { this.manager = manager; this.penalizer = penalizer; this.cfgService = cfgService; } @Override public List<TmMatch> getFuzzyTermMatches(List<SegmentAtom> segment) throws IOException { Iterator<OkapiTmManager.TmPair> tmPairs = manager.getSeekers(); int pensieveThreshold = new Double(cfgService.getFuzzyThreshold()).intValue(); List<TmMatch> matches = new ArrayList<>(); while (tmPairs.hasNext()) { OkapiTmManager.TmPair tmPair = tmPairs.next(); if (checkTmEnabled(tmPair)) { List<TmHit> results = tmPair.getSeeker().searchFuzzy( new TextFragment(getSearchText(segment)), pensieveThreshold, cfgService.getMaxResults(), null); matches.addAll(convertOkapiTmHit(tmPair.getTmOrigin(), results)); } tmPair.getSeeker().close(); } return penalizer.applyPenalties(matches); } @Override public List<TmMatch> getConcordanceMatches(List<SegmentAtom> segment) throws IOException { Iterator<OkapiTmManager.TmPair> tmPairs = manager.getSeekers(); int pensieveThreshold = new Double(cfgService.getFuzzyThreshold()).intValue(); List<TmMatch> matches = new ArrayList<>(); while(tmPairs.hasNext()) { OkapiTmManager.TmPair tmPair = tmPairs.next(); if (checkTmEnabled(tmPair)) { List<TmHit> results = tmPair.getSeeker().searchSimpleConcordance( getSearchText(segment), pensieveThreshold, cfgService.getMaxResults(), null); matches.addAll(convertOkapiTmHit(tmPair.getTmOrigin(), results)); } tmPair.getSeeker().close(); } return penalizer.applyPenalties(matches); } public List<TmMatch> convertOkapiTmHit(String tmOrigin, List<TmHit> leverageResults) { List<TmMatch> matches = new ArrayList<>(); for (TmHit hit : leverageResults) { matches.add(new PensieveTmMatch(tmOrigin, hit)); } return matches; } private String getSearchText(List<SegmentAtom> segment) { StringBuilder searchText = new StringBuilder(); for (SegmentAtom atom : segment) { searchText.append(atom.getData()); } return searchText.toString(); } private boolean checkTmEnabled(OkapiTmManager.TmPair tmPair) { TmManagement.TmConfig config = this.manager.fetchTm(tmPair.getTmOrigin()); return config != null && config.isEnabled(); } }