package au.com.acpfg.misc.StringMatcher;

import java.util.ArrayList;
import java.util.List;

import org.knime.core.data.DataCell;
import org.knime.core.data.DataType;
import org.knime.core.data.collection.CollectionCellFactory;
import org.knime.core.data.def.IntCell;
import org.knime.core.data.def.StringCell;
import org.knime.core.data.vector.bitvector.DenseBitVector;

/**
 * Reports only those matches whose extent does not intersect the extent of a
 * match that has already been accepted.
 *
 * <p>Matches are examined in the order in which the node model supplies them,
 * so which member of a group of overlapping matches gets reported depends on
 * that order. No guarantee is made about it; in practice the left-most match
 * is likely to be the one reported.
 *
 * @author andrew.cassin
 *
 */
public class NonOverlappingMatchesReporter implements MatchReporter {
    /** When true, {@link #report} yields the number of accepted matches rather than the matches themselves. */
    private boolean m_report_cnt;

    /**
     * @param report_count {@code true} to report how many non-overlapping matches
     *                     were found, {@code false} to report the matched strings
     */
    public NonOverlappingMatchesReporter(boolean report_count) {
        m_report_cnt = report_count;
    }

    /**
     * Builds the result cell for the matches currently held by {@code m}.
     *
     * @param m   node model providing the matched strings and their extents
     * @param str the string that was searched; only its length is used here, to
     *            size the bit vector that tracks already-covered positions
     * @return a missing cell if the model has no match list or no position list;
     *         for an empty match list either {@code IntCell(0)} (count mode) or a
     *         missing cell; otherwise an {@code IntCell} holding the number of
     *         accepted matches (count mode) or a list cell of the accepted
     *         matches as {@code StringCell}s
     * @throws Exception declared by the {@code MatchReporter} contract
     */
    @Override
    public DataCell report(StringMatcherNodeModel m, String str) throws Exception {
        List<Extent> positions = m.getMatchPos();
        List<String> found = m.getMatches();

        if (found == null || positions == null) {
            return DataType.getMissingCell();
        }
        if (found.isEmpty()) {
            if (m_report_cnt) {
                return new IntCell(0);
            }
            return DataType.getMissingCell();
        }

        // every match must have a corresponding position!
        assert(found.size() == positions.size());

        // One bit per character of the searched string; a set bit marks a
        // position already claimed by an accepted match.
        DenseBitVector covered = new DenseBitVector(str.length());
        List<StringCell> accepted = new ArrayList<StringCell>();

        for (int idx = 0; idx < found.size(); idx++) {
            Extent extent = positions.get(idx);

            // First claimed position at or after the start of this match
            // (negative when there is none).
            long firstClaimed = covered.nextSetBit(extent.m_start);
            boolean overlapsEarlier = firstClaimed >= 0 && firstClaimed < (long) extent.m_end;
            if (overlapsEarlier) {
                // skip the match as it's an overlapping one
                continue;
            }

            covered.set(extent.m_start, extent.m_end);
            accepted.add(new StringCell(found.get(idx)));
        }

        if (m_report_cnt) {
            return new IntCell(accepted.size());
        }
        return CollectionCellFactory.createListCell(accepted);
    }
}