package com.vistatec.ocelot.findrep;

import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

import com.vistatec.ocelot.segment.model.OcelotSegment;
import com.vistatec.ocelot.segment.model.SegmentAtom;
import com.vistatec.ocelot.segment.model.TextAtom;

/**
 * This class provides methods for searching text occurrences in Ocelot
 * segments.
 * <p>
 * A search walks the segments, then the atoms of each segment (source or
 * target atoms, depending on the scope), then the text of every
 * {@link TextAtom}. The walk goes forward or backward depending on the
 * configured direction. The finder keeps a cursor (segment index, atom index,
 * offset/boundary) between calls, so each call to the internal "find next"
 * methods resumes where the previous one stopped.
 */
public class WordFinder {

    /** The case sensitive option constant. */
    public static final int CASE_SENSITIVE_OPTION = 0;

    /** The whole word option constant. */
    public static final int WHOLE_WORD_OPTION = 1;

    /** The wrap search option constant. */
    public static final int WRAP_SEARCH_OPTION = 2;

    /** None scope constant. */
    public static final int SCOPE_NONE = 0;

    /** The source scope constant. */
    public static final int SCOPE_SOURCE = 1;

    /** The target scope constant. */
    public static final int SCOPE_TARGET = 2;

    /** The direction down constant. */
    public static final int DIRECTION_DOWN = 0;

    /** The direction up constant. */
    public static final int DIRECTION_UP = 1;

    /** The number of available options. */
    private static final int AVAILABLE_OPTIONS_COUNT = 3;

    /** An index is assigned this value when it is reset. */
    private static final int RESET_VALUE = -2;

    /** The list of options, indexed by the <code>*_OPTION</code> constants. */
    private boolean[] options;

    /** The search scope. */
    private int scope;

    /** The search direction. */
    private int direction;

    /** Current segment index. */
    private int currSegIndex;

    /** Current atom index. */
    private int currAtomIndex;

    /** Current offset index. */
    private int currOffsetIdx;

    /** Previous text boundary. */
    private int prevBoundary;

    /** Current text boundary. */
    private int currBoundary;

    /** First text boundary. */
    private int firstBoundary;

    /** Whole word found so far. */
    private String currWholeWord = "";

    /** The break iterator finding word boundaries. */
    private BreakIterator breakIt;

    /** The list of results. */
    private List<FindResult> allResults;

    /** The index of the current result. */
    private int currResultIndex = -1;

    /**
     * Constructor. All the options start disabled.
     */
    public WordFinder() {
        options = new boolean[AVAILABLE_OPTIONS_COUNT];
    }

    /**
     * Reset all the fields: cursor, results, break iterator, scope, direction
     * and options.
     */
    public void reset() {
        currWholeWord = "";
        goToStartOfDocument();
        allResults = null;
        currResultIndex = -1;
        breakIt = null;
        scope = SCOPE_NONE;
        direction = DIRECTION_DOWN;
        options = new boolean[AVAILABLE_OPTIONS_COUNT];
    }

    /**
     * Goes to the start of the document. If the search direction is set to
     * <code>DIRECTION_DOWN</code>, then the search will restart from the
     * beginning of the document; if the direction is <code>DIRECTION_UP</code>,
     * then the search will restart from the end of the document.
     */
    public void goToStartOfDocument() {
        currSegIndex = RESET_VALUE;
        currAtomIndex = RESET_VALUE;
        currOffsetIdx = RESET_VALUE;
        resetBoundaries();
    }

    /**
     * Resets the word boundaries. The first boundary is preserved while a
     * partial whole-word match is still being accumulated.
     */
    private void resetBoundaries() {
        currBoundary = RESET_VALUE;
        prevBoundary = -1;
        if (currWholeWord.isEmpty()) {
            firstBoundary = -1;
        }
    }

    /**
     * Sets the search direction.
     *
     * @param direction
     *            the search direction. Available directions are:
     *            <code>WordFinder.DIRECTION_DOWN</code> and
     *            <code>WordFinder.DIRECTION_UP</code>.
     * @throws IllegalArgumentException
     *             if the value is not one of the two accepted directions.
     */
    public void setDirection(int direction) {
        if (direction != DIRECTION_DOWN && direction != DIRECTION_UP) {
            throw new IllegalArgumentException("Invalid direction value: "
                    + direction + ". Accepted values are " + DIRECTION_DOWN
                    + " and " + DIRECTION_UP + ".");
        }
        this.direction = direction;
    }

    /**
     * Enables/disables a specific option. Values that do not identify an
     * available option are ignored.
     *
     * @param optionType
     *            the option (one of the <code>*_OPTION</code> constants)
     * @param enable
     *            a boolean stating if the option has to be enabled or disabled.
     */
    public void enableOption(int optionType, boolean enable) {
        // Negative values used to raise ArrayIndexOutOfBoundsException while
        // too large values were silently ignored: ignore both consistently.
        if (optionType >= 0 && optionType < AVAILABLE_OPTIONS_COUNT) {
            options[optionType] = enable;
        }
    }

    /**
     * Sets the search scope. When the scope actually changes, a new word break
     * iterator is created for the locale, the cursor goes back to the start of
     * the document and the previous results are discarded.
     *
     * @param scope
     *            the search scope. Available values are
     *            <code>WordFinder.SCOPE_SOURCE</code> and
     *            <code>WordFinder.SCOPE_TARGET</code>
     * @param locale
     *            the locale related to the specific scope.
     * @throws IllegalArgumentException
     *             if the value is not one of the two accepted scopes.
     */
    public void setScope(int scope, Locale locale) {
        if (scope != SCOPE_SOURCE && scope != SCOPE_TARGET) {
            throw new IllegalArgumentException("Invalid scope value: " + scope
                    + ". Accepted values are " + SCOPE_SOURCE + " and "
                    + SCOPE_TARGET + ".");
        }
        if (this.scope != scope) {
            this.scope = scope;
            breakIt = BreakIterator.getWordInstance(locale);
            goToStartOfDocument();
            allResults = null;
            currResultIndex = -1;
        }
    }

    /**
     * Finds all occurrences of a text in the Ocelot segments. The current
     * result index is positioned on the first result when searching down, on
     * the last one when searching up, and on -1 when nothing was found.
     *
     * @param text
     *            the text
     * @param segments
     *            the Ocelot segments
     * @return the list of results (empty if there are no occurrences).
     */
    public List<FindResult> findWord(String text, List<OcelotSegment> segments) {
        allResults = new ArrayList<FindResult>();
        goToStartOfDocument();
        while (findNextWord(text, segments)) {
            allResults.add(getCurrentResult());
        }
        if (allResults.isEmpty()) {
            // Do not leave an index from a previous search pointing into an
            // empty list.
            currResultIndex = -1;
        } else if (direction == DIRECTION_DOWN) {
            currResultIndex = 0;
        } else {
            currResultIndex = allResults.size() - 1;
        }
        return allResults;
    }

    /**
     * Finds the next word in the Ocelot segments, honoring the "whole word"
     * option.
     *
     * @param text
     *            the string to be searched.
     * @param segments
     *            the Ocelot segments.
     * @return <code>true</code> if the string has been found:
     *         <code>false</code> otherwise.
     */
    private boolean findNextWord(String text, List<OcelotSegment> segments) {
        if (options[WHOLE_WORD_OPTION]) {
            return findNextWholeWord(text, segments);
        }
        return findNextOccurrence(text, segments);
    }

    /**
     * Tells whether the current segment index addresses an existing segment.
     * Checking both bounds keeps an upward search over an empty (or
     * <code>null</code>) list from reading element 0.
     *
     * @param segments
     *            the Ocelot segments.
     * @return <code>true</code> if the current segment can be read.
     */
    private boolean isSegIndexInRange(List<OcelotSegment> segments) {
        return segments != null && currSegIndex >= 0
                && currSegIndex < segments.size();
    }

    /**
     * Tells whether the current atom index addresses an existing atom.
     *
     * @param atoms
     *            the list of atoms.
     * @return <code>true</code> if the current atom can be read.
     */
    private boolean isAtomIndexInRange(List<SegmentAtom> atoms) {
        return currAtomIndex >= 0 && currAtomIndex < atoms.size();
    }

    /**
     * Finds the next occurrence of a specific string
     *
     * @param occurrence
     *            the string to be found
     * @param segments
     *            the Ocelot segments.
     * @return <code>true</code> if the occurrence has been found;
     *         <code>false</code> otherwise.
     */
    private boolean findNextOccurrence(String occurrence,
            List<OcelotSegment> segments) {
        boolean found = false;
        adjustSegIndex(segments);
        while (isSegIndexInRange(segments) && !found) {
            List<SegmentAtom> atoms = getAtomsFromSegment(segments
                    .get(currSegIndex));
            if (atoms != null) {
                adjustAtomIndex(atoms);
                while (isAtomIndexInRange(atoms) && !found) {
                    if (atoms.get(currAtomIndex) instanceof TextAtom) {
                        String text = atoms.get(currAtomIndex).getData();
                        if (text != null) {
                            adjustOffset(text);
                            found = findNextOccurrence(occurrence, text);
                        }
                    }
                    if (!found) {
                        // Atom exhausted: move on and restart from its edge.
                        incrementAtomIndex();
                        currOffsetIdx = RESET_VALUE;
                        resetBoundaries();
                    }
                }
            }
            if (!found) {
                // Segment exhausted: move on and restart from its edge.
                incrementSegmentIndex();
                currAtomIndex = RESET_VALUE;
                currOffsetIdx = RESET_VALUE;
                resetBoundaries();
            }
        }
        return found;
    }

    /**
     * Adjust the offset value in case it was reset.
     *
     * @param text
     *            the text that the offset relates to
     */
    private void adjustOffset(String text) {
        if (currOffsetIdx == RESET_VALUE) {
            if (direction == DIRECTION_DOWN || text.isEmpty()) {
                currOffsetIdx = 0;
            } else {
                currOffsetIdx = text.length() - 1;
            }
        }
    }

    /**
     * Finds the next occurrence of a specific string in a text. The offset is
     * moved one step past the tested position even when a match is found, so
     * the following call resumes from the next position.
     *
     * @param occurrence
     *            the string to be found
     * @param text
     *            the text
     * @return <code>true</code> if the occurrence has been found;
     *         <code>false</code> otherwise.
     */
    private boolean findNextOccurrence(String occurrence, String text) {
        boolean found = false;
        boolean ignoreCase = !options[CASE_SENSITIVE_OPTION];
        while (currOffsetIdx >= 0 && currOffsetIdx < text.length() && !found) {
            found = text.regionMatches(ignoreCase, currOffsetIdx, occurrence,
                    0, occurrence.length());
            if (found) {
                if (direction == DIRECTION_DOWN) {
                    firstBoundary = currOffsetIdx;
                    currBoundary = currOffsetIdx + occurrence.length();
                } else {
                    currBoundary = currOffsetIdx;
                    firstBoundary = currOffsetIdx + occurrence.length();
                }
            }
            incrementOffsetIndex();
        }
        return found;
    }

    /**
     * Properly increments or decrements the offset index depending on the
     * search direction.
     */
    private void incrementOffsetIndex() {
        if (direction == DIRECTION_DOWN) {
            currOffsetIdx++;
        } else {
            currOffsetIdx--;
        }
    }

    /**
     * Finds the next whole word in the Ocelot segments list.
     *
     * @param word
     *            the word to be searched
     * @param segments
     *            the Ocelot segments
     * @return <code>true</code> if the word is found; <code>false</code>
     *         otherwise
     */
    private boolean findNextWholeWord(String word, List<OcelotSegment> segments) {
        boolean found = false;
        adjustSegIndex(segments);
        while (isSegIndexInRange(segments) && !found) {
            List<SegmentAtom> atoms = getAtomsFromSegment(segments
                    .get(currSegIndex));
            if (atoms != null) {
                adjustAtomIndex(atoms);
                while (isAtomIndexInRange(atoms) && !found) {
                    if (atoms.get(currAtomIndex) instanceof TextAtom) {
                        String text = atoms.get(currAtomIndex).getData();
                        if (text != null) {
                            adjustBoundaries(text);
                            found = findNextWholeWord(word, text);
                        }
                    }
                    if (!found) {
                        incrementAtomIndex();
                        resetBoundaries();
                    }
                }
            }
            if (!found) {
                // A partial match never spans two segments.
                currWholeWord = "";
                incrementSegmentIndex();
                currAtomIndex = RESET_VALUE;
                resetBoundaries();
            }
        }
        return found;
    }

    /**
     * Increments or decrements the segment index depending on the search
     * direction
     */
    private void incrementSegmentIndex() {
        if (direction == DIRECTION_DOWN) {
            currSegIndex++;
        } else {
            currSegIndex--;
        }
    }

    /**
     * Increments or decrements the atom index depending on the search
     * direction.
     */
    private void incrementAtomIndex() {
        if (direction == DIRECTION_DOWN) {
            currAtomIndex++;
        } else {
            currAtomIndex--;
        }
    }

    /**
     * Adjusts boundaries for the text in case they were reset. A downward
     * search starts from the beginning of the text, an upward search from its
     * end.
     *
     * @param text
     *            the text
     */
    private void adjustBoundaries(String text) {
        if (currBoundary == RESET_VALUE) {
            // Boundaries are positions between characters, so the upward
            // starting point is text.length(): starting from length() - 1
            // would cut the last character off the last word.
            currBoundary = direction == DIRECTION_DOWN ? 0 : text.length();
        }
    }

    /**
     * Finds the next whole word in a text. The text is scanned one word
     * boundary at a time; the pieces are accumulated in the current whole word
     * as long as they still match the beginning (or the end, when searching
     * up) of the word to find.
     *
     * @param word
     *            the word to be searched
     * @param text
     *            the text
     * @return <code>true</code> if the word has been found; <code>false</code>
     *         otherwise
     */
    private boolean findNextWholeWord(String word, String text) {
        boolean found = false;
        breakIt.setText(text);
        prevBoundary = currBoundary;
        if (direction == DIRECTION_DOWN) {
            if (prevBoundary == -1) {
                currBoundary = breakIt.first();
            } else {
                currBoundary = breakIt.following(prevBoundary);
            }
        } else {
            if (prevBoundary == -1) {
                currBoundary = breakIt.last();
            } else {
                currBoundary = breakIt.preceding(prevBoundary);
            }
        }
        while (currBoundary != BreakIterator.DONE && !found) {
            if (currWholeWord.isEmpty()) {
                // A new candidate match starts here.
                firstBoundary = prevBoundary;
            }
            if (direction == DIRECTION_DOWN) {
                currWholeWord += text.substring(prevBoundary, currBoundary);
            } else {
                currWholeWord = text.substring(currBoundary, prevBoundary)
                        + currWholeWord;
            }
            found = checkCurrWordEqualsText(word, currWholeWord);
            if (found) {
                currWholeWord = "";
            } else {
                if (!checkWholeWordSubstring(word)) {
                    currWholeWord = "";
                }
                prevBoundary = currBoundary;
                if (direction == DIRECTION_DOWN) {
                    currBoundary = breakIt.next();
                } else {
                    currBoundary = breakIt.previous();
                }
            }
        }
        return found;
    }

    /**
     * Checks if the current whole word found is a substring of the word to be
     * found. It manages the case the "whole word" option is set and the user
     * requests for finding a text composed by many whole words (ex.
     * "word_1 word_2... word_n"). The comparison ignores case unless the
     * "case sensitive" option is enabled.
     *
     * @param wordToFind
     *            the word (or concatenation of words) to find.
     * @return <code>true</code> if the found word is a substring of the word to
     *         be searched; <code>false</code> otherwise
     */
    private boolean checkWholeWordSubstring(String wordToFind) {
        boolean ignoreCase = !options[CASE_SENSITIVE_OPTION];
        int partLength = currWholeWord.length();
        // Searching down the accumulated text must be a prefix of the word,
        // searching up it must be a suffix. regionMatches returns false when
        // the region does not fit (accumulated text longer than the word).
        int wordOffset = direction == DIRECTION_DOWN ? 0 : wordToFind.length()
                - partLength;
        return wordToFind.regionMatches(ignoreCase, wordOffset, currWholeWord,
                0, partLength);
    }

    /**
     * Checks if the two strings passed as parameter are the same string,
     * ignoring case unless the "case sensitive" option is enabled.
     *
     * @param text
     *            the first string
     * @param currWord
     *            the second string
     * @return <code>true</code> if they are the same string; <code>false</code>
     *         otherwise
     */
    private boolean checkCurrWordEqualsText(String text, String currWord) {
        if (options[CASE_SENSITIVE_OPTION]) {
            return text.equals(currWord);
        }
        return text.equalsIgnoreCase(currWord);
    }

    /**
     * Adjusts the atom index in case it was reset or it ran off one end of the
     * list.
     *
     * @param atoms
     *            the list of atoms.
     */
    private void adjustAtomIndex(List<SegmentAtom> atoms) {
        if (currAtomIndex == RESET_VALUE) {
            if (direction == DIRECTION_DOWN || atoms.isEmpty()) {
                currAtomIndex = 0;
            } else {
                currAtomIndex = atoms.size() - 1;
            }
        } else if (currAtomIndex == -1 && direction == DIRECTION_DOWN) {
            currAtomIndex++;
        } else if (currAtomIndex == atoms.size() && direction == DIRECTION_UP) {
            currAtomIndex--;
        } else if (currAtomIndex == -1 && direction == DIRECTION_UP
                && options[WRAP_SEARCH_OPTION]) {
            currAtomIndex = atoms.size() - 1;
        } else if (currAtomIndex == atoms.size()
                && direction == DIRECTION_DOWN && options[WRAP_SEARCH_OPTION]) {
            currAtomIndex = 0;
        }
    }

    /**
     * Adjusts the segment index in case it was reset or it ran off one end of
     * the list.
     *
     * @param segments
     *            the list of segments.
     */
    private void adjustSegIndex(List<OcelotSegment> segments) {
        int segCount = segments == null ? 0 : segments.size();
        if (currSegIndex == RESET_VALUE) {
            if (direction == DIRECTION_DOWN || segCount == 0) {
                currSegIndex = 0;
            } else {
                currSegIndex = segCount - 1;
            }
        } else if (currSegIndex == segCount && direction == DIRECTION_UP) {
            currSegIndex--;
        } else if (currSegIndex == -1 && direction == DIRECTION_DOWN) {
            currSegIndex++;
        } else if (currSegIndex == -1 && direction == DIRECTION_UP
                && options[WRAP_SEARCH_OPTION]) {
            currSegIndex = segCount - 1;
        } else if (currSegIndex == segCount && direction == DIRECTION_DOWN
                && options[WRAP_SEARCH_OPTION]) {
            currSegIndex = 0;
        }
    }

    /**
     * Gets the proper list of atoms defined in a segment depending on the scope
     * selected: the source atoms for the source scope, the target atoms (when
     * the segment has a target) for any other scope.
     *
     * @param segment
     *            the segment
     * @return the proper list of atoms, or <code>null</code> if the segment
     *         has no target.
     */
    private List<SegmentAtom> getAtomsFromSegment(OcelotSegment segment) {
        if (scope == SCOPE_SOURCE) {
            return segment.getSource().getAtoms();
        }
        if (segment.getTarget() != null) {
            return segment.getTarget().getAtoms();
        }
        return null;
    }

    /**
     * Gets the found word first index.
     *
     * @return the found word first index.
     */
    private int getWordFirstIndex() {
        return direction == DIRECTION_DOWN ? firstBoundary : currBoundary;
    }

    /**
     * Gets the found word last index.
     *
     * @return the found word last index.
     */
    private int getWordLastIndex() {
        return direction == DIRECTION_DOWN ? currBoundary : firstBoundary;
    }

    /**
     * Gets the current result, built from the current cursor position.
     *
     * @return the current result.
     */
    public FindResult getCurrentResult() {
        return new FindResult(currSegIndex, currAtomIndex, getWordFirstIndex(),
                getWordLastIndex(), scope == SCOPE_TARGET);
    }

    /**
     * Gets all the results.
     *
     * @return the list of results; <code>null</code> if no search has been
     *         performed or the results have been cleared.
     */
    public List<FindResult> getAllResults() {
        return allResults;
    }

    /**
     * Clears the list of results.
     */
    public void clearAllResults() {
        allResults = null;
    }

    /**
     * Gets the index in the list of the current result.
     *
     * @return the index in the list of the current result.
     */
    public int getCurrentResIndex() {
        return currResultIndex;
    }

    /**
     * Move the index to the next result, following the search direction. When
     * the wrap option is enabled the index restarts from the opposite end of
     * the list; otherwise it can stop one position outside the list (-1 or the
     * list size).
     */
    public void goToNextResult() {
        if (allResults == null) {
            currResultIndex = -1;
            return;
        }
        int resCount = allResults.size();
        boolean wrap = options[WRAP_SEARCH_OPTION];
        if (currResultIndex == -1 && direction == DIRECTION_UP && wrap) {
            currResultIndex = resCount - 1;
        } else if (currResultIndex == resCount && direction == DIRECTION_DOWN
                && wrap) {
            currResultIndex = 0;
        } else if (currResultIndex == -1 && direction == DIRECTION_DOWN) {
            currResultIndex++;
        } else if (currResultIndex == resCount && direction == DIRECTION_UP) {
            currResultIndex--;
        } else if (currResultIndex > -1 && currResultIndex < resCount) {
            if (direction == DIRECTION_DOWN) {
                if (wrap) {
                    currResultIndex = (currResultIndex + 1) % resCount;
                } else {
                    currResultIndex++;
                }
            } else {
                currResultIndex--;
                if (wrap && currResultIndex == -1) {
                    currResultIndex = resCount - 1;
                }
            }
        }
    }

    /**
     * Removes the current result from the list and resets the current result
     * index. Nothing happens if the index does not address a result.
     */
    public void removeCurrentResult() {
        // The index can legitimately be equal to the list size after
        // goToNextResult() without wrapping, hence the upper bound check.
        if (allResults != null && currResultIndex > -1
                && currResultIndex < allResults.size()) {
            allResults.remove(currResultIndex);
            currResultIndex = -1;
        }
    }

    /**
     * Adjusts results boundaries when the current occurrence is replaced by a
     * new string. The results following the current one in the list and lying
     * in the same segment and atom are shifted by the length difference.
     *
     * @param newString
     *            the new string.
     */
    public void replacedString(String newString) {
        if (allResults == null || currResultIndex < 0
                || currResultIndex >= allResults.size()) {
            return;
        }
        FindResult currRes = allResults.get(currResultIndex);
        int delta = newString.length()
                - (currRes.getStringEndIndex() - currRes.getStringStartIndex());
        for (int resIdx = currResultIndex + 1; resIdx < allResults.size(); resIdx++) {
            FindResult nextRes = allResults.get(resIdx);
            if (nextRes.getSegmentIndex() == currRes.getSegmentIndex()
                    && nextRes.getAtomIndex() == currRes.getAtomIndex()) {
                nextRes.setStringStartIndex(nextRes.getStringStartIndex()
                        + delta);
                nextRes.setStringEndIndex(nextRes.getStringEndIndex() + delta);
            }
        }
    }
}