/*
 * Carrot2 project.
 *
 * Copyright (C) 2002-2016, Dawid Weiss, Stanisław Osiński.
 * All rights reserved.
 *
 * Refer to the full license file "carrot2.LICENSE"
 * in the root folder of the repository checkout or at:
 * http://www.carrot2.org/carrot2.LICENSE
 */

package org.carrot2.text.preprocessing.filter;

import java.util.*;

import org.carrot2.shaded.guava.common.collect.Lists;

/**
 * Filters out phrases that are not left complete.
 * <p>
 * Both methods of this class look at code arrays from their last element
 * towards the first one: phrases are ordered by their reversed code sequences
 * and the "common prefix" of two neighbouring phrases is in fact the number of
 * codes they share at their ends.
 */
class LeftCompleteLabelFilter extends CompleteLabelFilterBase
{
    /**
     * Computes, for each pair of neighbouring entries of <code>phraseCodes</code>,
     * the number of trailing codes the two entries have in common.
     *
     * @param phraseCodes entries to compare pairwise, in the order in which they
     *            should be compared
     * @return an array of the same size as <code>phraseCodes</code>; element
     *         <code>i</code> is the number of equal codes at the ends of entries
     *         <code>i</code> and <code>i + 1</code>, and the last element is
     *         always <code>-1</code>. An empty array is returned for an empty
     *         input list.
     */
    int [] createLcp(List<LabelIndexWithCodes> phraseCodes)
    {
        final int count = phraseCodes.size();
        final int [] lcpArray = new int [count];

        // Nothing to compare and no slot for the -1 terminator; writing to
        // index (length - 1) would be out of bounds here.
        if (count == 0)
        {
            return lcpArray;
        }

        for (int i = 0; i < count - 1; i++)
        {
            lcpArray[i] = commonSuffixLength(phraseCodes.get(i).getCodes(),
                phraseCodes.get(i + 1).getCodes());
        }

        // The last entry has no successor to be compared with.
        lcpArray[count - 1] = -1;

        return lcpArray;
    }

    /**
     * Returns a copy of <code>phrasesWithCodes</code> sorted by the entries' code
     * arrays read from the last code to the first one. When one array runs out
     * of codes before a difference is found, the shorter array is placed first.
     *
     * @param phrasesWithCodes entries to sort; the sort is performed on a newly
     *            created list
     * @return the newly created, sorted list
     */
    List<LabelIndexWithCodes> sortPhraseCodes(
        List<LabelIndexWithCodes> phrasesWithCodes)
    {
        final ArrayList<LabelIndexWithCodes> sortedPhraseCodes = Lists
            .newArrayList(phrasesWithCodes);

        Collections.sort(sortedPhraseCodes, new Comparator<LabelIndexWithCodes>()
        {
            @Override
            public int compare(LabelIndexWithCodes o1, LabelIndexWithCodes o2)
            {
                final int [] codesA = o1.getCodes();
                final int [] codesB = o2.getCodes();
                final int minLength = Math.min(codesA.length, codesB.length);

                // Walk both arrays backwards; the first differing code decides.
                for (int i = 1; i <= minLength; i++)
                {
                    final int a = codesA[codesA.length - i];
                    final int b = codesB[codesB.length - i];
                    if (a != b)
                    {
                        return a < b ? -1 : 1;
                    }
                }

                // One array is a suffix of the other (or they are equal):
                // order by length, shorter first.
                if (codesA.length != codesB.length)
                {
                    return codesA.length < codesB.length ? -1 : 1;
                }
                return 0;
            }
        });

        return sortedPhraseCodes;
    }

    /**
     * Counts how many codes at the ends of the two arrays are pairwise equal,
     * stopping at the first mismatch or when the shorter array is exhausted.
     */
    private static int commonSuffixLength(int [] codes, int [] nextCodes)
    {
        final int minLength = Math.min(codes.length, nextCodes.length);

        int common = 0;
        while (common < minLength
            && codes[codes.length - 1 - common] == nextCodes[nextCodes.length - 1 - common])
        {
            common++;
        }
        return common;
    }
}