/* * Encog(tm) Core v3.4 - Java Version * http://www.heatonresearch.com/encog/ * https://github.com/encog/encog-java-core * Copyright 2008-2016 Heaton Research, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * For more information on Heaton Research copyrights, licenses * and trademarks visit: * http://www.heatonresearch.com/copyright */ package org.encog.app.analyst.csv.filter; import java.io.File; import java.io.PrintWriter; import java.util.ArrayList; import java.util.List; import org.encog.app.analyst.csv.basic.BasicFile; import org.encog.app.analyst.csv.basic.LoadedRow; import org.encog.util.csv.CSVFormat; import org.encog.util.csv.ReadCSV; /** * This class can be used to remove certain rows from a CSV. You can remove rows * where a specific field has a specific value * */ public class FilterCSV extends BasicFile { /** * The excluded fields. */ private final List<ExcludedField> excludedFields = new ArrayList<ExcludedField>(); /** * A count of the filtered rows. */ private int filteredCount; /** * Analyze the file. * * @param inputFile * The name of the input file. * @param headers * True, if headers are expected. * @param format * The format. */ public void analyze(final File inputFile, final boolean headers, final CSVFormat format) { setInputFilename(inputFile); setExpectInputHeaders(headers); setInputFormat(format); setAnalyzed(true); performBasicCounts(); } /** * Exclude rows where the specified field has the specified value. * * @param fieldNumber * The field number. * @param fieldValue * The field value. */ public void exclude(final int fieldNumber, final String fieldValue) { this.excludedFields.add(new ExcludedField(fieldNumber, fieldValue)); } /** * @return A list of the fields and their values, that should be excluded. */ public List<ExcludedField> getExcluded() { return this.excludedFields; } /** * @return A count of the filtered rows. This is the resulting line count * for the output CSV. */ public int getFilteredRowCount() { return this.filteredCount; } /** * Process the input file. * * @param outputFile * The output file to write to. */ public void process(final File outputFile) { final ReadCSV csv = new ReadCSV(getInputFilename().toString(), isExpectInputHeaders(), getFormat()); final PrintWriter tw = prepareOutputFile(outputFile); this.filteredCount = 0; resetStatus(); while (csv.next() && !shouldStop()) { updateStatus(false); final LoadedRow row = new LoadedRow(csv); if (shouldProcess(row)) { writeRow(tw, row); this.filteredCount++; } } reportDone(false); tw.close(); csv.close(); } /** * Determine if the specified row should be processed, or not. * * @param row * The row. * @return True, if the row should be processed. */ private boolean shouldProcess(final LoadedRow row) { for (final ExcludedField field : this.excludedFields) { if (row.getData()[field.getFieldNumber()].trim().equals( field.getFieldValue().trim())) { return false; } } return true; } }