/* ---------------------------------------------------------------------
 * Numenta Platform for Intelligent Computing (NuPIC)
 * Copyright (C) 2014, Numenta, Inc.  Unless you have an agreement
 * with Numenta, Inc., for a separate license for this software code, the
 * following terms and conditions apply:
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero Public License version 3 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Affero Public License for more details.
 *
 * You should have received a copy of the GNU Affero Public License
 * along with this program.  If not, see http://www.gnu.org/licenses.
 *
 * http://numenta.org/licenses/
 * ---------------------------------------------------------------------
 */
package org.numenta.nupic.network.sensor;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Serializable;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.util.jar.JarFile;
import java.util.stream.Stream;
import java.util.zip.ZipEntry;

import org.numenta.nupic.network.Network;

/**
 * Default implementation of a {@link Sensor} for inputting data from
 * a file.
 *
 * All {@link Sensor}s represent the bottom-most level of any given {@link Network}.
 * Sensors are used to connect to a data source and feed data into the Network, therefore
 * there are no nodes beneath them or which precede them within the Network hierarchy, in
 * terms of data flow. In fact, a Sensor will throw an {@link Exception} if an attempt to
 * connect another node to the input of a node containing a Sensor is made.
 *
 * The source is identified by the {@code "PATH"} entry of the supplied
 * {@link SensorParams}. A path containing {@code "!"} is treated as
 * {@code <jar file>!<entry>}; anything else is treated as a plain file path.
 * Both kinds of source are decoded as UTF-8.
 *
 * @author David Ray
 * @see SensorFactory
 * @see Sensor#create(SensorFactory, SensorParams)
 */
public class FileSensor implements Sensor<File>, Serializable {
    private static final long serialVersionUID = 1L;

    /** Passed to {@code BatchedCsvStream.batch} as its header-size argument. */
    private static final int HEADER_SIZE = 3;
    /** Passed to {@code BatchedCsvStream.batch} as its batch-size argument. */
    private static final int BATCH_SIZE = 20;
    // This is OFF until Encoders are made concurrency safe
    private static final boolean DEFAULT_PARALLEL_MODE = false;

    private static final String PATH_KEY = "PATH";
    private static final String JAR_SEPARATOR = "!";
    private static final String JAR_SCHEME = "jar:";
    private static final String FILE_SCHEME = "file:";
    private static final Charset UTF_8 = Charset.forName("UTF-8");

    /** Declared transient, so it is not written out when this sensor is serialized. */
    private transient BatchedCsvStream<String[]> stream;
    private SensorParams params;

    /**
     * Private constructor. Instances of this class should be obtained
     * through the {@link #create(SensorParams)} factory method.
     *
     * @param params    the parameters; must contain a {@code "PATH"} key whose value
     *                  is either a plain file path or a {@code <jar>!<entry>} path
     *                  (optionally prefixed with {@code "file:"} or {@code "jar:file:"})
     * @throws IllegalArgumentException if the {@code "PATH"} key is missing, or if the
     *                  file or jar entry it names does not exist or cannot be opened
     */
    private FileSensor(SensorParams params) {
        this.params = params;

        if(!params.hasKey(PATH_KEY)) {
            throw new IllegalArgumentException("Passed improperly formed Tuple: no key for \"PATH\"");
        }

        String pathStr = (String)params.get(PATH_KEY);

        Stream<String> lines;
        try {
            if(pathStr.indexOf(JAR_SEPARATOR) != -1) {
                lines = openJarEntryStream(stripUrlScheme(pathStr));
            }else{
                File f = new File(pathStr);
                if(!f.exists()) {
                    throw new IllegalArgumentException(
                        "Passed improperly formed Tuple: invalid PATH: " + params.get(PATH_KEY));
                }
                lines = Files.lines(f.toPath(), UTF_8);
            }
        }catch(IOException e) {
            // Fail at construction time instead of leaving a sensor whose stream is null.
            throw new IllegalArgumentException(
                "Passed improperly formed Tuple: invalid PATH: " + pathStr, e);
        }

        this.stream = BatchedCsvStream.batch(lines, BATCH_SIZE, DEFAULT_PARALLEL_MODE, HEADER_SIZE);
    }

    /**
     * Factory method to allow creation through the {@link SensorFactory} in
     * the {@link Sensor#create(SensorFactory, SensorParams)} method of the
     * parent {@link Sensor} class. This indirection allows the decoration of
     * the returned {@code Sensor} type by wrapping it in an {@link HTMSensor}
     * (which is the current implementation but could be any wrapper).
     *
     * @param p     the {@link SensorParams} which describe connection or source
     *              data details.
     * @return      the Sensor.
     * @throws IllegalArgumentException if {@code p} has no usable {@code "PATH"} entry
     */
    public static Sensor<File> create(SensorParams p) {
        return new FileSensor(p);
    }

    /**
     * Returns this {@code FileSensor}'s {@link SensorParams}
     * @return the SensorParams
     */
    @Override
    public SensorParams getSensorParams() {
        return params;
    }

    /**
     * Returns the batched CSV stream built from the configured path, cast to
     * the caller's requested {@link MetaStream} element type. The cast is
     * unchecked.
     *
     * @return the MetaStream
     */
    @SuppressWarnings("unchecked")
    @Override
    public <K> MetaStream<K> getInputStream() {
        return (MetaStream<K>)stream;
    }

    /**
     * Returns the values specifying meta information about the
     * underlying stream.
     *
     * @return the result of calling {@code getMeta()} on the underlying stream
     */
    public ValueList getMetaInfo() {
        return stream.getMeta();
    }

    /**
     * Returns a {@link Stream} over the lines of a Jar entry, decoded as UTF-8.
     * Closing the returned stream closes the underlying reader and jar file.
     *
     * @param path  a path of the form {@code <jar file>!<entry>} or
     *              {@code <jar file>!/<entry>}
     * @return      the stream of lines, or {@code null} if the jar or the entry
     *              could not be opened (the failure is printed to standard error)
     */
    public static Stream<String> getJarEntryStream(String path) {
        try {
            return openJarEntryStream(path);
        }catch(IOException e) {
            // Best effort by contract: callers of this method expect null on failure.
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Opens the jar entry named by {@code path} and returns its lines.
     *
     * @param path  a path of the form {@code <jar file>!<entry>} or
     *              {@code <jar file>!/<entry>}
     * @return      the stream of lines; never {@code null}
     * @throws IOException if the path has no {@code "!"} separator, or the jar
     *              or the entry cannot be opened
     */
    private static Stream<String> openJarEntryStream(String path) throws IOException {
        String[] parts = path.split(JAR_SEPARATOR, 2);
        if(parts.length < 2) {
            throw new IOException("No \"!\" separator in jar entry path: " + path);
        }

        // Jar URLs separate the entry with "!/", but entry names themselves carry no leading slash.
        String entryName = parts[1].startsWith("/") ? parts[1].substring(1) : parts[1];

        JarFile jar = new JarFile(parts[0]);
        try {
            ZipEntry entry = jar.getEntry(entryName);
            if(entry == null) {
                throw new IOException("No entry \"" + entryName + "\" in jar: " + parts[0]);
            }

            InputStream inStream = jar.getInputStream(entry);
            BufferedReader br = new BufferedReader(new InputStreamReader(inStream, UTF_8));
            return br.lines().onClose(() -> closeQuietly(br, jar));
        }catch(IOException | RuntimeException e) {
            // Nothing was handed to the caller, so the jar must be released here.
            try {
                jar.close();
            }catch(IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }

    /**
     * Closes the reader and then the jar, reporting (not propagating) any failure.
     * The jar is closed even when closing the reader fails.
     */
    private static void closeQuietly(BufferedReader reader, JarFile jar) {
        try {
            try {
                reader.close();
            }finally {
                jar.close();
            }
        }catch(IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Removes a leading {@code "jar:"} and/or {@code "file:"} scheme so that
     * what remains can be used as a file system path.
     */
    private static String stripUrlScheme(String path) {
        String stripped = path.startsWith(JAR_SCHEME) ? path.substring(JAR_SCHEME.length()) : path;
        return stripped.startsWith(FILE_SCHEME) ? stripped.substring(FILE_SCHEME.length()) : stripped;
    }

    /**
     * Manual smoke test: prints every line of {@code rec-center-hourly.csv}
     * from a test jar expected under the user's home directory.
     *
     * @param args  unused
     */
    public static void main(String[] args) {
        String filepart = System.getProperty("user.home") + "/git/htm.java/src/test/resources/pathtest.jar";
        File f = new File(filepart);
        System.out.println("file exists ? " + f.exists());
        String path = filepart + "!rec-center-hourly.csv";

        Stream<String> stream = getJarEntryStream(path);
        if(stream == null) {
            // getJarEntryStream has already reported the failure.
            return;
        }
        try(Stream<String> lines = stream) {
            lines.forEach(l -> System.out.println(l));
        }
    }
}