/*
* Apache License
* Version 2.0, January 2004
* http://www.apache.org/licenses/
*
* Copyright 2013 Aurelian Tutuianu
* Copyright 2014 Aurelian Tutuianu
* Copyright 2015 Aurelian Tutuianu
* Copyright 2016 Aurelian Tutuianu
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package rapaio.io.json.stream;
import rapaio.io.json.tree.JsonValue;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.LinkedList;
import java.util.List;
import java.util.Spliterator;
import java.util.function.Consumer;
import java.util.function.Predicate;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.zip.GZIPInputStream;
/**
 * Spliterator which produces {@link JsonValue} instances read, one file after another,
 * from a list of files.
 * <p>
 * Files whose name ends with {@code .lzjson} are read through a {@link GZIPInputStream}
 * by an {@code LzJsonInput}; every other file is read by a {@code JsonInputFlat}.
 * Splitting is done at file granularity only: a single file is never divided.
 * <p>
 * The size estimate is expressed in bytes (sum of the lengths of the files still to be
 * opened), not in number of elements, therefore this spliterator does not report
 * {@link Spliterator#SIZED} or {@link Spliterator#SUBSIZED}.
 * <p>
 * Created by <a href="mailto:padreati@yahoo.com">Aurelian Tutuianu</a> on 3/6/15.
 */
public class JsonSpliterator implements Spliterator<JsonValue> {

    private static final Logger logger = Logger.getLogger(JsonSpliterator.class.getName());

    /** Files which were not opened yet, in the order in which they will be read. */
    private final LinkedList<File> files;
    /** Receives a progress message each time a new file is about to be opened. */
    private final Consumer<String> messageHandler;
    /** {@code true} when this instance was built with more than one file. */
    private final boolean parallel;
    /** Property filter handed over to {@code LzJsonInput}. */
    private final Predicate<String> propFilter;
    /** Size estimate in bytes, see {@link #estimateSize()}. */
    private long estimateSize;
    /** Input currently being read, {@code null} when no input is open. */
    private JsonInput input;

    /**
     * Builds a spliterator over the given files.
     *
     * @param files          files to read; the list is copied, later changes to it are not seen
     * @param messageHandler consumer of progress messages
     * @param propFilter     property filter passed to the {@code .lzjson} reader
     */
    public JsonSpliterator(List<File> files, Consumer<String> messageHandler, Predicate<String> propFilter) {
        this.files = new LinkedList<>(files);
        this.parallel = files.size() > 1;
        this.messageHandler = messageHandler;
        this.propFilter = propFilter;
        this.estimateSize = pendingBytes();
    }

    /**
     * @return {@code true} if this spliterator was created with more than one file,
     * which is the only situation when it can be split
     */
    public boolean isParallel() {
        return parallel;
    }

    /**
     * @return sum of the lengths, in bytes, of the files which were not opened yet
     */
    private long pendingBytes() {
        return files.stream().mapToLong(File::length).sum();
    }

    /**
     * Reads the next value, moving to the next file whenever the current input is exhausted.
     * <p>
     * Implemented as a loop rather than recursion so that a long run of files without
     * any value does not grow the call stack.
     *
     * @return next value, or {@code null} when every file was consumed
     * @throws IOException if closing an exhausted input or opening the next file fails
     */
    private JsonValue parseStream() throws IOException {
        while (true) {
            boolean hadInput = input != null;
            if (hadInput) {
                JsonValue js;
                try {
                    js = input.read();
                } catch (IOException eof) {
                    // Best effort: a failed read is handled as the end of the current input,
                    // but it is logged so that real I/O problems are not completely hidden.
                    logger.log(Level.FINE, "read failed, current input is considered exhausted", eof);
                    js = null;
                }
                if (js != null) {
                    return js;
                }
                // Forget the input before closing it, so that it is never read
                // or closed a second time by a later call.
                JsonInput exhausted = input;
                input = null;
                exhausted.close();
            }
            if (files.isEmpty()) {
                estimateSize = 0;
                return null;
            }
            String prefix = hadInput ? "parsing (next): " : "parsing (head): ";
            messageHandler.accept(prefix + files.getFirst().getName());
            // The estimate is taken before polling, so it includes the file about to be opened.
            estimateSize = pendingBytes();
            input = buildInput(files.pollFirst());
        }
    }

    /**
     * Opens the reader which corresponds to the given file.
     *
     * @param file file to open
     * @return an {@code LzJsonInput} over the gzip stream for {@code .lzjson} files,
     * a {@code JsonInputFlat} otherwise
     * @throws IOException if the file cannot be opened
     */
    private JsonInput buildInput(File file) throws IOException {
        if (!file.getName().endsWith(".lzjson")) {
            return new JsonInputFlat(file);
        }
        FileInputStream raw = new FileInputStream(file);
        try {
            return new LzJsonInput(new BufferedInputStream(new GZIPInputStream(raw)), propFilter);
        } catch (IOException | RuntimeException ex) {
            // The wrappers were not built, so nobody else would close the file handle.
            try {
                raw.close();
            } catch (IOException closeEx) {
                ex.addSuppressed(closeEx);
            }
            throw ex;
        }
    }

    /**
     * Passes the next value, if any, to the given action.
     *
     * @return {@code false} when there is no value left or when an I/O error happened
     * (the error is logged), {@code true} otherwise
     */
    @Override
    public boolean tryAdvance(Consumer<? super JsonValue> action) {
        JsonValue value;
        try {
            value = parseStream();
        } catch (IOException ex) {
            logger.log(Level.SEVERE, "error at try advance", ex);
            return false;
        }
        if (value == null) {
            return false;
        }
        action.accept(value);
        return true;
    }

    /**
     * Gives away the last half of the files which were not opened yet.
     * The input which is currently open, if any, always stays with this instance.
     *
     * @return a new spliterator over the removed files, or {@code null} if fewer than
     * two files are waiting to be opened
     */
    @Override
    public Spliterator<JsonValue> trySplit() {
        int total = files.size();
        if (total < 2) {
            return null;
        }
        int keep = total - total / 2;
        List<File> tail = files.subList(keep, total);
        // The constructor copies the view, so clearing it afterwards only affects this instance.
        JsonSpliterator split = new JsonSpliterator(tail, messageHandler, propFilter);
        tail.clear();
        // The files handed over must not be counted by this instance anymore.
        estimateSize = pendingBytes();
        return split;
    }

    /**
     * Passes every remaining value to the given action. Traversal stops at the first
     * I/O error, which is logged.
     */
    @Override
    public void forEachRemaining(Consumer<? super JsonValue> action) {
        try {
            for (JsonValue value = parseStream(); value != null; value = parseStream()) {
                action.accept(value);
            }
        } catch (IOException e) {
            logger.log(Level.SEVERE, "error at forEachRemaining", e);
        }
    }

    /**
     * @return rough estimate expressed in bytes: the summed length of the files taken into
     * account at construction, at the last file opening or at the last split; {@code 0}
     * once every file was consumed
     */
    @Override
    public long estimateSize() {
        return estimateSize;
    }

    /**
     * Flags are combined with {@code |}; combining distinct flag bits with {@code &}
     * always evaluates to {@code 0}.
     * <p>
     * {@link Spliterator#SIZED} and {@link Spliterator#SUBSIZED} are intentionally not
     * reported, because {@link #estimateSize()} counts bytes and not elements.
     * {@link Spliterator#NONNULL} holds because only non null values are passed to actions.
     * {@link Spliterator#IMMUTABLE} assumes the files are not modified during traversal.
     */
    @Override
    public int characteristics() {
        return IMMUTABLE | NONNULL;
    }
}