/*
* Copyright (C) 2014 Jörg Prante
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.xbib.io.archive.esbulk;
import org.xbib.io.archive.ArchiveInputStream;
import org.xbib.io.archive.ArchiveUtils;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class EsBulkArchiveInputStream extends ArchiveInputStream<EsBulkArchiveEntry> {
private final static Pattern indexPattern = Pattern.compile("\"_index\"\\s*:\\s*\"(.*?)\"");
private final static Pattern typePattern = Pattern.compile("\"_type\"\\s*:\\s*\"(.*?)\"");
private final static Pattern idPattern = Pattern.compile("\"_id\"\\s*:\\s*\"(.*?)\"");
private final BufferedReader reader;
private ByteArrayInputStream in;
public EsBulkArchiveInputStream(InputStream in) throws UnsupportedEncodingException {
this.reader = new BufferedReader(new InputStreamReader(in, "UTF-8"));
}
@Override
public EsBulkArchiveEntry getNextEntry() throws IOException {
String meta = reader.readLine();
if (meta == null) {
return null;
}
String data = reader.readLine();
if (data == null) {
return null;
}
EsBulkArchiveEntry entry = new EsBulkArchiveEntry();
StringBuilder sb = new StringBuilder();
Matcher m = indexPattern.matcher(meta);
if (m.find()) {
sb.append(ArchiveUtils.encode(m.group(1), Charset.forName("UTF-8")));
} else {
throw new IOException("no _index found");
}
m = typePattern.matcher(meta);
if (m.find()) {
sb.append('/').append(ArchiveUtils.encode(m.group(1), Charset.forName("UTF-8")));
} else {
throw new IOException("no _type found");
}
m = idPattern.matcher(meta);
if (m.find()) {
sb.append('/').append(ArchiveUtils.encode(m.group(1), Charset.forName("UTF-8")));
} else {
throw new IOException("no _id found");
}
entry.setName(sb.toString());
byte[] b = data.getBytes("UTF-8");
entry.setEntrySize(b.length);
this.in = new ByteArrayInputStream(b);
return entry;
}
@Override
public int read(byte[] buffer, int start, int length) throws IOException {
return in.read(buffer, start, length);
}
@Override
public void close() throws IOException {
reader.close();
}
@Override
public long skip(long value) throws IOException {
throw new UnsupportedOperationException();
}
}