/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.streams.hdfs.test;

import org.apache.streams.core.StreamsDatum;
import org.apache.streams.core.StreamsResultSet;
import org.apache.streams.hdfs.HdfsConfiguration;
import org.apache.streams.hdfs.HdfsReaderConfiguration;
import org.apache.streams.hdfs.HdfsWriterConfiguration;
import org.apache.streams.hdfs.WebHdfsPersistReader;
import org.apache.streams.hdfs.WebHdfsPersistWriter;
import org.apache.streams.jackson.StreamsJacksonMapper;
import org.apache.streams.pojo.json.Activity;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

/**
 * Tests writing activity documents with {@link WebHdfsPersistWriter} and reading them
 * back with {@link WebHdfsPersistReader} across several field configurations.
 */
public class TestHdfsPersist {
private static final Logger LOGGER = LoggerFactory.getLogger(TestHdfsPersist.class);
  private static final ObjectMapper MAPPER = StreamsJacksonMapper.getInstance();

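  // Remove output from any previous run so each test execution starts with a clean target directory.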
@Before
public void setup() {
    File file = new File("target/TestHdfsPersist/");
    if (file.exists()) {
      // File.delete() will not remove a non-empty directory, so delete it recursively.
      FileUtils.deleteQuietly(file);
    }
  }

@Test
  public void testHdfsPersist() throws Exception {
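    // Exercise a full write/read round trip for several field configurations,
    // from an empty field list up to an explicit ID/TS/META/DOC layout.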
List<List<String>> fieldArrays = new ArrayList<>();
fieldArrays.add(new ArrayList<>());
fieldArrays.add(Collections.singletonList("ID"));
fieldArrays.add(Arrays.asList("ID", "DOC"));
fieldArrays.add(Arrays.asList("ID", "TS", "DOC"));
fieldArrays.add(Arrays.asList("ID", "TS", "META", "DOC"));
for (List<String> fields : fieldArrays) {
      testHdfsPersistCase(fields);
}
}

  public void testHdfsPersistCase(List<String> fields) throws Exception {
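    // Point the writer at the local filesystem (Scheme.FILE) so the test runs without an HDFS cluster.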
    HdfsConfiguration hdfsConfiguration = new HdfsConfiguration()
        .withScheme(HdfsConfiguration.Scheme.FILE)
        .withHost("localhost")
        .withUser("cloudera")
        .withPath("target/TestHdfsPersist");
hdfsConfiguration.setFields(fields);
HdfsWriterConfiguration hdfsWriterConfiguration = MAPPER.convertValue(hdfsConfiguration, HdfsWriterConfiguration.class);
if (fields.size() % 2 == 1) {
hdfsWriterConfiguration.setCompression(HdfsWriterConfiguration.Compression.GZIP);
}
hdfsWriterConfiguration.setWriterFilePrefix("activities");
hdfsWriterConfiguration.setWriterPath(Integer.toString(fields.size()));
WebHdfsPersistWriter writer = new WebHdfsPersistWriter(hdfsWriterConfiguration);
writer.prepare(null);
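    // The "activities" classpath folder contains one JSON activity document per file; list it to find the inputs.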
InputStream testActivityFolderStream = TestHdfsPersist.class.getClassLoader()
.getResourceAsStream("activities");
List<String> files = IOUtils.readLines(testActivityFolderStream, StandardCharsets.UTF_8);
int count = 0;
for (String file : files) {
LOGGER.info("File: " + file);
InputStream testActivityFileStream = TestHdfsPersist.class.getClassLoader()
.getResourceAsStream("activities/" + file);
Activity activity = MAPPER.readValue(testActivityFileStream, Activity.class);
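      // Strip the "$license" header property carried by the test resource files before persisting the activity.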
activity.getAdditionalProperties().remove("$license");
StreamsDatum datum = new StreamsDatum(activity, activity.getVerb());
writer.write(datum);
LOGGER.info("Wrote: " + activity.getVerb());
count++;
}
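    // Flush and close the writer so all output files are finalized before reading them back.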
writer.cleanUp();
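    // Build a reader from the same base configuration, pointed at the same sub-path, and verify the count.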
    HdfsReaderConfiguration hdfsReaderConfiguration = MAPPER.convertValue(hdfsConfiguration, HdfsReaderConfiguration.class);
    hdfsReaderConfiguration.setReaderPath(Integer.toString(fields.size()));
    WebHdfsPersistReader reader = new WebHdfsPersistReader(hdfsReaderConfiguration);
reader.prepare(null);
StreamsResultSet resultSet = reader.readAll();
    Assert.assertEquals(count, resultSet.size());
}
}