/* * Copyright 2014, The OpenNMS Group * * Licensed under the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. You may obtain * a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.opennms.newts.gsod; import static java.util.concurrent.TimeUnit.MILLISECONDS; import static java.util.concurrent.TimeUnit.SECONDS; import static org.opennms.newts.gsod.FileObservable.fileTreeWalker; import static org.opennms.newts.gsod.FileObservable.lines; import static rx.exceptions.Exceptions.propagate; import java.io.File; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.net.URI; import java.nio.file.Path; import java.text.ParseException; import java.util.List; import java.util.concurrent.BlockingQueue; import java.util.concurrent.Callable; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutorService; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; import org.apache.http.entity.ContentType; import org.apache.http.impl.nio.client.CloseableHttpAsyncClient; import org.apache.http.impl.nio.client.HttpAsyncClients; import org.apache.http.nio.client.methods.HttpAsyncMethods; import org.kohsuke.args4j.Argument; import org.kohsuke.args4j.CmdLineException; import org.kohsuke.args4j.CmdLineParser; import org.kohsuke.args4j.Option; import org.opennms.newts.api.MetricType; import org.opennms.newts.api.Sample; import org.opennms.newts.api.SampleRepository; import org.opennms.newts.api.Timestamp; import org.opennms.newts.reporter.metrics.NewtsReporter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import rx.Observable; import rx.Observable.Operator; import rx.Observer; import rx.Subscriber; import rx.Subscription; import rx.apache.http.ObservableHttp; import rx.apache.http.ObservableHttpResponse; import rx.exceptions.Exceptions; import rx.functions.Action0; import rx.functions.Func1; import rx.functions.Functions; import rx.schedulers.Schedulers; import com.codahale.metrics.ConsoleReporter; import com.codahale.metrics.Gauge; import com.codahale.metrics.Meter; import com.codahale.metrics.MetricRegistry; import com.codahale.metrics.Timer; import com.codahale.metrics.Timer.Context; import com.google.common.util.concurrent.ListenableFuture; import com.google.common.util.concurrent.ListeningExecutorService; import com.google.common.util.concurrent.MoreExecutors; import com.google.inject.Guice; import com.google.inject.Injector; public class ImportRunner { private int m_samplesPerBatch = 1000; private File m_source; private String m_restUrl = null; private SampleRepository m_repository; private int m_threadCount = 1; private int m_maxThreadQueueSize = 0; private double m_timescaleFactor = 1.0; private long m_timeoffset = 0; private void checkArgument(boolean check, String failureMessage) { if (!check) throw new IllegalArgumentException(failureMessage); } @Option(name="-n", aliases="--samples-per-batch", metaVar="sample-count", usage="the maxinum number of samples to include in each post to the repository (default: 1000)") public void setSamplesPerBatch(int samplesPerBatch) { checkArgument(samplesPerBatch > 0, "samples per batch must be greater than zero!"); m_samplesPerBatch = samplesPerBatch; } @Option(name="-u", aliases="--url", metaVar="url", usage="publish data via a Newts REST server at the given url (default: use direct access via Newts API)") public void setURL(String url) { checkArgument(url != null && !url.isEmpty(), "the url must not be empty"); m_restUrl = url; } @Option(name="-p", aliases="--parallelism", metaVar="thread-count", usage="when using direct the size of the thread pool that posts the results. (defaults to 1 ie no parallelism)") public void setParallelism(int threadCount) { checkArgument(threadCount > 0, "thread count must be at least 1."); m_threadCount = threadCount; } @Option(name="-q", aliases="--max-work-queue-size", metaVar="batch-count", usage="when using direct the max size of the work-queue (defaults to thread-count * 3)") public void setMaxThreadQueueSize(int maxThreadQueueSize) { checkArgument(maxThreadQueueSize > 0, "max thread queue size must be at least 1."); m_maxThreadQueueSize = maxThreadQueueSize; } @Option(name="-f", aliases="--time-scale-factor", metaVar="long", usage="to scale down the date we compress time dividing time by this factor") public void setTimescaleFactor(double factor) { m_timescaleFactor = factor; } @Option(name="-o", aliases="--time-offset", metaVar="timestamp", usage="adjust epoch time in seconds to be <time-offset>. defaults to no offset. 'now' is allowed.") public void setTimeoffset(String offset) { if (offset.equals("now")) { m_timeoffset = System.currentTimeMillis(); } else { m_timeoffset = Long.valueOf(offset)*1000; } } @Argument(metaVar="sourceDir", required=true, usage="the source directory that contains gsod data to import. These must be gzip'd files") public void setSource(File source) { checkArgument(source.exists(), "the source directory "+source+" does not exist"); checkArgument(source.isDirectory(), "the source directory must be a directory"); m_source = source; } private static final Logger LOG = LoggerFactory.getLogger(ImportRunner.class); public static void main(String... args) throws Exception { new ImportRunner().execute(args); } public void execute(String... args) throws Exception { CmdLineParser parser = new CmdLineParser(this); try { parser.parseArgument(args); } catch (CmdLineException e) { // handling of wrong arguments System.err.println(e.getMessage()); parser.printUsage(System.err); return; } // Setup the slf4j metrics reporter MetricRegistry metrics = new MetricRegistry(); final long start = System.currentTimeMillis(); metrics.register("elapsed-seconds", new Gauge<Double>() { @Override public Double getValue() { return (System.currentTimeMillis() - start)/1000.0; } }); final ConsoleReporter reporter = ConsoleReporter.forRegistry(metrics) .outputTo(System.err) .convertRatesTo(SECONDS) .convertDurationsTo(MILLISECONDS) .build(); reporter.start(10, SECONDS); if (m_restUrl == null) { // we are using a direct importer so use a NewtsReporter for storing metrics NewtsReporter newtsReporter = NewtsReporter.forRegistry(metrics) .name("importer") .convertRatesTo(SECONDS) .convertDurationsTo(MILLISECONDS) .build(repository()); newtsReporter.start(1, SECONDS); } LOG.debug("Scanning {} for GSOD data files...", m_source); // walk the files in the directory given Observable<Sample> samples = fileTreeWalker(m_source.toPath()) .subscribeOn(Schedulers.io()) // set up a meter for each file processed .map(meter(metrics.meter("files"), Path.class)) // report file .map(reportFile()) // read all the files and convert them into lines .mergeMap(lines()) // excluding the header lines .filter(exclude("YEARMODA")) // turn each line into a list of samples .mergeMap(samples()) // adjust time on samples according to arguments .map(adjustTime()) // meter the samples .map(meter(metrics.meter("samples"), Sample.class)) ; Observable<List<Sample>> batches = samples // create batches each second or of size m_samplesPerBatch whichever comes first .buffer(m_samplesPerBatch) ; Observable<Boolean> doImport = m_restUrl != null ? restPoster(batches, metrics) : directPoster(batches, metrics); System.err.println("doImport = " + doImport); // GO!!! final AtomicReference<Subscription> subscription = new AtomicReference<>(); final AtomicBoolean failed = new AtomicBoolean(false); final CountDownLatch latch = new CountDownLatch(1); Subscription s = doImport.subscribe(new Observer<Boolean>() { @Override public void onCompleted() { System.err.println("Finished Importing Everything!"); reporter.report(); latch.countDown(); System.exit(0); } @Override public void onError(Throwable e) { failed.set(true); System.err.println("Error importing!"); e.printStackTrace(); try { //latch.await(); Subscription s = subscription.get(); if (s != null) s.unsubscribe(); } catch (Exception ex) { System.err.println("Failed to close httpClient!"); ex.printStackTrace(); } finally { //dumpThreads(); } } @Override public void onNext(Boolean t) { System.err.println("Received a boolen: " + t); } }); subscription.set(s); if (failed.get()) { s.unsubscribe(); } //latch.countDown(); System.err.println("Return from Subscribe!"); latch.await(); //dumpThreads(); } private Func1<? super Sample, ? extends Sample> adjustTime() { return new Func1<Sample, Sample>() { @Override public Sample call(Sample s) { Timestamp oldTs = s.getTimestamp(); Timestamp newTs = Timestamp.fromEpochMillis(m_timeoffset + Math.round(oldTs.asMillis()/m_timescaleFactor)); return new Sample(newTs, s.getResource(), s.getName(), s.getType(), s.getValue()); } }; } private SampleRepository repository() { if (m_repository == null) { Injector injector = Guice.createInjector(new Config()); m_repository = injector.getInstance(SampleRepository.class); } return m_repository; } private Observable<Boolean> directPoster(Observable<List<Sample>> samples, MetricRegistry metrics) { final SampleRepository repository = repository(); final Timer timer = metrics.timer("writes"); final Meter completions = metrics.meter("samples-completed"); Func1<List<Sample>, Boolean> insert = new Func1<List<Sample>, Boolean>() { @Override public Boolean call(List<Sample> s) { int sz = s.size(); try (Context timerCtx = timer.time()) { repository.insert(s); return true; } finally { completions.mark(sz); } } }; return (m_threadCount == 1 ? samples.map(insert) : parMap(samples, metrics, insert)).all(Functions.<Boolean>identity()); } private Observable<Boolean> parMap(Observable<List<Sample>> samples, MetricRegistry metrics, Func1<List<Sample>, Boolean> insert) { final Timer waitTime = metrics.timer("wait-time"); @SuppressWarnings("serial") final BlockingQueue<Runnable> workQueue = new LinkedBlockingQueue<Runnable>(m_maxThreadQueueSize == 0 ? m_threadCount * 3 : m_maxThreadQueueSize) { @Override public boolean offer(Runnable r) { try (Context time = waitTime.time()) { this.put(r); return true; } catch (InterruptedException e) { throw Exceptions.propagate(e); } } @Override public boolean add(Runnable r) { try (Context time = waitTime.time()) { this.put(r); return true; } catch (InterruptedException e) { throw Exceptions.propagate(e); } } }; final ThreadPoolExecutor executor = new ThreadPoolExecutor(m_threadCount, m_threadCount, 0L, TimeUnit.MILLISECONDS, workQueue); metrics.register("active-threads", new Gauge<Integer>() { @Override public Integer getValue() { return executor.getActiveCount(); } }); metrics.register("pool-size", new Gauge<Integer>() { @Override public Integer getValue() { return executor.getPoolSize(); } }); metrics.register("largest-pool-size", new Gauge<Integer>() { @Override public Integer getValue() { return executor.getLargestPoolSize(); } }); metrics.register("work-queue-size", new Gauge<Integer>() { @Override public Integer getValue() { return workQueue.size(); } }); return parMap(samples, executor, metrics, insert); } private Observable<Boolean> parMap(Observable<List<Sample>> samples, ExecutorService executorSvc, final MetricRegistry metrics, final Func1<List<Sample>, Boolean> insert) { final ListeningExecutorService executor = MoreExecutors.listeningDecorator(executorSvc); return samples.lift(new Operator<ListenableFuture<Boolean>, List<Sample>>() { @Override public Subscriber<? super List<Sample>> call(final Subscriber<? super ListenableFuture<Boolean>> s) { return new Subscriber<List<Sample>>() { @Override public void onCompleted() { if (!s.isUnsubscribed()) { s.onCompleted(); } executor.shutdown(); } @Override public void onError(Throwable e) { if (!s.isUnsubscribed()) { s.onError(e); } } @Override public void onNext(final List<Sample> t) { if (!s.isUnsubscribed()) { try { ListenableFuture<Boolean> f = executor.submit(new Callable<Boolean>() { @Override public Boolean call() throws Exception { return insert.call(t); } }); s.onNext(f); } catch (Throwable ex) { onError(ex); } } } }; } }) .observeOn(Schedulers.io()) .map(new Func1<ListenableFuture<Boolean>, Boolean>() { @Override public Boolean call(ListenableFuture<Boolean> f) { try { return f.get(); } catch (Throwable e) { throw Exceptions.propagate(e); } } }); } private Observable<Boolean> restPoster(Observable<List<Sample>> samples, MetricRegistry metrics) { final CloseableHttpAsyncClient httpClient = HttpAsyncClients.createDefault(); httpClient.start(); return samples // turn each batch into json .map(toJSON()) // meter them as the go into the post code .map(meter(metrics.meter("posts"), String.class)) // post the json to the REST server .mergeMap(postJSON(m_restUrl, httpClient)) // meter the responses .map(meter(metrics.meter("responses"), ObservableHttpResponse.class)) // count sample completions .map(meter(metrics.meter("samples-completed"), m_samplesPerBatch, ObservableHttpResponse.class)) // make sure every request has a successful return code .all(successful()) .doOnCompleted(new Action0() { @Override public void call() { try { httpClient.close(); } catch (IOException e) { System.err.println("Failed to close httpClient!"); e.printStackTrace(); } } }); } private static Func1<? super Path, ? extends Path> reportFile() { return new Func1<Path, Path>() { @Override public Path call(Path file) { System.err.println("Begin Processing: " + file); return file; } }; } public static Func1<String, Observable<Sample>> samples() { final LineParser parser = new LineParser(); return new Func1<String, Observable<Sample>>() { @Override public Observable<Sample> call(String line) { try { return Observable.from(parser.parseLine(line)); } catch (ParseException e) { throw propagate(e); } } }; } private static boolean isNaN(Sample sample) { return (sample.getType() == MetricType.GAUGE) && Double.isNaN(sample.getValue().doubleValue()); } public static Func1<List<Sample>, String> toJSON() { return new Func1<List<Sample>, String>() { @Override public String call(List<Sample> samples) { JSONBuilder bldr = new JSONBuilder(); for(Sample sample : samples) { if (isNaN(sample)) continue; //System.err.println("Importing: " + sample); bldr.newObject(); bldr.attr("timestamp", sample.getTimestamp().asMillis()); bldr.attr("resource", sample.getResource().getId()); bldr.attr("name", sample.getName()); bldr.attr("type", sample.getType().name()); if (sample.getType() == MetricType.GAUGE) { bldr.attr("value", sample.getValue().doubleValue()); } else { bldr.attr("value", sample.getValue().longValue()); } } return bldr.toString(); } }; } private static Func1<ObservableHttpResponse, Boolean> successful() { return new Func1<ObservableHttpResponse, Boolean>() { @Override public Boolean call(ObservableHttpResponse response) { if (response.getResponse().getStatusLine().getStatusCode() >= 400) { throw new RuntimeException("Failed to post samples: " + response.getResponse().getStatusLine()); } return true; } }; } public static Func1<String, Observable<ObservableHttpResponse>> postJSON(final String baseURL, final CloseableHttpAsyncClient httpClient) { final URI baseURI = URI.create(baseURL); return new Func1<String, Observable<ObservableHttpResponse>>() { @Override public Observable<ObservableHttpResponse> call(String json) { try { return ObservableHttp.createRequest(HttpAsyncMethods.createPost(baseURI, json, ContentType.APPLICATION_JSON), httpClient).toObservable(); } catch (UnsupportedEncodingException e) { throw Exceptions.propagate(e); } } }; } public static Func1<String, Boolean> exclude(final String pattern) { return new Func1<String, Boolean>() { @Override public Boolean call(String s) { return !s.contains(pattern); } }; } public static <T> Func1<T, T> meter(final Meter meter, Class<T> clazz) { return meter(meter, 1, clazz); } public static <T> Func1<T, T> meter(final Meter meter, final int count, Class<T> clazz) { return new Func1<T, T>() { @Override public T call(T t) { meter.mark(count); return t; } }; } }