/**
 * Licensed to Cloudera, Inc. under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  Cloudera, Inc. licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.cloudera.flume.handlers.syslog;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.IOException;

import org.junit.Test;

import com.cloudera.flume.core.Event;
import com.cloudera.flume.handlers.debug.NoNlSynthSource;
import com.cloudera.flume.handlers.text.EventExtractException;
import com.cloudera.util.Benchmark;

/**
 * This demonstrates the rate that these different extractors work at.
 *
 * Tested on the same machine:
 *
 * Old method using regex: 1M messages, 100 bytes each, 8.4s => 11.9 MB/s (which
 * is close to SyslogTcp socket throughput limit). (Apparently there were bugs
 * in the regex)
 *
 * 500MB / 41.0 s => 12.2 MB/s
 *
 * New method using custom parser removing time syscalls: 1M messages, 100
 * bytes each, 5x
 *
 * 500MB / 15.3 s => 32.7 MB/s
 */
public class PerfSyslogWireExtract {

  /** Number of synthetic messages generated for each benchmark dataset. */
  private static final int MESSAGE_COUNT = 1000000;

  /** Size argument handed to the synthetic source for each message body. */
  private static final int MESSAGE_SIZE = 100;

  /** Random seed for the synthetic source, so every run sees the same data. */
  private static final int RAND_SEED = 0;

  /** Number of times each benchmark walks over the in-memory dataset. */
  private static final int PASSES = 5;

  /**
   * Builds the in-memory dataset shared by all benchmarks: every event body
   * produced by the synthetic source is written as
   * {@code "<33>" + body + '\n'}.
   *
   * @return the concatenated records as a single byte array
   * @throws IOException
   *           if the synthetic source or the buffer write fails
   */
  private static byte[] buildDataset() throws IOException {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    // Encoded once instead of once per message.
    byte[] priority = "<33>".getBytes();

    NoNlSynthSource src = new NoNlSynthSource(MESSAGE_COUNT, MESSAGE_SIZE,
        RAND_SEED);
    src.open();
    Event e = null;
    while ((e = src.next()) != null) {
      out.write(priority);
      out.write(e.getBody());
      out.write('\n');
    }
    return out.toByteArray();
  }

  /**
   * Makes a single pass over {@code dataset} through a
   * {@link DataInputStream}, reading {@code blockSize} bytes at a time with
   * {@link DataInputStream#readFully(byte[])}.
   *
   * @param dataset
   *          bytes to scan
   * @param blockSize
   *          number of bytes requested per read
   * @return the number of blocks that were read completely; a trailing partial
   *         block is not counted
   * @throws IOException
   *           on any read failure other than reaching the end of the data
   */
  private static int countFullBlocks(byte[] dataset, int blockSize)
      throws IOException {
    DataInputStream in = new DataInputStream(new ByteArrayInputStream(dataset));
    byte[] block = new byte[blockSize];
    int blocks = 0;
    try {
      while (true) {
        in.readFully(block);
        // Count only after the read succeeded so the EOF probe is excluded.
        blocks++;
      }
    } catch (EOFException eof) {
      // expected: readFully signals end of data this way.
    }
    return blocks;
  }

  /**
   * Generates a dataset, puts it into a memory buffer, and then uses the
   * DataInputStream machinery to read through it 100 bytes at a time.
   *
   * 1M x 100 bytes, 5 times. The reported line count is the number of complete
   * 100 byte blocks read over all passes.
   */
  @Test
  public void testNewExtractScan100() throws IOException,
      EventExtractException {
    Benchmark b = new Benchmark("new extract - scan 100 blocks");

    b.mark("build dataset");
    byte[] dataset = buildDataset();

    b.mark("start parsing dataset");
    // Nothing is parsed in a raw scan, so good/bad stay at zero.
    int good = 0;
    int bad = 0;
    int lines = 0;
    for (int i = 0; i < PASSES; i++) {
      lines += countFullBlocks(dataset, 100);
    }

    b.mark("complete-good-bad", good, bad, lines);
    b.done();
  }

  /**
   * Generates a dataset, puts it into a memory buffer, and then uses the
   * DataInputStream machinery to read through it 1000 bytes at a time.
   *
   * 1M x 100 bytes, 5 times. The reported line count is the number of complete
   * 1000 byte blocks read over all passes.
   */
  @Test
  public void testNewExtractScan1000() throws IOException,
      EventExtractException {
    Benchmark b = new Benchmark("new extract - scan 1000 blocks");

    b.mark("build dataset");
    byte[] dataset = buildDataset();

    b.mark("start parsing dataset");
    // Nothing is parsed in a raw scan, so good/bad stay at zero.
    int good = 0;
    int bad = 0;
    int lines = 0;
    for (int i = 0; i < PASSES; i++) {
      lines += countFullBlocks(dataset, 1000);
    }

    b.mark("complete-good-bad", good, bad, lines);
    b.done();
  }

  /**
   * Generates a dataset, puts it into a memory buffer, and then uses the
   * DataInputStream machinery to read through it one byte at a time.
   *
   * 1M x 100 bytes, 5 times. The reported line count is the number of bytes
   * read over all passes.
   */
  @Test
  public void testNewExtractScan() throws IOException, EventExtractException {
    Benchmark b = new Benchmark("new extract - scan single byte");

    b.mark("build dataset");
    byte[] dataset = buildDataset();

    b.mark("start parsing dataset");
    // Nothing is parsed in a raw scan, so good/bad stay at zero.
    int good = 0;
    int bad = 0;
    int lines = 0;
    for (int i = 0; i < PASSES; i++) {
      DataInputStream in = new DataInputStream(new ByteArrayInputStream(
          dataset));
      try {
        while (true) {
          // readByte() is the call being measured here, so this loop is kept
          // separate from the readFully based block scan.
          in.readByte();
          lines++;
        }
      } catch (EOFException eof) {
        // expected: readByte signals end of data this way.
      }
    }

    b.mark("complete-good-bad", good, bad, lines);
    b.done();
  }

  /**
   * Generates a dataset, puts it into a memory buffer, and then uses the
   * DataInputStream machinery to read through it one parsed record at a time
   * via {@link SyslogWireExtractor#extractEvent}.
   *
   * 1M x 100 bytes, 5 times. Records that extract cleanly are counted as good,
   * attempts that throw are counted as bad, and the reported line count is
   * their sum.
   */
  @Test
  public void testNewExtract() throws IOException, EventExtractException {
    // NOTE(review): the label says "regex" although this test drives
    // SyslogWireExtractor.extractEvent; it looks like a stale copy from an
    // older test. Left unchanged in case reports are matched on it - confirm.
    Benchmark b = new Benchmark("regex extract");

    b.mark("build dataset");
    byte[] outbytes = buildDataset();
    System.out.println("Outbytes length : " + outbytes.length);

    b.mark("start parsing dataset");
    int good = 0;
    int bad = 0;
    int lines = 0;
    for (int i = 0; i < PASSES; i++) {
      DataInputStream in = new DataInputStream(new ByteArrayInputStream(
          outbytes));
      while (true) {
        Event evt = null;
        try {
          evt = SyslogWireExtractor.extractEvent(in);
        } catch (Exception eee) {
          // Deliberately broad: any failed extraction is tallied as a bad
          // record and the scan carries on with the remaining input.
          bad++;
          lines++;
          continue;
        }
        if (evt == null) {
          // A null event ends this pass; it is not a record, so not counted.
          break;
        }
        good++;
        lines++;
      }
    }

    b.mark("complete-good-bad", good, bad, lines);
    b.done();
  }

  /**
   * A harness so that the profiler can attach to the process. Runs
   * {@link #testNewExtract()} once.
   */
  public static void main(String[] argv) throws IOException,
      EventExtractException {
    new PerfSyslogWireExtract().testNewExtract();
  }
}