/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.beam.runners.flink.streaming; import com.google.api.services.bigquery.model.TableRow; import com.google.common.base.Joiner; import java.io.Serializable; import java.util.Arrays; import org.apache.beam.runners.flink.FlinkTestPipeline; import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.io.TextIO; import org.apache.beam.sdk.transforms.Count; import org.apache.beam.sdk.transforms.Create; import org.apache.beam.sdk.transforms.DoFn; import org.apache.beam.sdk.transforms.ParDo; import org.apache.beam.sdk.transforms.windowing.Sessions; import org.apache.beam.sdk.transforms.windowing.Window; import org.apache.beam.sdk.values.KV; import org.apache.beam.sdk.values.PCollection; import org.apache.flink.streaming.util.StreamingProgramTestBase; import org.joda.time.Duration; import org.joda.time.Instant; /** * Session window test. */ public class TopWikipediaSessionsITCase extends StreamingProgramTestBase implements Serializable { protected String resultPath; public TopWikipediaSessionsITCase(){ } static final String[] EXPECTED_RESULT = new String[] { "user: user1 value:3", "user: user1 value:1", "user: user2 value:4", "user: user2 value:6", "user: user3 value:7", "user: user3 value:2" }; @Override protected void preSubmit() throws Exception { resultPath = getTempDirPath("result"); } @Override protected void postSubmit() throws Exception { compareResultsByLinesInMemory(Joiner.on('\n').join(EXPECTED_RESULT), resultPath); } @Override protected void testProgram() throws Exception { Pipeline p = FlinkTestPipeline.createForStreaming(); Long now = (System.currentTimeMillis() + 10000) / 1000; PCollection<KV<String, Long>> output = p.apply(Create.of(Arrays.asList(new TableRow().set("timestamp", now).set ("contributor_username", "user1"), new TableRow().set("timestamp", now + 10).set ("contributor_username", "user3"), new TableRow().set("timestamp", now).set ("contributor_username", "user2"), new TableRow().set("timestamp", now).set ("contributor_username", "user1"), new TableRow().set("timestamp", now + 2).set ("contributor_username", "user1"), new TableRow().set("timestamp", now).set ("contributor_username", "user2"), new TableRow().set("timestamp", now + 1).set ("contributor_username", "user2"), new TableRow().set("timestamp", now + 5).set ("contributor_username", "user2"), new TableRow().set("timestamp", now + 7).set ("contributor_username", "user2"), new TableRow().set("timestamp", now + 8).set ("contributor_username", "user2"), new TableRow().set("timestamp", now + 200).set ("contributor_username", "user2"), new TableRow().set("timestamp", now + 230).set ("contributor_username", "user1"), new TableRow().set("timestamp", now + 230).set ("contributor_username", "user2"), new TableRow().set("timestamp", now + 240).set ("contributor_username", "user2"), new TableRow().set("timestamp", now + 245).set ("contributor_username", "user3"), new TableRow().set("timestamp", now + 235).set ("contributor_username", "user3"), new TableRow().set("timestamp", now + 236).set ("contributor_username", "user3"), new TableRow().set("timestamp", now + 237).set ("contributor_username", "user3"), new TableRow().set("timestamp", now + 238).set ("contributor_username", "user3"), new TableRow().set("timestamp", now + 239).set ("contributor_username", "user3"), new TableRow().set("timestamp", now + 240).set ("contributor_username", "user3"), new TableRow().set("timestamp", now + 241).set ("contributor_username", "user2"), new TableRow().set("timestamp", now) .set("contributor_username", "user3")))) .apply(ParDo.of(new DoFn<TableRow, String>() { @ProcessElement public void processElement(ProcessContext c) throws Exception { TableRow row = c.element(); long timestamp = (Integer) row.get("timestamp"); String userName = (String) row.get("contributor_username"); if (userName != null) { // Sets the timestamp field to be used in windowing. c.outputWithTimestamp(userName, new Instant(timestamp * 1000L)); } } })) .apply(Window.<String>into(Sessions.withGapDuration(Duration.standardMinutes(1)))) .apply(Count.<String>perElement()); PCollection<String> format = output.apply(ParDo.of(new DoFn<KV<String, Long>, String>() { @ProcessElement public void processElement(ProcessContext c) throws Exception { KV<String, Long> el = c.element(); String out = "user: " + el.getKey() + " value:" + el.getValue(); c.output(out); } })); format.apply(TextIO.write().to(resultPath)); p.run(); } }