/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.nifi.processors.hadoop.util; import org.apache.hadoop.fs.Path; import javax.xml.bind.annotation.XmlTransient; import javax.xml.bind.annotation.XmlType; import java.util.ArrayList; import java.util.Collection; import java.util.Date; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; /** * A simple POJO for maintaining state about the last HDFS Listing that was performed so that * we can avoid pulling the same file multiple times */ @XmlType(name = "listing") public class HDFSListing { private Date latestTimestamp; private Collection<String> matchingPaths; public static class StateKeys { public static final String TIMESTAMP = "timestamp"; public static final String PATH_PREFIX = "path."; } /** * @return the modification date of the newest file that was contained in the HDFS Listing */ public Date getLatestTimestamp() { return latestTimestamp; } /** * Sets the timestamp of the modification date of the newest file that was contained in the HDFS Listing * * @param latestTimestamp the timestamp of the modification date of the newest file that was contained in the HDFS Listing */ public void setLatestTimestamp(Date latestTimestamp) { this.latestTimestamp = latestTimestamp; } /** * @return a Collection containing the paths of all files in the HDFS Listing whose Modification date * was equal to {@link #getLatestTimestamp()} */ @XmlTransient public Collection<String> getMatchingPaths() { return matchingPaths; } /** * @return a Collection of {@link Path} objects equivalent to those returned by {@link #getMatchingPaths()} */ public Set<Path> toPaths() { final Set<Path> paths = new HashSet<>(matchingPaths.size()); for ( final String pathname : matchingPaths ) { paths.add(new Path(pathname)); } return paths; } /** * Sets the Collection containing the paths of all files in the HDFS Listing whose Modification Date was * equal to {@link #getLatestTimestamp()} * @param matchingPaths the paths that have last modified date matching the latest timestamp */ public void setMatchingPaths(Collection<String> matchingPaths) { this.matchingPaths = matchingPaths; } /** * Converts this HDFSListing into a Map<String, String> so that it can be stored in a StateManager. * * @return a Map that represents the same information as this HDFSListing */ public Map<String, String> toMap() { final Map<String, String> map = new HashMap<>(1 + matchingPaths.size()); map.put(StateKeys.TIMESTAMP, String.valueOf(latestTimestamp.getTime())); int counter = 0; for (final String path : matchingPaths) { map.put(StateKeys.PATH_PREFIX + String.valueOf(counter++), path); } return map; } public static HDFSListing fromMap(final Map<String, String> map) { if (map == null || map.isEmpty()) { return null; } final String timestampValue = map.get(StateKeys.TIMESTAMP); final long timestamp = Long.parseLong(timestampValue); final Collection<String> matchingPaths = new ArrayList<>(map.size() - 1); for (final Map.Entry<String, String> entry : map.entrySet()) { if (entry.getKey().startsWith(StateKeys.PATH_PREFIX)) { matchingPaths.add(entry.getValue()); } } final HDFSListing listing = new HDFSListing(); listing.setLatestTimestamp(new Date(timestamp)); listing.setMatchingPaths(matchingPaths); return listing; } }