/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package gobblin.source.extractor.extract.google;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.api.client.auth.oauth2.Credential;
import com.google.api.client.repackaged.com.google.common.base.Preconditions;
import com.google.api.services.drive.Drive;
import com.google.common.io.Closer;

import static gobblin.configuration.ConfigurationKeys.*;
import static gobblin.source.extractor.extract.google.GoogleCommonKeys.*;

import gobblin.configuration.SourceState;
import gobblin.configuration.State;
import gobblin.configuration.WorkUnitState;
import gobblin.source.extractor.Extractor;
import gobblin.source.extractor.filebased.FileBasedHelperException;
import gobblin.source.extractor.filebased.FileBasedSource;

/**
 * Source for Google Drive that uses {@link GoogleDriveFsHelper} as its file system helper.
 *
 * <p>The helper is created lazily by {@link #initFileSystemHelper(State)}, registered with an
 * internal {@link Closer}, and released by {@link #shutdown(SourceState)}.
 *
 * @param <S> first type argument, passed through to {@link FileBasedSource} and {@link Extractor}
 * @param <D> second type argument, passed through to {@link FileBasedSource} and {@link Extractor}
 */
public class GoogleDriveSource<S, D> extends FileBasedSource<S, D> {
  private static final Logger LOG = LoggerFactory.getLogger(GoogleDriveSource.class);

  public static final String GOOGLE_DRIVE_PREFIX = GOOGLE_SOURCE_PREFIX + "drive.";
  public static final String BUFFER_BYTE_SIZE = "buffer_byte_size";

  /** Owns every closeable created by this source so that shutdown can release them together. */
  private final Closer closer = Closer.create();

  /**
   * Creates a {@link GoogleDriveExtractor} for the given work unit. Because the extractor needs
   * the file system helper, this method first makes sure the helper has been initialized.
   *
   * {@inheritDoc}
   *
   * @param state work unit state; must not be null
   * @return a new extractor bound to {@code state} and the file system helper
   * @throws IOException if initializing the file system helper fails; the
   *         {@link FileBasedHelperException} is attached as the cause
   * @throws NullPointerException if {@code state} is null, or if the helper is still null after
   *         initialization
   * @see gobblin.source.Source#getExtractor(gobblin.configuration.WorkUnitState)
   */
  @Override
  public Extractor<S, D> getExtractor(WorkUnitState state) throws IOException {
    Preconditions.checkNotNull(state, "WorkUnitState should not be null");
    LOG.info("WorkUnitState from getExtractor: {}", state);

    try {
      // GoogleDriveExtractor needs GoogleDriveFsHelper
      initFileSystemHelper(state);
    } catch (FileBasedHelperException e) {
      throw new IOException(e);
    }

    Preconditions.checkNotNull(fsHelper, "File system helper should not be null");
    return new GoogleDriveExtractor<>(state, fsHelper);
  }

  /**
   * Initializes the file system helper at most once for this instance; calls made while the
   * helper is already set are no-ops.
   *
   * <p>Builds a credential and a {@link Drive} client from the properties in {@code state}, wraps
   * the client in a {@link GoogleDriveFsHelper}, and registers that helper with the closer so it
   * is closed on {@link #shutdown(SourceState)}.
   *
   * {@inheritDoc}
   *
   * @param state configuration holding the connection, proxy, scope and application-name
   *        properties
   * @throws FileBasedHelperException declared by the overridden method
   * @throws NullPointerException if the application name property is not set
   * @see gobblin.source.extractor.filebased.FileBasedSource#initFileSystemHelper(gobblin.configuration.State)
   */
  @Override
  public synchronized void initFileSystemHelper(State state) throws FileBasedHelperException {
    if (fsHelper == null) {
      Credential credential =
          new GoogleCommon.CredentialBuilder(state.getProp(SOURCE_CONN_PRIVATE_KEY), state.getPropAsList(API_SCOPES))
              .fileSystemUri(state.getProp(PRIVATE_KEY_FILESYSTEM_URI))
              .proxyUrl(state.getProp(SOURCE_CONN_USE_PROXY_URL))
              .port(state.getProp(SOURCE_CONN_USE_PROXY_PORT))
              .serviceAccountId(state.getProp(SOURCE_CONN_USERNAME))
              .build();

      Drive driveClient =
          new Drive.Builder(credential.getTransport(), GoogleCommon.getJsonFactory(), credential)
              .setApplicationName(
                  Preconditions.checkNotNull(state.getProp(APPLICATION_NAME), "ApplicationName is required"))
              .build();

      this.fsHelper = closer.register(new GoogleDriveFsHelper(state, driveClient));
    }
  }

  /**
   * Provides a snapshot of the files, where the snapshot is a list of entries each consisting of
   * a file ID joined with that file's modified time by {@code splitPattern}.
   *
   * <p>The folder ID is read from {@code SOURCE_FILEBASED_DATA_DIRECTORY} and defaults to an empty
   * string. Per the original documentation of this method, both folder ID and file ID are
   * optional: a missing folder ID means the search starts from the root folder, and a missing
   * file ID means all files in the current folder and its subfolders are included. That
   * resolution happens inside the file system helper, not in this method.
   *
   * {@inheritDoc}
   *
   * @param state configuration from which the folder ID is read
   * @return one entry per file ID returned by the helper's {@code ls} call
   * @throws NullPointerException if the file system helper has not been initialized
   * @throws RuntimeException if the helper fails to list the files or to read a modified time
   * @see gobblin.source.extractor.filebased.FileBasedSource#getcurrentFsSnapshot(gobblin.configuration.State)
   */
  @Override
  public List<String> getcurrentFsSnapshot(State state) {
    // Same guard as getExtractor: fail with a descriptive message instead of a bare NPE below.
    Preconditions.checkNotNull(this.fsHelper, "File system helper should not be null");

    List<String> results = new ArrayList<>();
    String folderId = state.getProp(SOURCE_FILEBASED_DATA_DIRECTORY, "");

    try {
      LOG.info("Running ls with folderId: {}", folderId);
      List<String> fileIds = this.fsHelper.ls(folderId);
      for (String fileId : fileIds) {
        results.add(fileId + splitPattern + this.fsHelper.getFileMTime(fileId));
      }
    } catch (FileBasedHelperException e) {
      throw new RuntimeException("Failed to retrieve list of file IDs for folderID: " + folderId, e);
    }
    return results;
  }

  /**
   * Closes everything registered with the closer, which includes the file system helper created
   * by {@link #initFileSystemHelper(State)}.
   *
   * @param state source state; not used by this implementation
   * @throws RuntimeException wrapping the {@link IOException} if closing fails
   */
  @Override
  public void shutdown(SourceState state) {
    try {
      closer.close();
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }
}