/*! ******************************************************************************
 *
 * Pentaho Data Integration
 *
 * Copyright (C) 2002-2016 by Pentaho : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.di.trans.steps.googleanalytics;

import java.io.IOException;
import java.util.List;

import org.pentaho.di.core.util.Utils;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.row.RowDataUtil;
import org.pentaho.di.core.row.RowMeta;
import org.pentaho.di.core.row.ValueMetaInterface;
import org.pentaho.di.core.row.value.ValueMetaFactory;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.BaseStep;
import org.pentaho.di.trans.step.StepDataInterface;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;

import com.google.api.client.auth.oauth2.TokenResponseException;
import com.google.api.services.analytics.Analytics;

/**
 * Input step that reads rows from a Google Analytics data feed and emits them as output rows.
 * <p>
 * The {@link Analytics} client and the account name are obtained once in
 * {@link #init(StepMetaInterface, StepDataInterface)}; the query itself is built and executed lazily on the
 * first call to {@link #getNextDataEntry()}, which also requests follow-up pages as the current page is consumed.
 */
public class GaInputStep extends BaseStep implements StepInterface {

  private static Class<?> PKG = GaInputStepMeta.class; // for i18n purposes

  private GaInputStepData data;
  private GaInputStepMeta meta;

  /** API client created in {@link #init(StepMetaInterface, StepDataInterface)}. */
  private Analytics analytics;

  /** Name of the first account returned by the management API; fetched once during init. */
  private String accountName;

  public GaInputStep( StepMeta s, StepDataInterface stepDataInterface, int c, TransMeta t, Trans dis ) {
    super( s, stepDataInterface, c, t, dis );
  }

  /**
   * Produces at most one output row per call.
   * <p>
   * On the first call the output row layout and the per-field string conversion metadata are prepared. Each call
   * then takes the next feed entry (unless the configured row limit has been reached), maps it to the output
   * fields and passes the row on.
   *
   * @param smi the step metadata, expected to be a {@link GaInputStepMeta}
   * @param sdi the step data, expected to be a {@link GaInputStepData}
   * @return {@code true} if a row was written and more may follow, {@code false} once the output is done
   * @throws KettleException if the feed cannot be queried or a value cannot be converted
   */
  @Override
  public boolean processRow( StepMetaInterface smi, StepDataInterface sdi ) throws KettleException {

    meta = (GaInputStepMeta) smi;
    data = (GaInputStepData) sdi;

    if ( first ) {
      first = false;

      data.outputRowMeta = new RowMeta();
      meta.getFields( data.outputRowMeta, getStepname(), null, null, this, repository, metaStore );

      // one from-string conversion descriptor per output field
      data.conversionMeta = new ValueMetaInterface[ meta.getFieldsCount() ];

      for ( int i = 0; i < meta.getFieldsCount(); i++ ) {

        // get output and from-string conversion format for each field
        ValueMetaInterface returnMeta = data.outputRowMeta.getValueMeta( i );

        ValueMetaInterface conversionMeta =
          ValueMetaFactory.cloneValueMeta( returnMeta, ValueMetaInterface.TYPE_STRING );
        conversionMeta.setConversionMask( meta.getConversionMask()[ i ] );
        conversionMeta.setDecimalSymbol( "." ); // google analytics is en-US
        conversionMeta.setGroupingSymbol( null ); // google analytics uses no grouping symbol

        data.conversionMeta[ i ] = conversionMeta;
      }
    }

    // Check the row limit before asking for the next entry: fetching an entry may trigger a remote page request
    // whose result would be thrown away if the limit has already been reached.
    boolean underRowLimit = meta.getRowLimit() <= 0 || getLinesWritten() < meta.getRowLimit();
    List<String> entry = underRowLimit ? getNextDataEntry() : null;

    if ( entry == null ) {
      // no more data, or row limit reached
      setOutputDone();
      return false;
    }

    // generate output row, make it correct size
    Object[] outputRow = RowDataUtil.allocateRowData( data.outputRowMeta.size() );

    // fill the output fields with look up data; j walks the dimension/metric columns of the feed entry
    for ( int i = 0, j = 0; i < meta.getFieldsCount(); i++ ) {
      String fieldName = environmentSubstitute( meta.getFeedField()[ i ] );
      Object dataObject;
      String type = environmentSubstitute( meta.getFeedFieldType()[ i ] );

      // We handle fields differently depending on whether its a Dimension/Metric, Data Source Property, or
      // Data Source Field. Also the API doesn't exactly match the concepts anymore (see individual comments below),
      // so there is quite a bit of special processing.
      if ( GaInputStepMeta.FIELD_TYPE_DATA_SOURCE_PROPERTY.equals( type ) ) {
        // Account name has to be handled differently, it's in the Accounts API not Profiles API
        if ( GaInputStepMeta.PROPERTY_DATA_SOURCE_ACCOUNT_NAME.equals( fieldName ) ) {
          // We expect a single account name, and already fetched it during init
          dataObject = accountName;
        } else {
          dataObject = data.feed.getProfileInfo().get( removeClassifier( fieldName ) );
        }
      } else if ( GaInputStepMeta.FIELD_TYPE_DATA_SOURCE_FIELD.equals( type ) ) {
        // Get tableId or tableName
        if ( GaInputStepMeta.FIELD_DATA_SOURCE_TABLE_ID.equals( fieldName ) ) {
          dataObject = data.feed.getProfileInfo().get( removeClassifier( fieldName ) );
        } else {
          // We only have two Data Source Fields and they're hard-coded, so we handle tableName in this else-clause
          // since tableId was done in the if-clause. We have to handle the two differently because tableName is
          // actually the profile name in this version (v3) of the Google Analytics API.
          dataObject = data.feed.getProfileInfo().getProfileName();
        }
      } else if ( GaInputStepMeta.DEPRECATED_FIELD_TYPE_CONFIDENCE_INTERVAL.equals( type ) ) {
        dataObject = null;
        if ( log.isRowLevel() ) {
          logRowlevel( BaseMessages.getString( PKG, "GoogleAnalytics.Warn.FieldTypeNotSupported",
            GaInputStepMeta.DEPRECATED_FIELD_TYPE_CONFIDENCE_INTERVAL ) );
        }
      } else {
        // Assume it's a Dimension or Metric, we've covered the rest of the cases above.
        dataObject = entry.get( j++ );
      }
      outputRow[ i ] = data.outputRowMeta.getValueMeta( i ).convertData( data.conversionMeta[ i ], dataObject );
    }

    // copy row to possible alternate rowset(s)
    putRow( data.outputRowMeta, outputRow );

    // Some basic logging
    if ( checkFeedback( getLinesWritten() ) ) {
      if ( log.isBasic() ) {
        logBasic( "Linenr " + getLinesWritten() );
      }
    }
    return true;
  }

  /**
   * Builds the data query from the step settings (table id, date range, metrics and the optional dimensions,
   * segment, sampling level, filters and sort).
   *
   * @param analytics the API client to build the query with
   * @return the prepared query, or {@code null} if no metrics are configured or the query could not be created;
   *         in both cases the reason is written to the error log
   */
  protected Analytics.Data.Ga.Get getQuery( Analytics analytics ) {

    Analytics.Data dataApi = analytics.data();
    Analytics.Data.Ga.Get query;

    try {
      String metrics = environmentSubstitute( meta.getMetrics() );
      if ( Utils.isEmpty( metrics ) ) {
        logError( BaseMessages.getString( PKG, "GoogleAnalytics.Error.NoMetricsSpecified.Message" ) );
        return null;
      }

      String tableId = meta.isUseCustomTableId()
        ? environmentSubstitute( meta.getGaCustomTableId() )
        : meta.getGaProfileTableId();

      query = dataApi.ga().get(
        tableId, // ids
        environmentSubstitute( meta.getStartDate() ), // start date
        environmentSubstitute( meta.getEndDate() ), // end date
        metrics // metrics
      );

      String dimensions = environmentSubstitute( meta.getDimensions() );
      if ( !Utils.isEmpty( dimensions ) ) {
        query.setDimensions( dimensions );
      }

      if ( meta.isUseSegment() ) {
        if ( meta.isUseCustomSegment() ) {
          query.setSegment( environmentSubstitute( meta.getCustomSegment() ) );
        } else {
          query.setSegment( meta.getSegmentId() );
        }
      }

      if ( !Utils.isEmpty( meta.getSamplingLevel() ) ) {
        query.setSamplingLevel( environmentSubstitute( meta.getSamplingLevel() ) );
      }

      // Only set a filter when both the raw setting and its substituted value are non-empty;
      // the substitution is done once and reused.
      String rawFilters = meta.getFilters();
      if ( !Utils.isEmpty( rawFilters ) ) {
        String filters = environmentSubstitute( rawFilters );
        if ( !Utils.isEmpty( filters ) ) {
          query.setFilters( filters );
        }
      }

      if ( !Utils.isEmpty( meta.getSort() ) ) {
        query.setSort( environmentSubstitute( meta.getSort() ) );
      }

      return query;
    } catch ( IOException ioe ) {
      // Keep the null-return contract, but do not lose the cause
      logError( BaseMessages.getString( PKG, "GoogleAnalytics.Error.AccessingGaApi" ), ioe );
      return null;
    }
  }

  /**
   * Initializes the step: reports deprecated field types, creates the {@link Analytics} client and fetches the
   * account name.
   *
   * @param smi the step metadata, expected to be a {@link GaInputStepMeta}
   * @param sdi the step data, expected to be a {@link GaInputStepData}
   * @return {@code true} on success; {@code false} if the base initialization fails or the API cannot be accessed
   */
  @Override
  public boolean init( StepMetaInterface smi, StepDataInterface sdi ) {
    meta = (GaInputStepMeta) smi;
    data = (GaInputStepData) sdi;

    if ( !super.init( smi, sdi ) ) {
      return false;
    }

    // Look for deprecated field types and log error(s) for them
    String[] types = environmentSubstitute( meta.getFeedFieldType() );
    if ( types != null ) {
      for ( String type : types ) {
        if ( GaInputStepMeta.DEPRECATED_FIELD_TYPE_CONFIDENCE_INTERVAL.equals( type ) ) {
          logError( BaseMessages.getString( PKG, "GoogleAnalytics.Warn.FieldTypeNotSupported",
            GaInputStepMeta.DEPRECATED_FIELD_TYPE_CONFIDENCE_INTERVAL ) );
        }
      }
    }

    String appName = environmentSubstitute( meta.getGaAppName() );
    String serviceAccount = environmentSubstitute( meta.getOAuthServiceAccount() );
    String oauthKeyFile = environmentSubstitute( meta.getOAuthKeyFile() );

    if ( log.isDetailed() ) {
      logDetailed( BaseMessages.getString( PKG, "GoogleAnalyticsDialog.AppName.Label" ) + ": " + appName );
      logDetailed( BaseMessages.getString( PKG, "GoogleAnalyticsDialog.OauthAccount.Label" ) + ": " + serviceAccount );
      logDetailed( BaseMessages.getString( PKG, "GoogleAnalyticsDialog.KeyFile.Label" ) + ": " + oauthKeyFile );
    }

    try {
      // Create an Analytics object, and fetch what we can for later (account name, e.g.)
      analytics = GoogleAnalyticsApiFacade.createFor( appName, serviceAccount, oauthKeyFile ).getAnalytics();
      // There is necessarily an account name associated with this, so any NPEs or other exceptions mean bail out
      accountName = analytics.management().accounts().list().execute().getItems().iterator().next().getName();
    } catch ( TokenResponseException tre ) {
      Exception exceptionToLog = tre;
      if ( tre.getDetails() != null && tre.getDetails().getError() != null ) {
        // wrap the exception so the log shows the OAuth error detail as the message
        exceptionToLog = new IOException( BaseMessages.getString( PKG, "GoogleAnalytics.Error.OAuth2.Auth",
          tre.getDetails().getError() ), tre );
      }
      logError( BaseMessages.getString( PKG, "GoogleAnalytics.Error.AccessingGaApi" ), exceptionToLog );
      return false;
    } catch ( Exception e ) {
      logError( BaseMessages.getString( PKG, "GoogleAnalytics.Error.AccessingGaApi" ), e );
      return false;
    }
    return true;
  }

  /**
   * Returns the next row of the feed, executing the query on first use and requesting the next page when the
   * current one is about to be exhausted.
   * <p>
   * Made not private for testing purposes.
   *
   * @return the next entry, or {@code null} when there is no further entry
   * @throws KettleException if the query cannot be prepared or its execution fails
   */
  List<String> getNextDataEntry() throws KettleException {
    // no query prepared yet?
    if ( data.query == null ) {

      data.query = getQuery( analytics );
      if ( data.query == null ) {
        // getQuery() has already logged the reason; fail cleanly instead of dereferencing null below
        throw new KettleException( BaseMessages.getString( PKG, "GoogleAnalytics.Error.AccessingGaApi" ) );
      }
      // use default max results for now
      //data.query.setMaxResults( 10000 );

      if ( log.isDetailed() ) {
        logDetailed( "querying google analytics: " + data.query.buildHttpRequestUrl().toURI().toString() );
      }

      try {
        data.feed = data.query.execute();
        data.entryIndex = 0;
      } catch ( IOException e2 ) {
        throw new KettleException( e2 );
      }

    } else if ( data.feed != null
      // getItemsPerPage():
      //    Its value ranges from 1 to 10,000 with a value of 1000 by default, or otherwise
      //    specified by the max-results query parameter
      && data.entryIndex + 1 >= data.feed.getItemsPerPage() ) {
      try {
        // query is there, check whether we hit the last entry and re-query as necessary
        int startIndex = ( data.query.getStartIndex() == null ) ? 1 : data.query.getStartIndex();
        int totalResults = ( data.feed.getTotalResults() == null ) ? 0 : data.feed.getTotalResults();

        // data.entryIndex is the position of the next row to hand out within the current page, so this is the
        // index (as used by the query's start index) of the first row that has not been returned yet
        int newStartIndex = startIndex + data.entryIndex;
        if ( newStartIndex <= totalResults ) {
          // need to query for next page
          data.query.setStartIndex( newStartIndex );
          data.feed = data.query.execute();
          data.entryIndex = 0;
        }
      } catch ( IOException e2 ) {
        throw new KettleException( e2 );
      }
    }

    if ( data.feed == null ) {
      return null;
    }

    List<List<String>> entries = data.feed.getRows();
    if ( entries == null || data.entryIndex >= entries.size() ) {
      return null;
    }
    return entries.get( data.entryIndex++ );
  }

  /**
   * Stores the given meta and data references and delegates to the base implementation.
   *
   * @param smi the step metadata, expected to be a {@link GaInputStepMeta}
   * @param sdi the step data, expected to be a {@link GaInputStepData}
   */
  @Override
  public void dispose( StepMetaInterface smi, StepDataInterface sdi ) {
    meta = (GaInputStepMeta) smi;
    data = (GaInputStepData) sdi;

    super.dispose( smi, sdi );
  }

  /**
   * Strips everything up to and including the first colon from the given name.
   *
   * @param original the name, possibly prefixed with a classifier and a colon
   * @return the part after the first colon, or {@code original} unchanged if it contains no colon
   */
  private String removeClassifier( String original ) {
    // indexOf yields -1 when there is no colon, so substring( 0 ) returns the whole string
    int colonIndex = original.indexOf( ':' );
    return original.substring( colonIndex + 1 );
  }
}