/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.usergrid.tools;
import au.com.bytecode.opencsv.CSVReader;
import au.com.bytecode.opencsv.CSVWriter;
import com.amazonaws.ClientConfiguration;
import com.amazonaws.Protocol;
import com.amazonaws.auth.AWSCredentials;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.model.PutObjectResult;
import com.google.common.collect.BiMap;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.usergrid.management.OrganizationInfo;
import org.apache.usergrid.persistence.*;
import org.apache.usergrid.persistence.entities.Application;
import org.apache.usergrid.persistence.schema.CollectionInfo;
import org.apache.usergrid.utils.InflectionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.FilenameFilter;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.Map.Entry;
import java.util.concurrent.TimeUnit;
import static org.apache.usergrid.persistence.Schema.getDefaultSchema;
/**
* Exports all known (non-dynamic) fields from Schema whose types are primitive, Date, or String into a
* pipe-delimited file, plus (hard-coded for now) fields from Notification, Notifier, and Receipt. When -startTime
* is omitted, scans the existing *.csv files in the output directory and resumes from the latest end time found.
* When -endTime is omitted, the export ends one hour before the current time. Explicitly sets
* "cassandra.readcl=ONE" so reads stay cheap.
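* <p>
* Illustrative invocation (how the tools jar is launched depends on the deployment; the timestamps are example
* epoch-millisecond values):
* <pre>
* java -jar usergrid-tools.jar WarehouseExport -startTime 1388534400000 -endTime 1391212800000 -upload
* </pre>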
*/
public class WarehouseExport extends ExportingToolBase {
private static final Logger logger = LoggerFactory.getLogger( WarehouseExport.class );
private static final char SEPARATOR = '|';
public static final String BUCKET_PROPNAME = "usergrid.warehouse-export-bucket";
public static final String ACCESS_ID_PROPNAME = "usergrid.warehouse-export-access-id";
public static final String SECRET_KEY_PROPNAME = "usergrid.warehouse-export-secret-key";
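// note: SimpleDateFormat is not thread-safe; acceptable here because this tool formats dates from a single thread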
private static final SimpleDateFormat DATE_FORMAT = new SimpleDateFormat( "yyyy-MM-dd HH:mm:ss.SSS" );
private static final String[] BASE_ATTRIBUTES =
{ "uuid", "organization", "application", "type", "created", "modified" };
private static final String START_TIME = "startTime";
private static final String END_TIME = "endTime";
private static final String UPLOAD = "upload";
private static final String[] NOTIFICATION_ATTRIBUTES = {
"payloads", "queued", "started", "finished", "deliver", "expire", "canceled", "errorMessage", "statistics"
};
private static final String[] NOTIFIER_ATTRIBUTES = { "provider", "environment" };
private static final String[] RECEIPT_ATTRIBUTES =
{ "payload", "sent", "errorCode", "errorMessage", "notifierId", "notificationUUID" };
private static final Map<String, String[]> URAP_ATTRIBUTES = new HashMap<String, String[]>();
static {
URAP_ATTRIBUTES.put( "notification", NOTIFICATION_ATTRIBUTES );
URAP_ATTRIBUTES.put( "notifier", NOTIFIER_ATTRIBUTES );
URAP_ATTRIBUTES.put( "receipt", RECEIPT_ATTRIBUTES );
}
private CSVWriter writer;
private String[] collectionNames;
private Map<String, String[]> collectionFieldMap;
private Date startTime, endTime;
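/**
* Runs the export: resolves the time range, writes a metadata row, a header row, and one row per matching entity
* across all organizations into a timestamped CSV file, then optionally uploads the file to S3.
*/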
@Override
public void runTool( CommandLine line ) throws Exception {
// use read consistency level ONE to keep the export light on the cluster
System.setProperty( "cassandra.readcl", "ONE" );
startSpring();
setVerbose( line );
applyOrgId( line );
prepareBaseOutputFileName( line );
outputDir = createOutputParentDir();
logger.info( "Export directory: {}", outputDir.getAbsolutePath() );
// determine the time range to export
applyStartTime( line );
applyEndTime( line );
logger.error( "startTime: {}, endTime: {}", startTime, endTime );
if ( startTime.getTime() >= endTime.getTime() ) {
logger.error( "startTime must be before endTime. exiting." );
System.exit( 1 );
}
// create "modified" query to select data
StringBuilder builder = new StringBuilder();
builder.append( "modified >= " ).append( startTime.getTime() ).append( " and " );
builder.append( "modified <= " ).append( endTime.getTime() );
String queryString = builder.toString();
// create writer
String dateString = DATE_FORMAT.format( new Date() );
String fileName = outputDir.getAbsolutePath() + "/" + dateString + ".csv";
FileWriter fw = new FileWriter( fileName );
writer = new CSVWriter( fw, SEPARATOR, CSVWriter.NO_QUOTE_CHARACTER, '\'' );
try {
writeMetadata();
writeHeaders();
// Loop through the organizations
Map<UUID, String> organizations = getOrganizations();
for ( Entry<UUID, String> orgIdAndName : organizations.entrySet() ) {
exportApplicationsForOrg( orgIdAndName, queryString );
}
}
finally {
writer.close();
}
// now that file is written, copy it to S3
if ( line.hasOption( "upload" ) ) {
logger.info( "Copy to S3" );
copyToS3( fileName );
}
}
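/** Uploads the given file to the configured S3 bucket, using the warehouse-export credentials from properties. */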
private void copyToS3( String fileName ) {
String bucketName = ( String ) properties.get( BUCKET_PROPNAME );
String accessId = ( String ) properties.get( ACCESS_ID_PROPNAME );
String secretKey = ( String ) properties.get( SECRET_KEY_PROPNAME );
AWSCredentials credentials = new BasicAWSCredentials( accessId, secretKey );
ClientConfiguration clientConfig = new ClientConfiguration();
clientConfig.setProtocol( Protocol.HTTP );
AmazonS3Client s3Client = new AmazonS3Client( credentials, clientConfig );
// create the bucket only if it does not already exist
if ( !s3Client.doesBucketExist( bucketName ) ) {
s3Client.createBucket( bucketName );
}
File uploadFile = new File( fileName );
PutObjectResult putObjectResult = s3Client.putObject( bucketName, uploadFile.getName(), uploadFile );
logger.info("Uploaded file etag={}", putObjectResult.getETag());
}
@Override
@SuppressWarnings("static-access")
public Options createOptions() {
Options options = super.createOptions();
Option startTime = OptionBuilder.hasArg()
.withDescription( "minimum modified time, in epoch milliseconds" ).create( START_TIME );
Option endTime = OptionBuilder.hasArg()
.withDescription( "maximum modified time, in epoch milliseconds" ).create( END_TIME );
Option upload = OptionBuilder.withDescription( "upload the resulting file to the S3 blob-store" ).create( UPLOAD );
options.addOption( startTime );
options.addOption( endTime );
options.addOption( upload );
return options;
}
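/**
* Applies -startTime when given; otherwise scans existing *.csv files in the output directory and resumes one
* millisecond after the latest end time recorded in their metadata rows.
*/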
private void applyStartTime( CommandLine line ) throws Exception {
if ( line.hasOption( START_TIME ) ) {
startTime = new Date( Long.parseLong( line.getOptionValue( START_TIME ) ) );
}
else {
// attempt to read last end time from directory
File[] files = outputDir.listFiles( new FilenameFilter() {
@Override
public boolean accept( File dir, String name ) {
return name.endsWith( ".csv" );
}
} );
long lastEndTime = 0;
for ( File file : files ) {
long endTime = readEndTime( file );
if ( endTime > lastEndTime ) {
lastEndTime = endTime;
}
}
startTime = new Date( lastEndTime + 1 );
}
}
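/** Applies -endTime when given; otherwise defaults to one hour before the current time. */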
private void applyEndTime( CommandLine line ) {
if ( line.hasOption( END_TIME ) ) {
endTime = new Date( Long.parseLong( line.getOptionValue( END_TIME ) ) );
}
else {
endTime = new Date( System.currentTimeMillis() - TimeUnit.HOURS.toMillis( 1 ) );
}
}
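/** Reads the end timestamp from a previous export file's metadata row ("start", start millis, "end", end millis). */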
private long readEndTime( File file ) throws Exception {
CSVReader reader = new CSVReader( new FileReader( file ), SEPARATOR, CSVWriter.NO_QUOTE_CHARACTER, '\'' );
try {
String[] firstLine = reader.readNext();
// readNext() returns null for an empty file; also guard against malformed metadata rows
if ( firstLine != null && firstLine.length >= 4 && "start".equals( firstLine[0] )
&& "end".equals( firstLine[2] ) ) {
return Long.parseLong( firstLine[3] );
}
}
finally {
reader.close();
}
return 0;
}
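/** Writes the metadata row ("start", start millis, "end", end millis) that later runs use to resume. */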
private void writeMetadata() {
writer.writeNext( new String[] { "start", "" + startTime.getTime(), "end", "" + endTime.getTime() } );
}
private void writeHeaders() {
writer.writeNext( getHeaders() );
}
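/** Builds the header row: the base attributes followed by one "collection_attribute" column per mapped field. */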
private String[] getHeaders() {
List<String> headers = new ArrayList<String>();
headers.addAll( Arrays.asList( BASE_ATTRIBUTES ) );
Map<String, String[]> cfm = getCollectionFieldMap();
for ( Map.Entry<String, String[]> entry : cfm.entrySet() ) {
String collection = entry.getKey();
String[] attributes = entry.getValue();
for ( String attribute : attributes ) {
headers.add( collection + "_" + attribute );
}
}
String[] stringHeaders = new String[headers.size()];
return headers.toArray( stringHeaders );
}
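/**
* Lazily builds the map of collection type to exportable field names: Schema-defined properties whose types are
* primitive, String, or Date, plus the hard-coded URAP attributes that the usergrid-stack Schema cannot see.
*/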
private Map<String, String[]> getCollectionFieldMap() {
if ( collectionFieldMap != null ) {
return collectionFieldMap;
}
// get basic stuff from Schema
String[] collectionTypes = getCollectionTypes();
collectionFieldMap = new TreeMap<String, String[]>();
for ( String type : collectionTypes ) {
Set<String> propertyNames = Schema.getDefaultSchema().getPropertyNames( type );
for ( String attr : BASE_ATTRIBUTES ) {
propertyNames.remove( attr );
}
Iterator<String> i = propertyNames.iterator();
while ( i.hasNext() ) {
String property = i.next();
Class<?> cls = getDefaultSchema().getPropertyType( type, property );
if ( !cls.isPrimitive() && cls != String.class && cls != Date.class ) {
i.remove();
}
}
String[] props = new String[propertyNames.size()];
propertyNames.toArray( props );
Arrays.sort( props );
collectionFieldMap.put( type, props );
}
// add URAP stuff that's not visible to usergrid-stack
for ( Map.Entry<String, String[]> entry : URAP_ATTRIBUTES.entrySet() ) {
String type = entry.getKey();
String[] attributes = entry.getValue();
Arrays.sort( attributes );
collectionFieldMap.put( type, attributes );
}
return collectionFieldMap;
}
/** @return Map of Organization UUID -> Name */
private Map<UUID, String> getOrganizations() throws Exception {
Map<UUID, String> organizationNames;
if ( orgId == null ) {
organizationNames = managementService.getOrganizations();
}
else {
OrganizationInfo info = managementService.getOrganizationByUuid( orgId );
if ( info == null ) {
logger.error( "Organization info is null!" );
System.exit( 1 );
}
organizationNames = new HashMap<UUID, String>();
organizationNames.put( orgId, info.getName() );
}
return organizationNames;
}
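/** @return sorted collection types defined on Application in the default Schema, minus associated entity types */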
private String[] getCollectionTypes() {
if ( collectionNames != null ) {
return collectionNames;
}
Collection<CollectionInfo> system_collections =
getDefaultSchema().getCollections( Application.ENTITY_TYPE ).values();
ArrayList<String> collections = new ArrayList<String>( system_collections.size() );
for ( CollectionInfo collection : system_collections ) {
if ( !Schema.isAssociatedEntityType( collection.getType() ) ) {
collections.add( collection.getType() );
}
}
collectionNames = new String[collections.size()];
Collections.sort( collections );
return collections.toArray( collectionNames );
}
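/**
* Exports every collection of every application in the given organization, paging through the entities matched
* by the "modified" time-range query and fetching only the exportable properties of each.
*/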
private void exportApplicationsForOrg( Entry<UUID, String> orgIdAndName, String queryString ) throws Exception {
logger.info( "organization: {} / {}", orgIdAndName.getValue(), orgIdAndName.getKey() );
String orgName = orgIdAndName.getValue();
BiMap<UUID, String> applications = managementService.getApplicationsForOrganization( orgIdAndName.getKey() );
for ( Entry<UUID, String> appIdAndName : applications.entrySet() ) {
String appName = appIdAndName.getValue();
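// application names come back as "orgName/appName"; keep only the application part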
appName = appName.substring( appName.indexOf( '/' ) + 1 );
logger.info( "application {} / {}", appName, appIdAndName.getKey() );
EntityManager em = emf.getEntityManager( appIdAndName.getKey() );
Map<String, String[]> cfm = getCollectionFieldMap();
// Loop through the collections of the Application
Set<String> collections = em.getApplicationCollections();
for ( String collectionName : collections ) {
// set up for retrieving only the necessary properties
String entityType = InflectionUtils.singularize( collectionName );
String[] props = cfm.get( entityType );
Collection<String> properties =
new ArrayList<String>( BASE_ATTRIBUTES.length + ( props != null ? props.length : 0 ) );
properties.addAll( Arrays.asList( BASE_ATTRIBUTES ) );
if ( props != null ) {
properties.addAll( Arrays.asList( props ) );
}
Query query = Query.fromQL( queryString );
query.setLimit( MAX_ENTITY_FETCH );
query.setResultsLevel( Query.Level.REFS );
Results results = em.searchCollection( em.getApplicationRef(), collectionName, query );
while ( results.size() > 0 ) {
List<Entity> entities = em.getPartialEntities( results.getIds(), properties );
for ( Entity entity : entities ) {
write( orgName, appName, entity );
}
if ( results.getCursor() == null ) {
break;
}
query.setCursor( results.getCursor() );
results = em.searchCollection( em.getApplicationRef(), collectionName, query );
}
}
}
}
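/**
* Writes one CSV row for the entity: the base attributes first, then each mapped collection's columns, filled in
* only for the collection that matches the entity's type and left empty otherwise.
*/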
private void write( String orgName, String appName, Entity entity ) throws Exception {
Map<String, String[]> cfm = getCollectionFieldMap();
String uuid = entity.getUuid().toString();
String created = DATE_FORMAT.format( entity.getCreated() );
String modified = DATE_FORMAT.format( entity.getModified() );
String type = entity.getType();
List<String> values = new ArrayList<String>( 30 );
values.add( uuid );
values.add( orgName );
values.add( appName );
values.add( type );
values.add( created );
values.add( modified );
for ( Map.Entry<String, String[]> entry : cfm.entrySet() ) {
String collection = entry.getKey();
String[] attributes = entry.getValue();
if ( collection.equals( type ) ) {
for ( String attribute : attributes ) {
Object prop = entity.getProperty( attribute );
values.add( prop != null ? prop.toString() : null );
}
}
else {
for ( String attribute : attributes ) {
values.add( null );
}
}
}
String[] stringValues = new String[values.size()];
values.toArray( stringValues );
writer.writeNext( stringValues );
}
}