package io.lumify.twitter;

import static com.google.common.base.Preconditions.checkNotNull;

import io.lumify.core.model.properties.LumifyProperties;
import io.lumify.core.model.termMention.TermMentionBuilder;
import io.lumify.core.model.user.UserRepository;
import io.lumify.core.model.workQueue.WorkQueueRepository;
import io.lumify.core.security.VisibilityTranslator;
import io.lumify.twitter.loaders.LoaderConstants;
import io.lumify.twitter.loaders.TweetVertexLoader;
import io.lumify.twitter.loaders.UserVertexDetails;
import io.lumify.twitter.loaders.UserVertexLoader;
import io.lumify.web.clientapi.model.VisibilityJson;

import java.util.Locale;
import java.util.concurrent.TimeUnit;

import org.securegraph.Authorizations;
import org.securegraph.Edge;
import org.securegraph.Graph;
import org.securegraph.Vertex;
import org.securegraph.VertexBuilder;

import twitter4j.HashtagEntity;
import twitter4j.RetweetStatusFactory;
import twitter4j.Status;
import twitter4j.URLEntity;
import twitter4j.UserMentionEntity;

import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.inject.Inject;

/**
 * Responsible for transforming a tweet {@link Status} to prepare it for loading into the data store.
 * <p>
 * For each tweet this class loads the tweeting user and the tweet as vertices, links them with a
 * "tweeted" edge, creates vertices/edges/term mentions for the tweet's hashtags, URLs, media and
 * user mentions, and, when the tweet is a retweet, transforms the retweeted status as well.
 */
public final class TweetTransformer {
    /** Process name recorded on every term mention created by this class. */
    private static final String PROCESS_TWITTER_INGEST = "twitter-ingest";

    /** Prefix of the graph id of URL vertices. */
    private static final String URL_VERTEX_ID_PREFIX = "TWITTER_URL_";

    /** Prefix of the graph id of hashtag vertices. */
    private static final String HASHTAG_VERTEX_ID_PREFIX = "TWITTER_HASHTAG_";

    private final Graph graph;
    private final WorkQueueRepository workQueueRepository;
    private final UserRepository userRepository;
    private final VisibilityTranslator visibilityTranslator;
    private final Authorizations authorizations;
    private final UserVertexLoader userLoader;
    private final TweetVertexLoader tweetLoader;

    /** Recently seen URL vertices keyed by vertex id; entries expire 15 minutes after being written. */
    private final Cache<String, Vertex> urlVertexCache =
            CacheBuilder.newBuilder().expireAfterWrite(15, TimeUnit.MINUTES).build();

    /** Recently seen hashtag vertices keyed by vertex id; entries expire 15 minutes after being written. */
    private final Cache<String, Vertex> hashtagVertexCache =
            CacheBuilder.newBuilder().expireAfterWrite(15, TimeUnit.MINUTES).build();

    /**
     * Creates a transformer that operates with the authorizations of the system user.
     *
     * @param secureGraph The underlying graph data store instance, not null
     * @param workQueueRepo The work queue used to store pending operations, not null
     * @param userRepo The system user repository used for retrieving users known to the system, not null
     * @param translator The visibility expression translator, not null
     * @param userVertexLoader The loader used for storing user vertices, not null
     * @param tweetVertexLoader The loader used for storing tweet status vertices, not null
     */
    @Inject
    public TweetTransformer(final Graph secureGraph, final WorkQueueRepository workQueueRepo,
            final UserRepository userRepo, final VisibilityTranslator translator,
            final UserVertexLoader userVertexLoader, final TweetVertexLoader tweetVertexLoader) {
        graph = checkNotNull(secureGraph);
        workQueueRepository = checkNotNull(workQueueRepo);
        userRepository = checkNotNull(userRepo);
        visibilityTranslator = checkNotNull(translator);
        userLoader = checkNotNull(userVertexLoader);
        tweetLoader = checkNotNull(tweetVertexLoader);

        // Must come after userRepository has been assigned.
        authorizations = userRepository.getAuthorizations(userRepository.getSystemUser());
    }

    /**
     * Transforms the content of the provided tweet status to the format required by the underlying data store.
     * <p>
     * If the status is a retweet, the retweeted status is transformed too (via a recursive call from
     * {@link #processRetweetStatus(Vertex, Status)}).
     *
     * @param tweetStatus The status to process, not null
     * @return The vertex representing the transformed tweet status content
     * @throws NullPointerException if {@code tweetStatus} is null
     */
    public Vertex transformTweetStatus(final Status tweetStatus) {
        checkNotNull(tweetStatus);

        final UserVertexDetails userDetails = UserVertexDetails.fromTweetStatus(tweetStatus);
        final Vertex userVertex = userLoader.loadVertex(userDetails);
        final Vertex tweetVertex = tweetLoader.loadVertex(tweetStatus);

        createTweetedEdge(userVertex, tweetVertex);
        processEntities(tweetVertex, tweetStatus);
        processRetweetStatus(tweetVertex, tweetStatus);

        return tweetVertex;
    }

    /** Adds (and flushes) the edge from the tweeting user to the tweet. */
    private void createTweetedEdge(final Vertex userVertex, final Vertex tweetVertex) {
        final String tweetedEdgeId = userVertex.getId() + "_TWEETED_" + tweetVertex.getId();
        graph.addEdge(tweetedEdgeId, userVertex, tweetVertex, TwitterOntology.EDGE_LABEL_TWEETED,
                LoaderConstants.EMPTY_VISIBILITY, authorizations);
        graph.flush();
    }

    /**
     * When the tweet is a retweet, transforms the retweeted status and links it to the tweet.
     * Does nothing when {@link Status#getRetweetedStatus()} returns null.
     */
    private void processRetweetStatus(final Vertex tweetVertex, final Status tweetStatus) {
        if (tweetStatus.getRetweetedStatus() == null) {
            return;
        }

        // NOTE(review): the status to transform is obtained from RetweetStatusFactory rather than used
        // directly from getRetweetedStatus(); the factory is not visible here - confirm why it is needed.
        final Status retweetedStatus = RetweetStatusFactory.createRetweetedStatus(tweetStatus);
        final Vertex retweetedTweet = transformTweetStatus(retweetedStatus);

        // The edge runs from the retweeted (original) tweet to the retweeting tweet.
        final String retweetEdgeId = tweetVertex.getId() + "_RETWEET_" + retweetedTweet.getId();
        graph.addEdge(retweetEdgeId, retweetedTweet, tweetVertex, TwitterOntology.EDGE_LABEL_RETWEET,
                LoaderConstants.EMPTY_VISIBILITY, authorizations);
        graph.flush();
    }

    /** Processes the hashtag, URL, user mention and media entities of the tweet, in that order. */
    private void processEntities(final Vertex tweetVertex, final Status tweetStatus) {
        processHashtags(tweetVertex, tweetStatus.getHashtagEntities());
        processUrls(tweetVertex, tweetStatus.getURLEntities());
        processUserMentions(tweetVertex, tweetStatus.getUserMentionEntities());
        // Media entities are passed through the URL handling as well.
        processUrls(tweetVertex, tweetStatus.getMediaEntities());
    }

    /** Creates a URL vertex, a "referenced URL" edge and a term mention for each URL entity. */
    private void processUrls(final Vertex tweetVertex, final URLEntity[] urlEntities) {
        for (final URLEntity urlEntity : urlEntities) {
            final Vertex urlVertex = getUrlVertex(urlEntity);
            final Edge edge = createReferencesUrlEdge(tweetVertex, urlVertex);
            createTermMention(tweetVertex, urlVertex, edge, TwitterOntology.CONCEPT_TYPE_URL,
                    urlEntity.getStart(), urlEntity.getEnd());
        }
    }

    /**
     * Returns the vertex for the entity's URL, creating it when it does not exist yet.
     * The expanded URL is preferred; the plain URL is used when no expanded URL is available.
     */
    private Vertex getUrlVertex(final URLEntity urlEntity) {
        String url = urlEntity.getExpandedURL();
        if (url == null) {
            url = urlEntity.getURL();
        }
        return getOrCreateEntityVertex(urlVertexCache, URL_VERTEX_ID_PREFIX + url,
                TwitterOntology.CONCEPT_TYPE_URL, url);
    }

    /** Adds (and flushes) the edge from the tweet to a URL it references. */
    private Edge createReferencesUrlEdge(final Vertex tweetVertex, final Vertex urlVertex) {
        final String referencedUrlEdgeId = tweetVertex.getId() + "_REFURL_" + urlVertex.getId();
        final Edge edge = graph.addEdge(referencedUrlEdgeId, tweetVertex, urlVertex,
                TwitterOntology.EDGE_LABEL_REFERENCED_URL, LoaderConstants.EMPTY_VISIBILITY, authorizations);
        graph.flush();
        return edge;
    }

    /** Loads a user vertex, a "mentioned" edge and a term mention for each user mention entity. */
    private void processUserMentions(final Vertex tweetVertex, final UserMentionEntity[] userMentionEntities) {
        for (final UserMentionEntity userMentionEntity : userMentionEntities) {
            final UserVertexDetails userDetails = UserVertexDetails.fromUserMention(userMentionEntity);
            final Vertex userVertex = userLoader.loadVertex(userDetails);
            final Edge edge = createMentionedEdge(tweetVertex, userVertex);
            createTermMention(tweetVertex, userVertex, edge, TwitterOntology.CONCEPT_TYPE_USER,
                    userMentionEntity.getStart(), userMentionEntity.getEnd());
        }
    }

    /** Adds (and flushes) the edge from the tweet to a user it mentions. */
    private Edge createMentionedEdge(final Vertex tweetVertex, final Vertex userVertex) {
        final String mentionedEdgeId = tweetVertex.getId() + "_MENTIONED_" + userVertex.getId();
        final Edge edge = graph.addEdge(mentionedEdgeId, tweetVertex, userVertex,
                TwitterOntology.EDGE_LABEL_MENTIONED, LoaderConstants.EMPTY_VISIBILITY, authorizations);
        graph.flush();
        return edge;
    }

    /** Creates a hashtag vertex, a "tagged" edge and a term mention for each hashtag entity. */
    private void processHashtags(final Vertex tweetVertex, final HashtagEntity[] hashtagEntities) {
        for (final HashtagEntity hashtagEntity : hashtagEntities) {
            final Vertex hashtagVertex = getHashtagVertex(hashtagEntity);
            final Edge edge = createTaggedEdge(tweetVertex, hashtagVertex);
            createTermMention(tweetVertex, hashtagVertex, edge, TwitterOntology.CONCEPT_TYPE_HASHTAG,
                    hashtagEntity.getStart(), hashtagEntity.getEnd());
        }
    }

    /**
     * Returns the vertex for the entity's hashtag, creating it when it does not exist yet.
     * The vertex id uses the lower-cased hashtag text, so differently-cased spellings share one vertex;
     * the title of a newly created vertex keeps the casing of the hashtag that created it.
     */
    private Vertex getHashtagVertex(final HashtagEntity hashtagEntity) {
        final String hashtagText = hashtagEntity.getText();
        // Locale.ROOT keeps the id independent of the JVM default locale (e.g. under a Turkish locale
        // "I" would otherwise lower-case to a dotless "i" and yield a different vertex id).
        final String vertexId = HASHTAG_VERTEX_ID_PREFIX + hashtagText.toLowerCase(Locale.ROOT);
        return getOrCreateEntityVertex(hashtagVertexCache, vertexId,
                TwitterOntology.CONCEPT_TYPE_HASHTAG, hashtagText);
    }

    /**
     * Looks a vertex up in the given cache, then in the graph, and creates it when neither has it.
     * A newly created vertex gets concept type, source and title properties, is flushed, and its title
     * property is pushed onto the graph property work queue. The resulting vertex is (re)written to the
     * cache whenever it was not served from the cache.
     *
     * @param cache The cache to consult and update
     * @param vertexId The graph id of the vertex
     * @param conceptType The concept type assigned to a newly created vertex
     * @param title The title assigned to a newly created vertex
     * @return The cached, existing or newly created vertex
     */
    private Vertex getOrCreateEntityVertex(final Cache<String, Vertex> cache, final String vertexId,
            final String conceptType, final String title) {
        final Vertex cachedVertex = cache.getIfPresent(vertexId);
        if (cachedVertex != null) {
            return cachedVertex;
        }

        Vertex vertex = graph.getVertex(vertexId, authorizations);
        if (vertex == null) {
            final VertexBuilder vertexBuilder = graph.prepareVertex(vertexId, LoaderConstants.EMPTY_VISIBILITY);
            LumifyProperties.CONCEPT_TYPE.addPropertyValue(vertexBuilder, LoaderConstants.MULTI_VALUE_KEY,
                    conceptType, LoaderConstants.EMPTY_VISIBILITY);
            LumifyProperties.SOURCE.addPropertyValue(vertexBuilder, LoaderConstants.MULTI_VALUE_KEY,
                    LoaderConstants.SOURCE_NAME, LoaderConstants.EMPTY_VISIBILITY);
            LumifyProperties.TITLE.addPropertyValue(vertexBuilder, LoaderConstants.MULTI_VALUE_KEY,
                    title, LoaderConstants.EMPTY_VISIBILITY);

            vertex = vertexBuilder.save(authorizations);
            // Flush before queueing so the vertex is written before the work item is pushed.
            graph.flush();
            workQueueRepository.pushGraphPropertyQueue(vertex, LumifyProperties.TITLE.getProperty(vertex));
        }

        cache.put(vertexId, vertex);
        return vertex;
    }

    /** Adds (and flushes) the edge from the tweet to a hashtag it is tagged with. */
    private Edge createTaggedEdge(final Vertex tweetVertex, final Vertex hashtagVertex) {
        final String taggedEdgeId = tweetVertex.getId() + "_TAGGED_" + hashtagVertex.getId();
        final Edge edge = graph.addEdge(taggedEdgeId, tweetVertex, hashtagVertex,
                TwitterOntology.EDGE_LABEL_TAGGED, LoaderConstants.EMPTY_VISIBILITY, authorizations);
        graph.flush();
        return edge;
    }

    /**
     * Saves a term mention on the tweet that is resolved to the given vertex through the given edge.
     *
     * @param tweetVertex The tweet vertex the mention belongs to
     * @param vertex The vertex the mention resolves to; its title becomes the mention title
     * @param edge The edge connecting the tweet to {@code vertex}
     * @param conceptUri The concept IRI of the mention
     * @param startOffset The start offset of the entity as reported by the tweet entity
     * @param endOffset The end offset of the entity as reported by the tweet entity
     */
    private void createTermMention(final Vertex tweetVertex, final Vertex vertex, final Edge edge,
            final String conceptUri, final long startOffset, final long endOffset) {
        final VisibilityJson visibilitySource = new VisibilityJson();
        final String title = LumifyProperties.TITLE.getPropertyValue(vertex);

        new TermMentionBuilder()
                .sourceVertex(tweetVertex)
                .propertyKey(LoaderConstants.MULTI_VALUE_KEY)
                .start(startOffset)
                .end(endOffset)
                .title(title)
                .process(PROCESS_TWITTER_INGEST)
                .conceptIri(conceptUri)
                .visibilityJson(visibilitySource)
                .resolvedTo(vertex, edge)
                .save(graph, visibilityTranslator, authorizations);
    }
}