package io.lumify.themoviedb; import io.lumify.core.mapreduce.LumifyElementMapperBase; import io.lumify.core.model.properties.LumifyProperties; import io.lumify.core.security.DirectVisibilityTranslator; import io.lumify.core.security.VisibilityTranslator; import org.apache.hadoop.io.Text; import org.json.JSONArray; import org.json.JSONObject; import org.securegraph.Metadata; import org.securegraph.Vertex; import org.securegraph.VertexBuilder; import org.securegraph.Visibility; import org.securegraph.accumulo.AccumuloAuthorizations; import org.securegraph.property.StreamingPropertyValue; import java.io.ByteArrayInputStream; import java.io.IOException; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.Date; public class ImportJsonMRMapper extends LumifyElementMapperBase<SequenceFileKey, Text> { public static final String MULTI_VALUE_KEY = ImportJsonMR.class.getName(); public static final String SOURCE = "TheMovieDb.org"; private static final SimpleDateFormat DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd"); private static final SimpleDateFormat DATE_YEAR_FORMAT = new SimpleDateFormat("yyyy"); private Visibility visibility; private AccumuloAuthorizations authorizations; private Visibility defaultVisibility; @Override protected void setup(Context context) throws IOException, InterruptedException { super.setup(context); VisibilityTranslator visibilityTranslator = new DirectVisibilityTranslator(); this.visibility = visibilityTranslator.getDefaultVisibility(); this.defaultVisibility = visibilityTranslator.getDefaultVisibility(); this.authorizations = new AccumuloAuthorizations(); } @Override protected void safeMap(SequenceFileKey key, Text line, Context context) throws IOException, InterruptedException, ParseException { String lineString = line.toString(); JSONObject json = new JSONObject(lineString); int id = json.getInt("id"); RecordType recordType = key.getRecordType(); context.setStatus(recordType + ":" + id); switch (recordType) { case MOVIE: mapMovie(id, json, context); break; case PERSON: mapPerson(id, json, context); break; case PRODUCTION_COMPANY: mapProductionCompany(id, json, context); break; } } private void mapProductionCompany(int productionCompanyId, JSONObject json, Context context) { VertexBuilder productionCompanyMutation = prepareVertex(TheMovieDbOntology.getProductionCompanyVertexId(productionCompanyId), visibility); LumifyProperties.CONCEPT_TYPE.addPropertyValue(productionCompanyMutation, MULTI_VALUE_KEY, TheMovieDbOntology.CONCEPT_TYPE_PRODUCTION_COMPANY, visibility); LumifyProperties.SOURCE.addPropertyValue(productionCompanyMutation, MULTI_VALUE_KEY, SOURCE, visibility); String name = json.optString("name"); if (name != null && name.length() > 0) { LumifyProperties.TITLE.addPropertyValue(productionCompanyMutation, MULTI_VALUE_KEY, name, visibility); } productionCompanyMutation.save(authorizations); context.getCounter(TheMovieDbImportCounters.PRODUCTION_COMPANIES_PROCESSED).increment(1); } private void mapPerson(int personId, JSONObject personJson, Context context) throws ParseException { String name = personJson.getString("name"); String vertexId = TheMovieDbOntology.getPersonVertexId(personId); VertexBuilder m = prepareVertex(vertexId, visibility); LumifyProperties.CONCEPT_TYPE.addPropertyValue(m, MULTI_VALUE_KEY, TheMovieDbOntology.CONCEPT_TYPE_PERSON, visibility); LumifyProperties.SOURCE.addPropertyValue(m, MULTI_VALUE_KEY, SOURCE, visibility); StreamingPropertyValue rawValue = new StreamingPropertyValue(new ByteArrayInputStream(personJson.toString().getBytes()), byte[].class); rawValue.store(true); rawValue.searchIndex(false); LumifyProperties.RAW.addPropertyValue(m, MULTI_VALUE_KEY, rawValue, visibility); LumifyProperties.TITLE.addPropertyValue(m, MULTI_VALUE_KEY, name, visibility); String biography = personJson.optString("biography"); if (biography != null) { Metadata metadata = new Metadata(); LumifyProperties.META_DATA_TEXT_DESCRIPTION.setMetadata(metadata, "Biography", defaultVisibility); LumifyProperties.META_DATA_MIME_TYPE.setMetadata(metadata, "text/plain", defaultVisibility); StreamingPropertyValue value = new StreamingPropertyValue(new ByteArrayInputStream(biography.getBytes()), String.class); LumifyProperties.TEXT.addPropertyValue(m, MULTI_VALUE_KEY, value, metadata, visibility); } String birthDateString = personJson.optString("birthday"); if (birthDateString != null && birthDateString.length() > 0) { Date birthDate = parseDate(birthDateString); TheMovieDbOntology.BIRTHDATE.addPropertyValue(m, MULTI_VALUE_KEY, birthDate, visibility); } String deathDateString = personJson.optString("deathday"); if (deathDateString != null && deathDateString.length() > 0) { Date deathDate = parseDate(deathDateString); TheMovieDbOntology.DEATH_DATE.addPropertyValue(m, MULTI_VALUE_KEY, deathDate, visibility); } JSONArray akas = personJson.optJSONArray("also_known_as"); if (akas != null) { for (int i = 0; i < akas.length(); i++) { String aka = akas.getString(i); TheMovieDbOntology.ALSO_KNOWN_AS.addPropertyValue(m, "aka" + i, aka, visibility); } } Vertex personVertex = m.save(authorizations); processPersonCredits(personId, personJson, personVertex); context.getCounter(TheMovieDbImportCounters.PERSONS_PROCESSED).increment(1); } private void processPersonCredits(int personId, JSONObject personJson, Vertex personVertex) { JSONObject combinedCredits = personJson.getJSONObject("combined_credits"); JSONArray cast = combinedCredits.getJSONArray("cast"); for (int i = 0; i < cast.length(); i++) { JSONObject movieJson = cast.getJSONObject(i); String mediaType = movieJson.getString("media_type"); if (!mediaType.equals("movie")) { continue; } int movieId = movieJson.getInt("id"); VertexBuilder movieMutation = prepareVertex(TheMovieDbOntology.getMovieVertexId(movieId), visibility); LumifyProperties.CONCEPT_TYPE.addPropertyValue(movieMutation, MULTI_VALUE_KEY, TheMovieDbOntology.CONCEPT_TYPE_MOVIE, visibility); LumifyProperties.SOURCE.addPropertyValue(movieMutation, MULTI_VALUE_KEY, SOURCE, visibility); String title = movieJson.optString("title"); if (title != null && title.length() > 0) { LumifyProperties.TITLE.addPropertyValue(movieMutation, MULTI_VALUE_KEY, title, visibility); } Vertex movieVertex = movieMutation.save(authorizations); addEdge(TheMovieDbOntology.getStarredInEdgeId(personId, movieId), personVertex, movieVertex, TheMovieDbOntology.EDGE_LABEL_STARRED_IN, visibility, authorizations); } } private void mapMovie(int movieId, JSONObject movieJson, Context context) throws ParseException { String title = movieJson.getString("title"); String vertexId = TheMovieDbOntology.getMovieVertexId(movieId); String sourceUrl = "http://www.themoviedb.org/movie/" + movieId; VertexBuilder m = prepareVertex(vertexId, visibility); LumifyProperties.CONCEPT_TYPE.addPropertyValue(m, MULTI_VALUE_KEY, TheMovieDbOntology.CONCEPT_TYPE_MOVIE, visibility); LumifyProperties.SOURCE.addPropertyValue(m, MULTI_VALUE_KEY, SOURCE, visibility); LumifyProperties.SOURCE_URL.addPropertyValue(m, MULTI_VALUE_KEY, sourceUrl, visibility); StreamingPropertyValue rawValue = new StreamingPropertyValue(new ByteArrayInputStream(movieJson.toString().getBytes()), byte[].class); rawValue.store(true); rawValue.searchIndex(false); LumifyProperties.RAW.addPropertyValue(m, MULTI_VALUE_KEY, rawValue, visibility); LumifyProperties.TITLE.addPropertyValue(m, MULTI_VALUE_KEY, title, visibility); String releaseDateString = movieJson.optString("release_date"); if (releaseDateString != null && releaseDateString.length() > 0) { Date releaseDate = parseDate(releaseDateString); TheMovieDbOntology.RELEASE_DATE.addPropertyValue(m, MULTI_VALUE_KEY, releaseDate, visibility); } JSONArray genres = movieJson.optJSONArray("genres"); if (genres != null) { for (int i = 0; i < genres.length(); i++) { JSONObject genre = genres.getJSONObject(i); String genreName = genre.getString("name"); TheMovieDbOntology.GENRE.addPropertyValue(m, MULTI_VALUE_KEY + "_" + genreName, genreName, visibility); } } double runtime = movieJson.optDouble("runtime", -1); if (runtime > 0) { runtime = runtime * 60; TheMovieDbOntology.RUNTIME.addPropertyValue(m, MULTI_VALUE_KEY, runtime, visibility); } int revenue = movieJson.optInt("revenue", -1); if (revenue > 0) { TheMovieDbOntology.REVENUE.addPropertyValue(m, MULTI_VALUE_KEY, revenue, visibility); } int budget = movieJson.optInt("budget", -1); if (budget > 0) { TheMovieDbOntology.BUDGET.addPropertyValue(m, MULTI_VALUE_KEY, budget, visibility); } String overview = movieJson.optString("overview"); if (overview != null && overview.length() > 0) { Metadata metadata = new Metadata(); LumifyProperties.META_DATA_TEXT_DESCRIPTION.setMetadata(metadata, "Overview", defaultVisibility); LumifyProperties.META_DATA_MIME_TYPE.setMetadata(metadata, "text/plain", defaultVisibility); StreamingPropertyValue value = new StreamingPropertyValue(new ByteArrayInputStream(overview.getBytes()), String.class); LumifyProperties.TEXT.addPropertyValue(m, MULTI_VALUE_KEY, value, metadata, visibility); } String tagLine = movieJson.optString("tagline"); if (tagLine != null && tagLine.length() > 0) { TheMovieDbOntology.TAG_LINE.addPropertyValue(m, MULTI_VALUE_KEY, tagLine, visibility); } Vertex movieVertex = m.save(authorizations); processMovieCredits(movieId, movieJson, movieVertex); processMovieProductionCompanies(movieId, movieJson, movieVertex); context.getCounter(TheMovieDbImportCounters.MOVIES_PROCESSED).increment(1); } private void processMovieProductionCompanies(int movieId, JSONObject movieJson, Vertex movieVertex) { JSONArray productionCompanies = movieJson.optJSONArray("production_companies"); if (productionCompanies != null) { for (int i = 0; i < productionCompanies.length(); i++) { JSONObject productionCompany = productionCompanies.getJSONObject(i); int productionCompanyId = productionCompany.getInt("id"); String sourceUrl = "http://www.themoviedb.org/company/" + productionCompanyId; VertexBuilder productionCompanyMutation = prepareVertex(TheMovieDbOntology.getProductionCompanyVertexId(productionCompanyId), visibility); LumifyProperties.CONCEPT_TYPE.addPropertyValue(productionCompanyMutation, MULTI_VALUE_KEY, TheMovieDbOntology.CONCEPT_TYPE_PRODUCTION_COMPANY, visibility); LumifyProperties.SOURCE.addPropertyValue(productionCompanyMutation, MULTI_VALUE_KEY, SOURCE, visibility); LumifyProperties.SOURCE_URL.addPropertyValue(productionCompanyMutation, MULTI_VALUE_KEY, sourceUrl, visibility); String name = productionCompany.optString("name"); if (name != null && name.length() > 0) { LumifyProperties.TITLE.addPropertyValue(productionCompanyMutation, MULTI_VALUE_KEY, name, visibility); } Vertex productionCompanyVertex = productionCompanyMutation.save(authorizations); addEdge(TheMovieDbOntology.getProductionCompanyProducedEdgeId(productionCompanyId, movieId), productionCompanyVertex, movieVertex, TheMovieDbOntology.EDGE_LABEL_PRODUCED, visibility, authorizations); } } } private void processMovieCredits(int movieId, JSONObject movieJson, Vertex movieVertex) { JSONObject credits = movieJson.getJSONObject("credits"); JSONArray cast = credits.getJSONArray("cast"); for (int i = 0; i < cast.length(); i++) { JSONObject castJson = cast.getJSONObject(i); int personId = castJson.getInt("id"); String sourceUrl = "http://www.themoviedb.org/person/" + personId; VertexBuilder personMutation = prepareVertex(TheMovieDbOntology.getPersonVertexId(personId), visibility); LumifyProperties.CONCEPT_TYPE.addPropertyValue(personMutation, MULTI_VALUE_KEY, TheMovieDbOntology.CONCEPT_TYPE_PERSON, visibility); LumifyProperties.SOURCE.addPropertyValue(personMutation, MULTI_VALUE_KEY, SOURCE, visibility); LumifyProperties.SOURCE_URL.addPropertyValue(personMutation, MULTI_VALUE_KEY, sourceUrl, visibility); String name = castJson.optString("name"); if (name != null && name.length() > 0) { LumifyProperties.TITLE.addPropertyValue(personMutation, MULTI_VALUE_KEY, name, visibility); } Vertex personVertex = personMutation.save(authorizations); addEdge(TheMovieDbOntology.getStarredInEdgeId(personId, movieId), personVertex, movieVertex, TheMovieDbOntology.EDGE_LABEL_STARRED_IN, visibility, authorizations); String character = castJson.optString("character"); if (character != null && character.length() > 0) { String roleId = TheMovieDbOntology.getRoleId(personId, movieId); VertexBuilder roleMutation = prepareVertex(TheMovieDbOntology.getRoleVertexId(roleId), visibility); LumifyProperties.CONCEPT_TYPE.addPropertyValue(roleMutation, MULTI_VALUE_KEY, TheMovieDbOntology.CONCEPT_TYPE_ROLE, visibility); LumifyProperties.SOURCE.addPropertyValue(roleMutation, MULTI_VALUE_KEY, SOURCE, visibility); LumifyProperties.TITLE.addPropertyValue(roleMutation, MULTI_VALUE_KEY, character, visibility); Vertex roleVertex = roleMutation.save(authorizations); addEdge(TheMovieDbOntology.getPlayedEdgeId(personId, roleId), personVertex, roleVertex, TheMovieDbOntology.EDGE_LABEL_PLAYED, visibility, authorizations); addEdge(TheMovieDbOntology.getHasRoleEdgeId(movieId, roleId), movieVertex, roleVertex, TheMovieDbOntology.EDGE_LABEL_HAS_ROLE, visibility, authorizations); } } } private Date parseDate(String str) throws ParseException { try { return DATE_FORMAT.parse(str); } catch (ParseException p) { return DATE_YEAR_FORMAT.parse(str); } } }