// Copyright 2017 The Nomulus Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package google.registry.rde;
import static google.registry.request.Action.Method.GET;
import static google.registry.request.Action.Method.POST;
import static google.registry.util.PipelineUtils.createJobPath;
import static google.registry.xml.ValidationMode.LENIENT;
import static google.registry.xml.ValidationMode.STRICT;
import static javax.servlet.http.HttpServletResponse.SC_NO_CONTENT;
import com.google.common.base.Ascii;
import com.google.common.base.Optional;
import com.google.common.base.Predicate;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableSetMultimap;
import com.google.common.collect.Multimaps;
import google.registry.config.RegistryConfig.Config;
import google.registry.mapreduce.MapreduceRunner;
import google.registry.mapreduce.inputs.EppResourceInputs;
import google.registry.mapreduce.inputs.NullInput;
import google.registry.model.EppResource;
import google.registry.model.common.Cursor;
import google.registry.model.common.Cursor.CursorType;
import google.registry.model.contact.ContactResource;
import google.registry.model.host.HostResource;
import google.registry.model.index.EppResourceIndex;
import google.registry.model.rde.RdeMode;
import google.registry.model.registrar.Registrar;
import google.registry.request.Action;
import google.registry.request.HttpException.BadRequestException;
import google.registry.request.Parameter;
import google.registry.request.RequestParameters;
import google.registry.request.Response;
import google.registry.request.auth.Auth;
import google.registry.request.auth.AuthLevel;
import google.registry.util.Clock;
import google.registry.util.FormattingLogger;
import javax.inject.Inject;
import org.joda.time.DateTime;
import org.joda.time.Duration;
/**
* MapReduce that idempotently stages escrow deposit XML files on GCS for RDE/BRDA for all TLDs.
*
* <h3>MapReduce Operation</h3>
*
* <p>This task starts by asking {@link PendingDepositChecker} which deposits need to be generated.
* If there's nothing to deposit, we return 204 No Content; otherwise, we fire off a MapReduce job
* and redirect to its status GUI. The task can also be run in manual operation, as described below.
*
* <p>The mapreduce job scans every {@link EppResource} in Datastore. It maps a point-in-time
* representation of each entity to the escrow XML files in which it should appear.
*
* <p>There is one map worker for each {@code EppResourceIndexBucket} entity group shard. There is
* one reduce worker for each deposit being generated.
*
* <p>{@link ContactResource} and {@link HostResource} are emitted on all TLDs, even when the
* domains on a TLD don't reference them. BRDA {@link RdeMode#THIN thin} deposits exclude contacts
* and hosts entirely.
*
* <p>{@link Registrar} entities, both active and inactive, are included in all deposits. They are
* not rewinded point-in-time.
*
* <p>The XML deposit files generated by this job are humongous. A tiny XML report file is generated
* for each deposit, telling us how much of what it contains.
*
* <p>Once a deposit is successfully generated, an {@link RdeUploadAction} is enqueued which will
* upload it via SFTP to the third-party escrow provider.
*
* <p>To generate escrow deposits manually and locally, use the {@code nomulus} tool command
* {@code GenerateEscrowDepositCommand}.
*
* <h3>Logging</h3>
*
* <p>To identify the reduce worker request for a deposit in App Engine's log viewer, you can use
* search text like {@code tld=soy}, {@code watermark=2015-01-01}, and {@code mode=FULL}.
*
* <h3>Error Handling</h3>
*
* <p>Valid model objects might not be valid to the RDE XML schema. A single invalid object will
* cause the whole deposit to fail. You need to check the logs, find out which entities are broken,
* and perform Datastore surgery.
*
* <p>If a deposit fails, an error is emitted to the logs for each broken entity. It tells you the
* key and shows you its representation in lenient XML.
*
* <p>Failed deposits will be retried indefinitely. This is because RDE and BRDA each have a
* {@link Cursor} for each TLD. Even if the cursor lags for days, it'll catch up gradually on its
* own, once the data becomes valid.
*
* <p>The third-party escrow provider will validate each deposit we send them. They do both schema
* validation and reference checking.
*
* <p>This job does not perform reference checking. Administrators can do this locally with the
* {@code ValidateEscrowDepositCommand} command in the {@code nomulus} tool.
*
* <h3>Cursors</h3>
*
* <p>Deposits are generated serially for a given (tld, mode) pair. A deposit is never started
* beyond the cursor. Once a deposit is completed, its cursor is rolled forward transactionally.
*
* <p>The mode determines which cursor is used. {@link CursorType#RDE_STAGING} is used for thick
* deposits and {@link CursorType#BRDA} is used for thin deposits.
*
* <p>Use the {@code ListCursorsCommand} and {@code UpdateCursorsCommand} commands to administrate
* with these cursors.
*
* <h3>Security</h3>
*
* <p>The deposit and report are encrypted using {@link Ghostryde}. Administrators can use the
* {@code GhostrydeCommand} command in the {@code nomulus} tool to view them.
*
* <p>Unencrypted XML fragments are stored temporarily between the map and reduce steps. The
* ghostryde encryption on the full archived deposits makes life a little more difficult for an
* attacker. But security ultimately depends on the bucket.
*
* <h3>Idempotency</h3>
*
* <p>We lock the reduce tasks. This is necessary because: a) App Engine tasks might get double
* executed; and b) Cloud Storage file handles get committed on close <i>even if our code throws an
* exception.</i>
*
* <p>Deposits are generated serially for a given (watermark, mode) pair. A deposit is never started
* beyond the cursor. Once a deposit is completed, its cursor is rolled forward transactionally.
* Duplicate jobs may exist {@code <=cursor}. So a transaction will not bother changing the cursor
* if it's already been rolled forward.
*
* <p>Enqueuing {@code RdeUploadAction} is also part of the cursor transaction. This is necessary
* because the first thing the upload task does is check the staging cursor to verify it's been
* completed, so we can't enqueue before we roll. We also can't enqueue after the roll, because then
* if enqueuing fails, the upload might never be enqueued.
*
* <h3>Determinism</h3>
*
* <p>The filename of an escrow deposit is determistic for a given (TLD, watermark,
* {@linkplain RdeMode mode}) triplet. Its generated contents is deterministic in all the ways that
* we care about. Its view of the database is strongly consistent.
*
* <p>This is because:
* <ol>
* <li>{@code EppResource} queries are strongly consistent thanks to {@link EppResourceIndex}
* <li>{@code EppResource} entities are rewinded to the point-in-time of the watermark
* </ol>
*
* <p>Here's what's not deterministic:
* <ul>
* <li>Ordering of XML fragments. We don't care about this.
* <li>Information about registrars. There's no point-in-time for these objects. So in order to
* guarantee referential correctness of your deposits, you must never delete a registrar entity.
* </ul>
*
* <h3>Manual Operation</h3>
*
* <p>The task can be run in manual operation by setting certain parameters. Rather than generating
* deposits which are currently outstanding, the task will generate specific deposits. The files
* will be stored in a subdirectory of the "manual" directory, to avoid overwriting regular deposit
* files. Cursors and revision numbers will not be updated, and the upload task will not be kicked
* off. The parameters are:
* <ul>
* <li>manual: if present and true, manual operation is indicated
* <li>directory: the subdirectory of "manual" into which the files should be placed
* <li>mode: the mode(s) to generate: FULL for RDE deposits, THIN for BRDA deposits
* <li>tld: the tld(s) for which deposits should be generated
* <li>watermark: the date(s) for which deposits should be generated; dates should be start-of-day
* <li>revision: optional; if not specified, the next available revision number will be used
* </ul>
*
* <p>The manual, directory, mode, tld and watermark parameters must be present for manual
* operation; they must all be absent for standard operation (except that manual can be present but
* set to false). The revision parameter is optional in manual operation, and must be absent for
* standard operation.
*
* @see <a href="https://tools.ietf.org/html/draft-arias-noguchi-registry-data-escrow-06">Registry Data Escrow Specification</a>
* @see <a href="https://tools.ietf.org/html/draft-arias-noguchi-dnrd-objects-mapping-05">Domain Name Registration Data Objects Mapping</a>
*/
@Action(
path = RdeStagingAction.PATH,
method = {GET, POST},
auth =
@Auth(
methods = {Auth.AuthMethod.INTERNAL, Auth.AuthMethod.API},
minimumLevel = AuthLevel.APP,
userPolicy = Auth.UserPolicy.ADMIN
))
public final class RdeStagingAction implements Runnable {
public static final String PATH = "/_dr/task/rdeStaging";
private static final FormattingLogger logger = FormattingLogger.getLoggerForCallerClass();
@Inject Clock clock;
@Inject PendingDepositChecker pendingDepositChecker;
@Inject RdeStagingReducer reducer;
@Inject Response response;
@Inject MapreduceRunner mrRunner;
@Inject @Config("transactionCooldown") Duration transactionCooldown;
@Inject @Parameter(RdeModule.PARAM_MANUAL) boolean manual;
@Inject @Parameter(RdeModule.PARAM_DIRECTORY) Optional<String> directory;
@Inject @Parameter(RdeModule.PARAM_MODE) ImmutableSet<String> modeStrings;
@Inject @Parameter(RequestParameters.PARAM_TLD) ImmutableSet<String> tlds;
@Inject @Parameter(RdeModule.PARAM_WATERMARK) ImmutableSet<DateTime> watermarks;
@Inject @Parameter(RdeModule.PARAM_REVISION) Optional<Integer> revision;
@Inject @Parameter(RdeModule.PARAM_LENIENT) boolean lenient;
@Inject RdeStagingAction() {}
@Override
public void run() {
ImmutableSetMultimap<String, PendingDeposit> pendings =
manual ? getManualPendingDeposits() : getStandardPendingDeposits();
if (pendings.isEmpty()) {
String message = "Nothing needs to be deposited";
logger.info(message);
response.setStatus(SC_NO_CONTENT);
response.setPayload(message);
return;
}
for (PendingDeposit pending : pendings.values()) {
logger.infofmt("%s", pending);
}
RdeStagingMapper mapper = new RdeStagingMapper(lenient ? LENIENT : STRICT, pendings);
response.sendJavaScriptRedirect(createJobPath(mrRunner
.setJobName("Stage escrow deposits for all TLDs")
.setModuleName("backend")
.setDefaultReduceShards(pendings.size())
.runMapreduce(
mapper,
reducer,
ImmutableList.of(
// Add an extra shard that maps over a null resource. See the mapper code for why.
new NullInput<EppResource>(),
EppResourceInputs.createEntityInput(EppResource.class)))));
}
private ImmutableSetMultimap<String, PendingDeposit> getStandardPendingDeposits() {
if (directory.isPresent()) {
throw new BadRequestException("Directory parameter not allowed in standard operation");
}
if (!modeStrings.isEmpty()) {
throw new BadRequestException("Mode parameter not allowed in standard operation");
}
if (!tlds.isEmpty()) {
throw new BadRequestException("Tld parameter not allowed in standard operation");
}
if (!watermarks.isEmpty()) {
throw new BadRequestException("Watermark parameter not allowed in standard operation");
}
if (revision.isPresent()) {
throw new BadRequestException("Revision parameter not allowed in standard operation");
}
return ImmutableSetMultimap.copyOf(
Multimaps.filterValues(
pendingDepositChecker.getTldsAndWatermarksPendingDepositForRdeAndBrda(),
new Predicate<PendingDeposit>() {
@Override
public boolean apply(PendingDeposit pending) {
if (clock.nowUtc().isBefore(pending.watermark().plus(transactionCooldown))) {
logger.infofmt("Ignoring within %s cooldown: %s", transactionCooldown, pending);
return false;
} else {
return true;
}
}}));
}
private ImmutableSetMultimap<String, PendingDeposit> getManualPendingDeposits() {
if (!directory.isPresent()) {
throw new BadRequestException("Directory parameter required in manual operation");
}
if (directory.get().startsWith("/")) {
throw new BadRequestException("Directory must not start with a slash");
}
String directoryWithTrailingSlash =
directory.get().endsWith("/") ? directory.get() : (directory.get() + '/');
if (modeStrings.isEmpty()) {
throw new BadRequestException("Mode parameter required in manual operation");
}
ImmutableSet.Builder<RdeMode> modesBuilder = new ImmutableSet.Builder<>();
for (String modeString : modeStrings) {
try {
modesBuilder.add(RdeMode.valueOf(Ascii.toUpperCase(modeString)));
} catch (IllegalArgumentException e) {
throw new BadRequestException("Mode must be FULL for RDE deposits, THIN for BRDA deposits");
}
}
ImmutableSet<RdeMode> modes = modesBuilder.build();
if (tlds.isEmpty()) {
throw new BadRequestException("Tld parameter required in manual operation");
}
if (watermarks.isEmpty()) {
throw new BadRequestException("Watermark parameter required in manual operation");
}
// In theory, BRDA deposits should be on a specific day of the week, but in manual mode, let the
// user create deposits on other days. But dates should definitely be at the start of the day;
// otherwise, confusion is likely.
for (DateTime watermark : watermarks) {
if (!watermark.equals(watermark.withTimeAtStartOfDay())) {
throw new BadRequestException("Watermarks must be at the start of a day.");
}
}
if (revision.isPresent() && (revision.get() < 0)) {
throw new BadRequestException("Revision must be greater than or equal to zero");
}
ImmutableSetMultimap.Builder<String, PendingDeposit> pendingsBuilder =
new ImmutableSetMultimap.Builder<>();
for (String tld : tlds) {
for (DateTime watermark : watermarks) {
for (RdeMode mode : modes) {
pendingsBuilder.put(
tld,
PendingDeposit.createInManualOperation(
tld,
watermark,
mode,
directoryWithTrailingSlash,
revision));
}
}
}
return pendingsBuilder.build();
}
}