/* * Copyright 2015 Apache Software Foundation. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.yarn.server.resourcemanager.quota; import io.hops.exception.StorageException; import io.hops.metadata.yarn.dal.quota.ContainersCheckPointsDataAccess; import io.hops.metadata.yarn.dal.quota.ContainersLogsDataAccess; import io.hops.metadata.yarn.dal.quota.ProjectQuotaDataAccess; import io.hops.metadata.yarn.dal.quota.ProjectsDailyCostDataAccess; import io.hops.metadata.yarn.dal.rmstatestore.ApplicationStateDataAccess; import io.hops.metadata.yarn.dal.util.YARNOperationType; import io.hops.metadata.yarn.entity.quota.ContainerCheckPoint; import io.hops.metadata.yarn.entity.quota.ContainerLog; import io.hops.metadata.yarn.entity.quota.ProjectDailyCost; import io.hops.metadata.yarn.entity.quota.ProjectDailyId; import io.hops.metadata.yarn.entity.quota.ProjectQuota; import io.hops.metadata.yarn.entity.rmstatestore.ApplicationState; import io.hops.transaction.handler.LightWeightRequestHandler; import io.hops.util.HopsWorksHelper; import io.hops.util.RMStorageFactory; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.BlockingQueue; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.TimeUnit; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.service.AbstractService; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ContainerExitStatus; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.util.ConverterUtils; public class QuotaService extends AbstractService { private static final Log LOG = LogFactory.getLog(QuotaService.class); private Thread quotaSchedulingThread; private volatile boolean stopped = false; private long minNumberOfTicks = 1; private long batchTime; private int batchSize; private int minVcores; private int minMemory; private float basePrice; ApplicationStateDataAccess appStatDS = (ApplicationStateDataAccess) RMStorageFactory. getDataAccess(ApplicationStateDataAccess.class); Map<String, String> applicationOwnerCache = new HashMap<>(); Map<String, ContainerCheckPoint> containersCheckPoints; Set<String> recovered = new HashSet<>(); BlockingQueue<ContainerLog> eventContainersLogs = new LinkedBlockingQueue<>(); public QuotaService() { super("quota scheduler service"); } @Override protected void serviceStart() throws Exception { assert !stopped : "starting when already stopped"; LOG.info("Starting a new quota schedular service"); recover(); quotaSchedulingThread = new Thread(new WorkingThread()); quotaSchedulingThread.setName("Quota scheduling service"); quotaSchedulingThread.start(); super.serviceStart(); } @Override protected void serviceStop() throws Exception { stopped = true; if (quotaSchedulingThread != null) { quotaSchedulingThread.interrupt(); } super.serviceStop(); LOG.info("Stopped the quota schedular service."); } @Override public void serviceInit(Configuration conf) throws Exception { minNumberOfTicks = conf.getInt(YarnConfiguration.QUOTA_MIN_TICKS_CHARGE, YarnConfiguration.DEFAULT_QUOTA_MIN_TICKS_CHARGE); batchTime = conf.getLong(YarnConfiguration.QUOTA_BATCH_TIME, YarnConfiguration.DEFAULT_QUOTA_BATCH_TIME); batchSize = conf.getInt(YarnConfiguration.QUOTA_BATCH_SIZE, YarnConfiguration.DEFAULT_QUOTA_BATCH_SIZE); minVcores= conf.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES); minMemory= conf.getInt(YarnConfiguration.QUOTA_MINIMUM_CHARGED_MB, YarnConfiguration.DEFAULT_QUOTA_MINIMUM_CHARGED_MB); basePrice= conf.getFloat(YarnConfiguration.QUOTA_BASE_PRICE, YarnConfiguration.DEFAULT_QUOTA_BASE_PRICE); } public void insertEvents(Collection<ContainerLog> containersLogs) { for (ContainerLog cl : containersLogs) { eventContainersLogs.add(cl); } } private class WorkingThread implements Runnable { @Override public void run() { LOG.info("Quota Scheduler started"); while (!stopped && !Thread.currentThread().isInterrupted()) { try{ final List<ContainerLog> containersLogs = new ArrayList<>(); Long start = System.currentTimeMillis(); long duration = 0; //batch logs to reduce the number of roundtrips to the database //can probably be removed once we have the ndb asynchronous library do { ContainerLog log = eventContainersLogs.poll(Math.max(1, batchTime - duration), TimeUnit.MILLISECONDS); if (log != null) { containersLogs.add(log); } duration = System.currentTimeMillis() - start; } while (duration < batchTime && containersLogs.size() < batchSize); computeAndApplyCharge(containersLogs, false); }catch(InterruptedException | IOException ex){ LOG.error(ex,ex); } } LOG.info("Quota scheduler thread is exiting gracefully"); } } protected void computeAndApplyCharge( final Collection<ContainerLog> ContainersLogs, final boolean isRecover) throws IOException { LightWeightRequestHandler quotaSchedulerHandler = new LightWeightRequestHandler(YARNOperationType.TEST) { @Override public Object performTask() throws IOException { connector.beginTransaction(); connector.writeLock(); computeAndApplyChargeInt(ContainersLogs, isRecover); connector.commit(); return null; } }; quotaSchedulerHandler.handle(); } private void computeAndApplyChargeInt( final Collection<ContainerLog> ContainersLogs, final boolean isRecover) throws StorageException { //Get Data ** ProjectQuota ** ProjectQuotaDataAccess pqDA = (ProjectQuotaDataAccess) RMStorageFactory.getDataAccess( ProjectQuotaDataAccess.class); Map<String, ProjectQuota> projectsQuotaMap = pqDA.getAll(); final long curentDay = TimeUnit.DAYS.convert(System.currentTimeMillis(), TimeUnit.MILLISECONDS); Map<String, ProjectQuota> chargedProjects = new HashMap<>(); Map<ProjectDailyId, ProjectDailyCost> chargedProjectsDailyCost = new HashMap<>(); List<ContainerLog> toBeRemovedContainersLogs = new ArrayList<>(); List<ContainerCheckPoint> toBePercistedContainerCheckPoint = new ArrayList<>(); List<ContainerCheckPoint> toBeRemovedContainerCheckPoint = new ArrayList<>(); // Calculate the quota for (ContainerLog containerLog : ContainersLogs) { if (!isRecover && recovered.remove(containerLog.getContainerid())) { //we have already charged this project when recovering we should //not charge it two times continue; } if (isRecover) { recovered.add(containerLog.getContainerid()); } // Get ApplicationId from ContainerId ContainerId containerId = ConverterUtils.toContainerId(containerLog.getContainerid()); ApplicationId appId = containerId.getApplicationAttemptId(). getApplicationId(); //Get ProjectId from ApplicationId in ** ApplicationState Table ** String appOwner = applicationOwnerCache.get(appId.toString()); if (appOwner == null) { ApplicationState appState = (ApplicationState) appStatDS. findByApplicationId(appId.toString()); if (appState == null) { LOG.error("Application not found: " + appId.toString() + " for container " + containerLog.getContainerid()); continue; } else { if (applicationOwnerCache.size() > 100000) { //if the cahs is too big empty it and it will be refilled with //the active applications //TODO make a proper chash applicationOwnerCache = new HashMap<>(); } appOwner = appState.getUser(); applicationOwnerCache.put(appId.toString(), appOwner); } } String projectName = HopsWorksHelper.getProjectName(appOwner); String user = HopsWorksHelper.getUserName(appOwner); //comput used ticks Long checkpoint = containerLog.getStart(); float currentMultiplicator = containerLog.getMultiplicator(); ContainerCheckPoint lastCheckPoint = containersCheckPoints.get( containerLog.getContainerid()); if (lastCheckPoint != null) { checkpoint = lastCheckPoint.getCheckPoint(); currentMultiplicator = lastCheckPoint.getMultiplicator(); } long nbRunningTicks = containerLog.getStop() - checkpoint; // Decide what to do with the ticks if (nbRunningTicks > 0) { if (containerLog.getExitstatus() == ContainerExitStatus.CONTAINER_RUNNING_STATE) { //The container as been running for more than one checkpoint duration ContainerCheckPoint newCheckpoint = new ContainerCheckPoint( containerLog.getContainerid(), containerLog.getStop(), currentMultiplicator); containersCheckPoints. put(containerLog.getContainerid(), newCheckpoint); toBePercistedContainerCheckPoint.add(newCheckpoint); LOG.debug("charging project still running " + projectName + " for container " + containerLog.getContainerid() + " current ticks " + nbRunningTicks + "(" + containerLog.getStart() + ", " + containerLog.getStop() + ", " + checkpoint + ") current multiplicator " + currentMultiplicator); float charge = computeCharge(nbRunningTicks, currentMultiplicator, containerLog.getNbVcores(), containerLog.getMemoryUsed()); chargeProjectQuota(chargedProjects, projectsQuotaMap, projectName, user, containerLog.getContainerid(), charge); //** ProjectDailyCost charging** chargeProjectDailyCost(chargedProjectsDailyCost, projectName, user, curentDay, charge); } else { //The container has finished running toBeRemovedContainersLogs.add((ContainerLog) containerLog); if (checkpoint != containerLog.getStart()) { toBeRemovedContainerCheckPoint.add(new ContainerCheckPoint( containerLog.getContainerid())); containersCheckPoints.remove(containerLog.getContainerid()); } //** ProjectQuota charging** LOG.debug("charging project finished " + projectName + " for container " + containerLog.getContainerid() + " current ticks " + nbRunningTicks + " current multiplicator " + currentMultiplicator); float charge = computeCharge(nbRunningTicks, currentMultiplicator, containerLog.getNbVcores(), containerLog.getMemoryUsed()); chargeProjectQuota(chargedProjects, projectsQuotaMap, projectName, user, containerLog.getContainerid(), charge); //** ProjectDailyCost charging** chargeProjectDailyCost(chargedProjectsDailyCost, projectName, user, curentDay, charge); } } else if (checkpoint == containerLog.getStart() && containerLog. getExitstatus() == ContainerExitStatus.CONTAINER_RUNNING_STATE) { //create a checkPoint at start to store multiplicator. ContainerCheckPoint newCheckpoint = new ContainerCheckPoint( containerLog.getContainerid(), containerLog.getStart(), currentMultiplicator); containersCheckPoints.put(containerLog.getContainerid(), newCheckpoint); toBePercistedContainerCheckPoint.add(newCheckpoint); } } // Delet the finished ContainersLogs ContainersLogsDataAccess csDA = (ContainersLogsDataAccess) RMStorageFactory. getDataAccess(ContainersLogsDataAccess.class); csDA.removeAll(toBeRemovedContainersLogs); //Add and remove Containers checkpoints ContainersCheckPointsDataAccess ccpDA = (ContainersCheckPointsDataAccess) RMStorageFactory.getDataAccess( ContainersCheckPointsDataAccess.class); ccpDA.addAll(toBePercistedContainerCheckPoint); ccpDA.removeAll(toBeRemovedContainerCheckPoint); if (LOG.isDebugEnabled()) { // Show all charged project for (ProjectQuota _cpq : chargedProjects.values()) { LOG.debug("RIZ:: Charged projects: " + _cpq.toString() + " charge amount:" + _cpq.getTotalUsedQuota()); } } // Add all the changed project quota to NDB pqDA.addAll(chargedProjects.values()); ProjectsDailyCostDataAccess pdcDA = (ProjectsDailyCostDataAccess) RMStorageFactory.getDataAccess( ProjectsDailyCostDataAccess.class); pdcDA.addAll(chargedProjectsDailyCost.values()); } Map<ProjectDailyId, ProjectDailyCost> projectsDailyCostCache; long cashDay = -1; private void chargeProjectQuota( Map<String, ProjectQuota> chargedProjectsQuota, Map<String, ProjectQuota> projectsQuotaMap, String projectid, String user, String containerId, float charge) { LOG.info("Quota: project " + projectid + " user " + user + " has been charged " + charge + " for container: " + containerId); ProjectQuota projectQuota = (ProjectQuota) projectsQuotaMap.get(projectid); if (projectQuota != null) { projectQuota.decrementQuota(charge); chargedProjectsQuota.put(projectid, projectQuota); } else { LOG.error("Project not found: " + projectid); } } private void chargeProjectDailyCost( Map<ProjectDailyId, ProjectDailyCost> chargedProjectsDailyCost, String projectid, String user, long day, float charge) { LOG.debug("Quota: project " + projectid + " user " + user + " has used " + charge + " credits, on day: " + day); if (cashDay != day) { projectsDailyCostCache = new HashMap<>(); cashDay = day; } ProjectDailyId key = new ProjectDailyId(projectid, user, day); ProjectDailyCost projectDailyCost = projectsDailyCostCache.get(key); if (projectDailyCost == null) { projectDailyCost = new ProjectDailyCost(projectid, user, day, 0); projectsDailyCostCache.put(key, projectDailyCost); } projectDailyCost.incrementCharge(charge); chargedProjectsDailyCost.put(key, projectDailyCost); } private float computeCharge(long ticks, float multiplicator, int nbVcores, int memoryUsed) { if (ticks < minNumberOfTicks) { ticks = minNumberOfTicks; } //the pricePerTick is set for a minimum sized container, the price to pay is //proportional to the container size on the most used resource float vcoresUsage = (float) nbVcores / minVcores; float memoryUsage = (float) memoryUsed / minMemory; float credit = (float) ticks * Math.max(vcoresUsage, memoryUsage) * multiplicator * basePrice; return credit; } public void recover() throws IOException { final long day = TimeUnit.DAYS.convert(System.currentTimeMillis(), TimeUnit.MILLISECONDS); LightWeightRequestHandler recoveryHandler = new LightWeightRequestHandler( YARNOperationType.TEST) { @Override public Object performTask() throws IOException { connector.beginTransaction(); connector.writeLock(); ProjectsDailyCostDataAccess pdcDA = (ProjectsDailyCostDataAccess) RMStorageFactory. getDataAccess(ProjectsDailyCostDataAccess.class); projectsDailyCostCache = pdcDA.getByDay(day); ContainersCheckPointsDataAccess ccpDA = (ContainersCheckPointsDataAccess) RMStorageFactory. getDataAccess(ContainersCheckPointsDataAccess.class); containersCheckPoints = ccpDA.getAll(); //Get Data ** ContainersLogs ** ContainersLogsDataAccess csDA = (ContainersLogsDataAccess) RMStorageFactory.getDataAccess( ContainersLogsDataAccess.class); Map<String, ContainerLog> hopContainersLogs = csDA.getAll(); connector.commit(); return hopContainersLogs; } }; final Map<String, ContainerLog> hopContainersLogs = (Map<String, ContainerLog>) recoveryHandler.handle(); //run logic on all computeAndApplyCharge(hopContainersLogs.values(), true); } }