package com.nvarghese.beowulf.scs.categorizers.impl; import java.net.URI; import java.net.URISyntaxException; import java.util.ArrayList; import org.apache.http.client.utils.URIBuilder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.code.morphia.Datastore; import com.nvarghese.beowulf.common.http.txn.AbstractHttpTransaction; import com.nvarghese.beowulf.common.http.txn.HttpMethodType; import com.nvarghese.beowulf.common.http.txn.HttpTransactionFactory; import com.nvarghese.beowulf.common.http.txn.HttpTxnDAO; import com.nvarghese.beowulf.common.http.txn.HttpTxnDocument; import com.nvarghese.beowulf.common.http.txn.TransactionSource; import com.nvarghese.beowulf.common.scan.model.WebScanDocument; import com.nvarghese.beowulf.common.utils.HttpUtils; import com.nvarghese.beowulf.common.webtest.WebTestType; import com.nvarghese.beowulf.scs.categorizers.TokenSingleSetTransactionCategorizer; import com.nvarghese.beowulf.scs.categorizers.dao.DirectoryCategorizerDAO; import com.nvarghese.beowulf.scs.categorizers.model.DirectoryCategorizerDocument; public class DirectoryCategorizer extends TokenSingleSetTransactionCategorizer { private DirectoryCategorizerDAO directoryCategorizerDAO; static Logger logger = LoggerFactory.getLogger(DirectoryCategorizer.class); public DirectoryCategorizer(Datastore ds, WebScanDocument webScanDocument) { super(ds, webScanDocument, WebTestType.DIRECTORY_TEST); if (ds != null) { directoryCategorizerDAO = new DirectoryCategorizerDAO(ds); if (directoryCategorizerDAO.getDirectoryCategorizerDocument() == null) directoryCategorizerDAO.createDirectoryCategorizerDocument(new DirectoryCategorizerDocument()); } } public String[] getTokens(AbstractHttpTransaction transaction) { String tokens[] = {}; HttpTxnDAO txnDAO = new HttpTxnDAO(ds); String path = transaction.getResourcePath(); if (HttpUtils.fileExists(transaction.getResponseStatusCode()) || path.equals("/")) { ArrayList<String> dirs = new ArrayList<String>(); int lastSlashIndex = -1; int slashIndex; while ((slashIndex = path.indexOf("/", lastSlashIndex + 1)) >= 0) { if (!(slashIndex - lastSlashIndex == 1 && lastSlashIndex > 0)) { String directory = path.substring(0, slashIndex + 1); try { URIBuilder uriBuilder = new URIBuilder(transaction.getURI()); uriBuilder.setPath(directory); URI testUri = uriBuilder.build(); if (!directoryCategorizerDAO.isDirectoryNamePresent(directory)) { AbstractHttpTransaction dirTest = HttpTransactionFactory.createTransaction(HttpMethodType.GET, testUri, null, null, TransactionSource.CATEGORIZER); dirTest.execute(); // persist transaction HttpTxnDocument txnDocument = transaction.toHttpTxnDocument(); txnDAO.createHttpTxnDocument(txnDocument); if (HttpUtils.fileExists(dirTest.getResponseStatusCode())) { directoryCategorizerDAO.addDirectoryName(directory); dirs.add(directory); } else { logger.warn("Invalid directory passed to categorizer: {}", testUri); } } } catch (URISyntaxException e) { logger.error("Problem with URI syntax. Reason: {}", e.getMessage(), e); } } else { logger.warn("Odd URL detected for the transaction with URI: `{}`", transaction.getURI()); } lastSlashIndex = slashIndex; } tokens = dirs.toArray(tokens); } return tokens; } }