package uk.bl.monitrix.database.cassandra; import play.Configuration; import play.Logger; import play.Play; /** * Central access point to Mongo config options, DB table and field names, etc. * @author Rainer Simon <rainer.simon@ait.ac.at> */ public class CassandraProperties { private static Configuration config = Play.application().configuration(); /** Database settings, as configured in application.conf **/ public static final String HOST = config.getString("cassandra.host"); public static int DB_PORT; public static final String KEYSPACE = config.getString("cassandra.keyspace"); /** Database collection names **/ public static final String COLLECTION_ALERT_LOG = "alert_log"; public static final String COLLECTION_CRAWL_LOG = "crawl_log"; public static final String COLLECTION_COMPRESSABILITY_HISTOGRAM = "compressability_histogram"; public static final String COLLECTION_INGEST_SCHEDULE = "ingest_schedule"; public static final String COLLECTION_KNOWN_HOSTS = "known_hosts"; public static final String COLLECTION_KNOWN_TLDS = "known_tlds"; public static final String COLLECTION_CRAWL_STATS = "crawl_stats"; public static final String COLLECTION_VIRUS_LOG = "virus_log"; /** Database field keys (Alert log) **/ public static final String FIELD_ALERT_LOG_TIMESTAMP = "timestamp"; public static final String FIELD_ALERT_LOG_TIMESTAMP_HR = "timestamp_hour"; public static final String FIELD_ALERT_LOG_OFFENDING_HOST = "offending_host"; public static final String FIELD_ALERT_LOG_ALERT_TYPE = "alert_type"; public static final String FIELD_ALERT_LOG_DESCRIPTION = "alert_description"; /** Database field keys (Crawl log) **/ public static final String FIELD_CRAWL_LOG_LOG_ID = "log_id"; public static final String FIELD_CRAWL_LOG_TIMESTAMP = "timestamp"; public static final String FIELD_CRAWL_LOG_LONG_TIMESTAMP = "long_timestamp"; public static final String FIELD_CRAWL_LOG_COARSE_TIMESTAMP = "coarse_timestamp"; public static final String FIELD_CRAWL_LOG_STATUS_CODE = "status_code"; public static final String FIELD_CRAWL_LOG_DOWNLOADED_BYTES = "downloaded_bytes"; public static final String FIELD_CRAWL_LOG_URL = "uri"; public static final String FIELD_CRAWL_LOG_HOST = "host"; public static final String FIELD_CRAWL_LOG_DOMAIN = "domain"; public static final String FIELD_CRAWL_LOG_SUBDOMAIN = "subdomain"; public static final String FIELD_CRAWL_LOG_DISCOVERY_PATH = "discovery_path"; public static final String FIELD_CRAWL_LOG_REFERER = "referer"; public static final String FIELD_CRAWL_LOG_CONTENT_TYPE = "content_type"; public static final String FIELD_CRAWL_LOG_WORKER_THREAD = "worker_thread"; public static final String FIELD_CRAWL_LOG_FETCH_TS = "fetch_ts"; public static final String FIELD_CRAWL_LOG_HASH = "hash"; public static final String FIELD_CRAWL_LOG_ANNOTATIONS = "annotations"; public static final String FIELD_CRAWL_LOG_ANNOTATIONS_TOKENIZED = "annotations_tokenized"; public static final String FIELD_CRAWL_LOG_IP = "ip_address"; public static final String FIELD_CRAWL_LOG_LINE = "line"; public static final String FIELD_CRAWL_LOG_COMPRESSABILITY = "compressability"; public static final String FIELD_COMPRESSABILITY_BUCKET = "bucket"; public static final String FIELD_COMPRESSABILITY_COUNT = "url_count"; /** Database field keys (Ingest schedule) **/ public static final String FIELD_INGEST_CRAWLER_PATH = "log_path"; public static final String FIELD_INGEST_CRAWL_ID = "crawl_id"; public static final String FIELD_INGEST_START_TS = "start_ts"; public static final String FIELD_INGEST_END_TS = "end_ts"; public static final String FIELD_INGEST_INGESTED_LINES = "ingested_lines"; public static final String FIELD_INGEST_REVISIT_RECORDS = "revisit_records"; public static final String FIELD_INGEST_IS_MONITORED = "is_monitored"; /** Database field keys (Known Hosts collection) **/ public static final String FIELD_KNOWN_HOSTS_HOSTNAME = "host"; public static final String FIELD_KNOWN_HOSTS_TLD = "tld"; public static final String FIELD_KNOWN_HOSTS_DOMAIN = "domain"; public static final String FIELD_KNOWN_HOSTS_SUBDOMAIN = "subdomain"; public static final String FIELD_KNOWN_HOSTS_FIRST_ACCESS = "first_access"; public static final String FIELD_KNOWN_HOSTS_LAST_ACCESS = "last_access"; public static final String FIELD_KNOWN_HOSTS_CRAWLERS = "crawlers"; public static final String FIELD_KNOWN_HOSTS_CRAWLED_URLS = "crawled_urls"; public static final String FIELD_KNOWN_HOSTS_SUCCESSFULLY_FETCHED_URLS = "successfully_fetched_urls"; public static final String FIELD_KNOWN_HOSTS_AVG_FETCH_DURATION = "avg_fetch_duration"; public static final String FIELD_KNOWN_HOSTS_AVG_RETRY_RATE = "avg_retry_rate"; public static final String FIELD_KNOWN_HOSTS_FETCH_STATUS_CODES = "fetch_status_codes"; public static final String FIELD_KNOWN_HOSTS_CONTENT_TYPES = "content_types"; public static final String FIELD_KNOWN_HOSTS_VIRUS_STATS = "virus_stats"; public static final String FIELD_KNOWN_HOSTS_REDIRECT_PERCENTAGE = "redirect_percentage"; public static final String FIELD_KNOWN_HOSTS_ROBOTS_BLOCK_PERCENTAGE = "robots_block_percentage"; public static final String FIELD_KNOWN_HOSTS_TEXT_TO_NONTEXT_RATIO = "text_to_nontext_ratio"; /** Database field keys (Known TLDs) **/ public static final String FIELD_KNOWN_TLDS_TLD = "tld"; public static final String FIELD_KNOWN_TLDS_COUNT = "count"; /** Database field keys (Crawl Stats collection) **/ public static final String FIELD_CRAWL_STATS_TIMESTAMP = "stat_ts"; public static final String FIELD_CRAWL_STATS_CRAWL_ID = "crawl_id"; public static final String FIELD_CRAWL_STATS_DOWNLOAD_VOLUME = "downloaded_bytes"; public static final String FIELD_CRAWL_STATS_NUMBER_OF_URLS_CRAWLED = "uris_crawled"; public static final String FIELD_CRAWL_STATS_NEW_HOSTS_CRAWLED = "new_hosts"; public static final String FIELD_CRAWL_STATS_COMPLETED_HOSTS = "completed_hosts"; /** Database field keys (Virus Log collection **/ public static final String FIELD_VIRUS_LOG_NAME = "virus_name"; public static final String FIELD_VIRUS_LOG_OCCURENCES = "occurences"; /** Bulk insert chunk size **/ public static final int BULK_INSERT_CHUNK_SIZE = 20000; /** Resolution of the data pre-aggregation raster (in milliseconds) **/ public static final int PRE_AGGREGATION_RESOLUTION_MILLIS = 60 * 1000; static { try { DB_PORT = Integer.parseInt(config.getString("cassandra.port")); } catch (Throwable t) { DB_PORT = 9160; Logger.warn("Error reading cassandra.port from application.conf - defaulting to "+DB_PORT); } } }