diff --git a/.gitignore b/.gitignore index 377f10e2b..5c8e1935c 100644 --- a/.gitignore +++ b/.gitignore @@ -64,3 +64,6 @@ backend/headlessBundle.js backend/headlessBundle.js.map npm-shrinkwrap.unsafe.json + +#AWS Credentials +aws.json diff --git a/app/components/Settings/Tabs/Tab.react.js b/app/components/Settings/Tabs/Tab.react.js index a62d4f05d..b907f4179 100644 --- a/app/components/Settings/Tabs/Tab.react.js +++ b/app/components/Settings/Tabs/Tab.react.js @@ -36,6 +36,8 @@ export default class ConnectionTab extends Component { label = connectionObject.label || connectionObject.id || connectionObject.database; } else if (connectionObject.dialect === DIALECTS.ELASTICSEARCH) { label = `Elasticsearch (${connectionObject.host})`; + } else if (connectionObject.dialect === DIALECTS.ATHENA) { + label = `Athena (${connectionObject.database})`; } else if (connectionObject.dialect === DIALECTS.SQLITE) { label = connectionObject.storage; } else if (connectionObject.dialect === DIALECTS.DATA_WORLD) { diff --git a/app/constants/constants.js b/app/constants/constants.js index 187433f3c..c24d8c3e7 100644 --- a/app/constants/constants.js +++ b/app/constants/constants.js @@ -16,6 +16,7 @@ export const DIALECTS = { APACHE_IMPALA: 'apache impala', APACHE_DRILL: 'apache drill', DATA_WORLD: 'data.world', + ATHENA: 'athena', CSV: 'csv' }; @@ -30,6 +31,7 @@ export const SQL_DIALECTS_USING_EDITOR = [ 'apache spark', 'apache impala', 'data.world', + 'athena', 'csv' ]; @@ -189,6 +191,7 @@ export const CONNECTION_CONFIG = { 'type': 'password' } ], // TODO - password options for apache drill + [DIALECTS.DATA_WORLD]: [ { 'label': 'Dataset/Project URL', @@ -202,6 +205,36 @@ export const CONNECTION_CONFIG = { 'type': 'password', 'description': 'Your data.world read/write token. 
It can be obtained from https://data.world/settings/advanced' } + ], + [DIALECTS.ATHENA]: [ + { + 'label': 'S3 Access Key', 'value': 'accessKey', 'type': 'password' + }, + { + 'label': 'S3 Secret Access Key', 'value': 'secretAccessKey', 'type': 'password' + }, + { + 'label': 'AWS Region', 'value': 'region', 'type': 'text', + 'description': 'The AWS region (i.e. us-east-1) where the database resides' + }, + { + 'label': 'S3 Bucket', 'value': 'outputS3Bucket', 'type': 'text', + 'description': 'The Athena connector will store query results in this location.' + }, + { + 'label': 'Database', 'value': 'database', 'type': 'text' + }, + { + 'label': 'Query Interval', 'value': 'queryInterval', 'type': 'number', + 'description': 'The Interval (In Milliseconds) that Falcon will check to see \ + if the Athena Query is done. Default 2 seconds' + }, + { + 'label': 'SSL Enabled', 'value': 'sslEnabled', 'type': 'checkbox', + 'description': 'Does your database require that you connect to it via SSL? \ + Note that this is just the connection between this app and your database; \ + connections to plot.ly or your plotly instance are always encrypted.' 
+ } ] }; @@ -220,7 +253,8 @@ export const LOGOS = { [DIALECTS.SQLITE]: 'images/sqlite-logo.png', [DIALECTS.S3]: 'images/s3-logo.png', [DIALECTS.APACHE_DRILL]: 'images/apache_drill-logo.png', - [DIALECTS.DATA_WORLD]: 'images/dataworld-logo.png' + [DIALECTS.DATA_WORLD]: 'images/dataworld-logo.png', + [DIALECTS.ATHENA]: 'images/athena-logo.png' }; export function PREVIEW_QUERY(connection, table, elasticsearchIndex) { @@ -237,6 +271,7 @@ export function PREVIEW_QUERY(connection, table, elasticsearchIndex) { case DIALECTS.POSTGRES: case DIALECTS.DATA_WORLD: case DIALECTS.REDSHIFT: + case DIALECTS.ATHENA: return `SELECT * FROM ${table} LIMIT 1000`; case DIALECTS.MSSQL: return `SELECT TOP 1000 * FROM ${connection.database}.dbo.${table}`; @@ -411,6 +446,14 @@ export const SAMPLE_DBS = { dialect: 'sqlite', storage: `${__dirname}/plotly_datasets.db` }, + [DIALECTS.ATHENA]: { + s3Outputlocation: 'plotly-s3-connector-test', + accessKey: 'AKIAIMHMSHTGARJYSKMQ', + secretAccessKey: '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', + region: 'us-east-1', + database: 'falcon', + queryTimeout: 5000 + }, [DIALECTS.DATA_WORLD]: { url: 'https://data.world/rflprr/reported-lyme-disease-cases-by-state' } diff --git a/app/images/athena-logo.LICENSE b/app/images/athena-logo.LICENSE new file mode 100644 index 000000000..aeec80d4b --- /dev/null +++ b/app/images/athena-logo.LICENSE @@ -0,0 +1,97 @@ +Revised September 22, 2017 + +1. Introduction. These Amazon Web Services (“AWS”) Trademark Guidelines (the “Trademark Guidelines”) form an integral part of the AWS Customer Agreement (the “Agreement”) between Amazon Web Services, Inc. or its affiliates (“AWS,” “we,” “us” or “our”) and you or the entity you represent (“you”). 
These Trademark Guidelines provide you a limited permission to use the AWS Marks (as defined in Section 2 below), in connection with (as applicable): (a) your use of the Services (as defined in the Agreement), or in connection with software products designed to be used with the Services, or (b) as otherwise agreed with AWS in writing, on the terms set forth herein and in the Agreement, until such time as we may terminate such permission, which we may do at any time, in our sole discretion, as set forth in Section 4 below. The AWS Marks are some of our most valuable assets and these Trademark Guidelines are intended to preserve the value attached to the AWS Marks. + +2. Definition. For the purposes of these Trademark Guidelines, "AWS Marks" means the following trademarks, service marks, service or trade names, logos, product names, or designations of AWS and its affiliates: (i) the "Powered by AWS" logo in the two forms shown below (the "Logo"); and (ii) "Amazon Web Services", "AWS", "Amazon[(Chinese characters)] AWS", “Alexa Site Thumbnail”, “Alexa Top Sites”, “Alexa Web Information Service”, “Alexa Web Search”, “Amazon Athena”, “Amazon Aurora”, “Amazon Chime”, “Amazon CloudFront”, “Amazon CloudSearch”, “Amazon CloudWatch”, “Amazon Cognito”, “Amazon Connect”, “Amazon DevPay”, “Amazon DynamoDB”, “Amazon DynamoDB “Accelerator”, “Amazon EC2”, “Amazon Elastic Beanstalk”, “Amazon Elastic Compute Cloud”, “Amazon ElastiCache”, “Amazon Flexible Payments Service”, “Amazon FPS”, “Amazon Fulfillment Web Service”, “Amazon FWS”, “Amazon GameLift”, “Amazon Glacier”, “Amazon Inspector”, “Amazon Kinesis”, “Amazon Lex”, “Amazon Lightsail”, “Amazon Lumberyard”, “Amazon Machine Learning”, “Amazon Macie”, “Amazon Mechanical Turk”, “Amazon Pinpoint”, “Amazon Polly”, “Amazon Quicksight”, “Amazon RDS”, “Amazon Redshift”, “Amazon Rekognition”, “Amazon Relational Database”, “Amazon Route 53”, “Amazon S3”, “Amazon Simple Email Service”, “Amazon Simple Notification Service”, “Amazon Simple 
Queue Service”, “Amazon Simple Storage Service”, “Amazon SimpleDB”, “Amazon SQS”, “Amazon Virtual Private Cloud”, “Amazon VPC”, “Amazon WorkDocs”, “Amazon WorkMail”, “AWS CloudFormation”, “AWS CloudHSM”, “AWS CloudTrail”, “AWS CodeBuild”, “AWS CodeCommit”, “AWS CodeDeploy”, “AWS CodePipeline”, “AWS CodeStar”, “AWS Direct Connect”, “AWS Glue”, “AWS Greengrass”, “AWS IoT Button”, “AWS Lambda”, “AWS Marketplace”, “AWS Migration Hub”, “AWS Premium Support”, “AWS Shield”, “AWS Snowball”, “AWS Snowball Edge”, “AWS Snowmobile”, “AWS Step Functions”, “AWS X-Ray”, “CloudFront”, “DevPay”, “DynamoDB”, “EC2”, “Elasticache”, “Mechanical Turk”, “SimpleDB”, “SQS”, and any other AWS Marks and Services made available from time to time. + +PB_AWS_logo_RGB +PB_AWS_logo_RGB_WHT +3. Limited Permission. Provided that you are (a) an AWS developer in good standing with a current and valid account for use of the Services or (b) otherwise authorized by AWS in writing, and provided, further, that you comply at all times with the terms of both the Agreement and these Trademark Guidelines, we grant you a limited, non-exclusive, revocable, non-transferable permission, under our intellectual property rights in and to the AWS Marks, and only to the limited extent of our intellectual property rights in and to the AWS Marks, to use the AWS Marks for the following limited purpose, and only for such limited purpose: you may utilize the Logo or the appropriate form(s) of the “for” or equivalent naming convention or URL naming convention, as set forth in Section 9 below, to: (i) identify Your Content (as defined in the Agreement) as using the Services; or (ii) to identify software tools or applications that you create and distribute that are intended for use in connection with the Services. 
Without limitation of any provision in the Agreement, you acknowledge that any use that you elect to make of the AWS Marks, even if permitted hereunder, is at your sole risk and that we shall have no liability or responsibility in connection therewith. Your limited permission to use the AWS Marks is a limited permission and you may not use the AWS Marks for any other purpose. You may not transfer, assign or sublicense your limited permission to use the AWS Marks to any other person or entity. Your use of the AWS Marks shall comply with: (i) the most up-to-date versions of the Agreement and these Trademark Guidelines; and (ii) any other terms, conditions or policies that we may issue from time to time to govern use of the AWS Marks. Your limited permission to use the AWS Marks hereunder shall automatically terminate and you must immediately stop using the AWS Marks if at any time: (i) the Agreement is terminated; (ii) Your Content no longer uses any of the Services, or your software product cannot be used with any of the Services, as applicable; or (iii) you cease to be a registered AWS developer. + +4. Modification and Termination. You understand and agree that, without prior notice to you and at our sole discretion: (i) we may modify these Trademark Guidelines at any time; (ii) we may modify or terminate your limited permission to use the AWS Marks, at any time in our sole discretion, for any reason or for no reason at all; and (iii) we reserve the right to take any and all actions including, without limitation, legal proceedings, against any use of the AWS Marks that does not comply with the terms of the Agreement or these Trademark Guidelines. + +5. No Affiliation or Endorsement. 
You will not display the AWS Marks in any manner that implies that you are related to, affiliated with, sponsored or endorsed by us, or in a manner that could reasonably be interpreted to suggest that Your Content, web site, product or service, has been authored or edited by us, or represents our views or opinions. + +6. No Disparagement. You may only use the AWS Marks in a manner designed to maintain the highest standard, quality and reputation that is associated with the AWS Marks and you will not use the AWS Marks to disparage us or our products or services. + +7. No Dominant Display; AWS Mark Differentiation. You may not display any AWS Mark as the largest or most prominent trademark in any materials (including, without limitation, any web site or product literature) associated with Your Content, software tool or other software application. When using any AWS Mark (other than the Logo, with respect to which the formatting requirements are set forth in Section 8 below, or in a URL), you must distinguish the AWS Mark from the name of Your Content and/or other surrounding text by capitalizing the first letter of the AWS Mark, capitalizing or italicizing the entire AWS Mark, placing the AWS Mark in quotes, or using a different style or color of font for the AWS Mark. + +8. Formatting Requirements with Respect to the “Powered by AWS” Logo. +a. No Modification. We will make the Logo image available to you from the co-marketing page in the AWS Site located at http://aws.amazon.com/co-marketing. You may not remove, distort or modify any element of the Logo; provided however, you may transform the file format itself, for ease of use. + +b. Color. 
The Logo may be represented in the following formats: FULL COLOR (i) light backgrounds—squid ink type with Amazon Orange smile; (ii) dark backgrounds— white type with Amazon Orange smile; or for single-color applications; SINGLE COLOR (iii) light backgrounds—Squid Ink type with Squid Ink smile (preferred); GRAYSCALE (iv) light backgrounds—Black type with Black smile; (v) dark backgrounds— white type with white smile. No alternate color representation or combination will be acceptable. + +When in FULL COLOR: +The graphic element (smile) must be in Amazon Orange +HEX: #FF9900 +RGB: 255—153—0 +CMYK: 0—45—95—0 +PMS: COATED 1375 C + UNCOATED 137 U + +The type must be in Squid Ink +HEX: #232F3E +RGB: 35—47—62 +CMYK: 53—36—0—86 +PMS: COATED 432 C + +When in SINGLE COLOR: + +Light Backgrounds: The graphic element (smile) and type must be in Squid Ink +HEX: #232F3E +RGB: 35—47—62 +CMYK: 53—36—0—86 +PMS: COATED 432 C + +When in GRAYSCALE: + +Light Backgrounds: The graphic element (smile) and type must be in Black +HEX: #000000 +RGB: 0—0—0 +CMYK: 0—0—0—100 +PMS: Black + +Dark Backgrounds: The graphic element (smile) and type must be in White +HEX: #FFFFFF +RGB: 255—255—255 +CMYK: 0—0—0—0 +PMS: White + +c. Spacing. The Logo must appear by itself, with reasonable spacing (at least the height of the “Powered by AWS” logo) between each side of the “Powered by AWS” logo and other graphic or textual elements. + +d. Size. The Logo, as shown in Section 2 of these Trademark Guidelines, indicates the minimum size at which you may display it to ensure that the type and trademark notations are legible. The minimum size for the “Powered by AWS” logo shall be: Inches: 2.625” x 1” Pixels: 190 × 70, or Millimeters: 67mm × 24mm. + +9. Permissible Uses of the AWS Marks. 
Except for the Logo (with respect to which the formatting requirements are set forth above), you may only use the AWS Marks: (i) in a relational phrase using “for” or one of the limited number of equivalent naming conventions, as set forth below; or (ii) to the right of the top level domain name in a URL in the format set forth below. + +• Relational Phrases. +Example of Permissible Use: +“Application” for EC2 +Equivalents: +You may replace “for” in the example above with any of the following, so long as the term you use is accurate when used with the AWS Marks you use: “for use with”; “with”; “compatible with”; “works with”; “powered by”; “built on”; “built with”; “developed on”; “developed with.” +You may replace “EC2” in the examples above with any of the AWS Marks, so long as your usage of the AWS Marks is accurate. + +• URLs. +Example of Permissible Use: +www.applicationdomain.com/aws +Equivalents: +You may replace “aws” in the example above with any of the AWS Marks, so long as your usage of the AWS Marks is accurate. + +10. Hyperlinking. You shall link each use of the AWS Marks directly to the following URL, wherever technically feasible: http://aws.amazon.com. You may, alternatively, link to an AWS detail page for a Service used by Your Content, and if you do so, you must link to the primary URL for the Service (e.g., http://aws.amazon.com/[name of Service]. For example, the primary URL for Amazon Elastic Compute Cloud (Amazon EC2) is http://aws.amazon.com/ec2. You may open the URL in a new browser window. You may not link the AWS Marks to any web site other than the primary URL for the applicable Service. You may not frame or mirror any of our web site pages. + +11. No Combination. You may not hyphenate, combine or abbreviate the AWS Marks. You shall not incorporate the AWS Marks into the name of your organization, or your services, products, trademark or logos. 
The foregoing prohibition includes the use of the AWS Marks in the name of any application, service or product or in a URL to the left of the top-level domain name (e.g., ”.com”, ”.net”, ”.uk”, etc.). For example, URLs such as “alexaaa.mydomain.com”, “mymechanicalturk.net” or “EC2plus2.com” are expressly prohibited. + +12. Adwords. AWS and its affiliates are unable to provide a blanket authorization for use of our trademarks via the Google Adwords program. As long as your situation fits within Google’s “Reseller and information site policy”, you should be able to use the AWS trademarks in a fair use way within your adcopy without our formal authorization. + +13. Attribution. You must include the following statement in any materials that include the AWS Marks: “Amazon Web Services, the “Powered by AWS” logo, [and name any other AWS Marks used in such materials] are trademarks of Amazon.com, Inc. or its affiliates in the United States and/or other countries." + +14. No Misleading Use. You may not display the AWS Marks in any manner that is misleading, unfair, defamatory, infringing, libelous, disparaging, obscene or otherwise objectionable as determined by us in our sole discretion. + +15. Trade Dress. You may not imitate the trade dress or “look and feel” of any of our web sites or pages contained in any of our web sites, including without limitation, the branding, color combinations, fonts, graphic designs, product icons or other elements associated with us. + +16. Compliance with Law; Appropriate Activities. You may not use the AWS Marks in any manner that violates any United States or foreign, federal, state, provincial, municipal, local or other, law or regulation. Without limiting the foregoing, or any provision in the Agreement, you may not display any AWS Mark on your site if your site contains or displays adult content or promotes illegal activities, gambling, or the sale of tobacco or alcohol to persons under twenty-one (21) years of age. + +17. 
Reservation of Rights. Except for the limited permission specified in Section 3 above, nothing in the Agreement or these Trademark Guidelines shall grant or be deemed to grant you any right, license, title or interest in or to any AWS Mark or any of our or our affiliates’ other trademarks, service marks, trade names, logos, product names, service names, legends, other designations, or abbreviations of any of the foregoing. You acknowledge and agree that we and our affiliates retain any and all intellectual property and other proprietary rights in and to the AWS Marks. All use by you of the AWS Marks including any goodwill associated therewith, shall inure to the benefit of Amazon. + +18. No Challenges. You agree that you will not, at any time, challenge or encourage, assist or otherwise induce third parties to challenge the AWS Marks (except to the extent such restriction is prohibited by law) or our registration thereof, nor shall you attempt to register any trademarks, service marks, trade names, logos, product names, service names, legends, domain names, other designations, or abbreviations of any of the foregoing, or other distinctive brand features that are confusingly similar in any way (including, but not limited to, sound, appearance and spelling) to the AWS Marks. + +19. Contact Information. If you have questions regarding your obligations under these Trademark Guidelines or questions about any AWS Mark, please contact or write to us at: Amazon.com, Inc., Attention: Trademarks, PO Box 81226, Seattle, WA 98108-1226. 
\ No newline at end of file diff --git a/app/images/athena-logo.png b/app/images/athena-logo.png new file mode 100644 index 000000000..3ec06e277 Binary files /dev/null and b/app/images/athena-logo.png differ diff --git a/backend/persistent/datastores/Datastores.js b/backend/persistent/datastores/Datastores.js index 8817079e8..d7bef60c1 100644 --- a/backend/persistent/datastores/Datastores.js +++ b/backend/persistent/datastores/Datastores.js @@ -7,6 +7,7 @@ import * as ApacheLivy from './livy'; import * as ApacheImpala from './impala'; import * as DataWorld from './dataworld'; import * as DatastoreMock from './datastoremock'; +import * as Athena from './athena'; const CSV = require('./csv'); @@ -36,6 +37,7 @@ function getDatastoreClient(connection) { } const {dialect} = connection; + if (dialect === 'elasticsearch') { return Elasticsearch; } else if (dialect === 's3') { @@ -52,6 +54,8 @@ function getDatastoreClient(connection) { return IbmDb2; } else if (dialect === 'data.world') { return DataWorld; + } else if (dialect === 'athena') { + return Athena; } return Sql; } diff --git a/backend/persistent/datastores/athena.js b/backend/persistent/datastores/athena.js new file mode 100644 index 000000000..1e98fa830 --- /dev/null +++ b/backend/persistent/datastores/athena.js @@ -0,0 +1,88 @@ +import {executeQuery} from './drivers/athena'; + +const SHOW_TABLES_QUERY = 'SHOW TABLES'; +const SHOW_SCHEMA_QUERY = + 'SELECT table_name, column_name, data_type FROM information_schema.columns WHERE table_schema'; +const ATHENA_DEFAULT_QUERY = 'SELECT table_name FROM information_schema.columns LIMIT 1'; +const DEFAULT_QUERY_INTERVAL = 2000; + +/* + * The connection function will validate the parameters and return the connection + * parameters + * @param {object} connection + * @param {string} connection.accessKey - AWS Access Key + * @param {string} connection.secretAccessKey - AWS Secret Key + * @param {string} connection.region - AWS Region + * @param {string} connection.database - 
Database name to connect to
+ * @param {string} connection.outputS3Bucket - Location where Athena will output results of the query
+ * @param {number} connection.timeout - Polling interval (ms); falls back to 2000 when missing or negative
+ * @param {boolean} connection.sslEnabled - Whether to connect to Athena over SSL
+ * @returns {Promise} that resolves to the validated connection
+ */
+export function connect(connection) {
+    // Default when missing or negative (was `&&`, which made this branch unreachable:
+    // a falsy timeout is never < 0, so the default was never applied)
+    if (!connection.timeout || connection.timeout < 0) {
+        connection.timeout = DEFAULT_QUERY_INTERVAL;
+    }
+
+    return query(ATHENA_DEFAULT_QUERY, connection)
+    .then(() => connection);
+}
+
+/**
+ * The following method will execute a query against the specified connection
+ * @param {object} queryObject - The SQL to query against the connection
+ * @param {object} connection - Connection parameters
+ * @returns {Promise} that resolves to { columnnames, rows }
+ */
+export function query(queryObject, connection) {
+    connection.sqlStatement = queryObject;
+
+    return executeQuery(connection).then(dataSet => {
+        let columnnames = [];
+        let rows = [];
+
+        if (dataSet && dataSet.length > 0) {
+            // First row contains the column names
+            columnnames = dataSet[0].Data.map(columnname => columnname.VarCharValue);
+
+            // Loop through the remaining rows to extract data
+            rows = dataSet.slice(1).map(row => row.Data.map(element => element.VarCharValue));
+        }
+
+        return {columnnames, rows};
+    });
+}
+
+/**
+ * Should return a list of tables and their columns that are defined within the database.
+ * @param {object} connection - Connection parameters
+ * @param {string} connection.accessKey - AWS Access Key
+ * @param {string} connection.secretAccessKey - AWS Secret Key
+ * @param {string} connection.region - AWS Region
+ * @param {string} connection.database - Database name to connect to
+ * @param {string} connection.outputS3Bucket - Location where Athena will output results of the query
+ * @returns {Promise} that resolves to { columnnames, rows }
+ */
+export function schemas(connection) {
+    const sqlStatement = `${SHOW_SCHEMA_QUERY} = '${connection.database}'`;
+    return query(sqlStatement, connection);
+}
+
+
+/**
+ * Should return a list of tables that are in the database
+ * @param {object} connection - Connection Parameters
+ * @param {string} connection.accessKey - AWS Access Key
+ * @param {string} connection.secretAccessKey - AWS Secret Key
+ * @param {string} connection.region - AWS Region
+ * @param {string} connection.database - Database name to connect to
+ * @param {string} connection.outputS3Bucket - Location where Athena will output results of the query
+ * @returns {Promise} that resolves to an array of table names (header row is skipped)
+ */
+export function tables(connection) {
+    connection.sqlStatement = SHOW_TABLES_QUERY;
+    return executeQuery(connection).then(dataSet => {
+        const tableNames = dataSet.slice(1).map(row => row.Data[0].VarCharValue);
+        return tableNames;
+    });
+}
diff --git a/backend/persistent/datastores/drivers/athena.js b/backend/persistent/datastores/drivers/athena.js
new file mode 100644
index 000000000..5cdb1b0f5
--- /dev/null
+++ b/backend/persistent/datastores/drivers/athena.js
@@ -0,0 +1,233 @@
+'use strict';
+
+const AWS = require('aws-sdk');
+import Logger from '../../../logger';
+const NUMBER_OF_RETRIES = 50;
+
+/**
+ * The following function will create an AWS Athena Client
+ * @param {object} connection - AWS Athena Connection Parameters
+ * @param {string} connection.accessKey - AWS Access Key
+ * @param {string} connection.secretAccessKey
- AWS Secret Key
+ * @param {string} connection.region - AWS Region
+ * @returns {object} AWS Athena Client
+ */
+export function createAthenaClient(connection) {
+    const connectionParams = {
+        apiVersion: '2017-05-18',
+        accessKeyId: connection.accessKey,
+        secretAccessKey: connection.secretAccessKey,
+        region: connection.region,
+        maxRetries: NUMBER_OF_RETRIES
+    };
+
+    if (connection.sslEnabled) {
+        connectionParams.sslEnabled = connection.sslEnabled;
+    }
+    const athenaClient = new AWS.Athena(connectionParams);
+
+    return athenaClient;
+}
+
+/**
+ * The following method will execute the sql statement to query the
+ * athena database
+ * @param {object} athenaClient - Object created using create athena client
+ * @param {object} params - Connection Parameters
+ * @param {string} params.database - Database name to connect to
+ * @param {string} params.sqlStatement - SQL statement to execute
+ * @param {string} params.outputS3Bucket - Location where Athena will output results of the query
+ * @return {Promise} that resolves to the query execution id
+ */
+function startQuery(athenaClient, params) {
+    const client = athenaClient;
+
+    const queryParams = {
+        QueryString: params.sqlStatement,
+        ResultConfiguration: {
+            OutputLocation: params.outputS3Bucket,
+            EncryptionConfiguration: {
+                EncryptionOption: 'SSE_S3'
+            }
+        },
+        QueryExecutionContext: {
+            Database: params.database
+        }
+    };
+    return new Promise(function(resolve, reject) {
+        return client.startQueryExecution(queryParams, (err, data) => {
+            if (err) {
+                Logger.log(`Unexpected Error starting Athena Query ${err}`);
+                return reject(err);
+            }
+            const queryId = data.QueryExecutionId;
+            return resolve(queryId);
+        });
+    });
+}
+
+/**
+ * The following method will check to see if the query results
+ * have completed.
It will return -1 if the query errored, 0
+ * if it is still executing or 1 if it has completed
+ * @param {object} athenaClient - Object created using create athena client
+ * @param {string} queryExecutionId - AWS Query Execution Id
+ * @returns {Promise} resolves to { queryState, queryStatus }: -1 Error, 0 Still running, 1 Completed
+ */
+function queryResultsCompleted(athenaClient, queryExecutionId) {
+    const client = athenaClient;
+
+    const queryParams = {
+        QueryExecutionId: queryExecutionId
+    };
+
+    return new Promise(function(resolve, reject) {
+        return client.getQueryExecution(queryParams, (err, data) => {
+            if (err) {
+                Logger.log(`Unexpected Error getting Athena Query Execution Status ${err}`);
+                return reject(err); // reject with the underlying error (was a bare -1, which lost the cause)
+            }
+            const state = data.QueryExecution.Status.State;
+            let queryState = 0, queryStatus = '';
+            switch (state) {
+                case 'QUEUED':
+                    queryState = 0;
+                    queryStatus = 'Query is queued';
+                    break;
+                case 'RUNNING':
+                    queryState = 0;
+                    queryStatus = 'Query still executing';
+                    break;
+                case 'SUCCEEDED':
+                    queryState = 1;
+                    queryStatus = 'Query completed successfully';
+                    break;
+                case 'FAILED':
+                    queryState = -1;
+                    queryStatus = data.QueryExecution.Status.StateChangeReason;
+                    break;
+                case 'CANCELLED':
+                    queryState = -1;
+                    queryStatus = 'Query was cancelled';
+                    break;
+                default:
+                    queryState = -1;
+                    queryStatus = 'Unknown error executing the query';
+                    break;
+            }
+
+            const rst = {
+                queryState,
+                queryStatus
+            };
+            return resolve(rst);
+        });
+    });
+}
+
+/**
+ * The following method will stop the query execution based on the query id
+ * @param {object} athenaClient - Object created using create athena client
+ * @param {string} queryExecutionId - AWS Athena Query Id
+ * @returns {Promise} That resolves to AWS Stop Request
+ */
+export function stopQuery(athenaClient, queryExecutionId) {
+    const client = athenaClient;
+
+    const queryParams = {
+        QueryExecutionId: queryExecutionId
+    };
+
+    return new Promise(function(resolve, reject) {
+        return client.stopQueryExecution(queryParams, (err,
data) => {
+            if (err) {
+                Logger.log(`Unexpected Error stopping Athena Query Execution ${err}`);
+                return reject(err);
+            }
+            return resolve(data);
+        });
+    });
+}
+
+/**
+ * The following method will get the query results based on the query id
+ * @param {object} athenaClient - Object created using create athena client
+ * @param {string} queryExecutionId - AWS Athena Query Id
+ * @returns {Promise} Resolves to AWS Query Response
+ */
+function getQueryResults(athenaClient, queryExecutionId) {
+    const client = athenaClient;
+
+    const queryParams = {
+        QueryExecutionId: queryExecutionId
+    };
+
+    return new Promise(function(resolve, reject) {
+        client.getQueryResults(queryParams, (err, data) => {
+            if (err) {
+                return reject(err);
+            }
+            return resolve(data);
+        });
+    });
+}
+
+/**
+ * The following function will execute a query against Athena. It will start
+ * the query request and then periodically
+ * check to see if the query results have completed. If the query fails or is
+ * cancelled it will reject. If it receives data before the query
+ * times out it will return the response.
+ * @param {object} queryParams - Query Parameters
+ * @param {string} queryParams.accessKey - AWS Access Key
+ * @param {string} queryParams.secretAccessKey - AWS Secret Key
+ * @param {string} queryParams.region - AWS Region
+ * @param {boolean} queryParams.sslEnabled - Whether to connect to Athena over SSL
+ * @param {string} queryParams.database - Database name to connect to
+ * @param {string} queryParams.sqlStatement - SQL statement to execute
+ * @param {string} queryParams.outputS3Bucket - Location where Athena will output results of the query
+ * @param {number} queryParams.queryInterval - Polling interval (ms) between query status checks
+ * NOTE(review): getQueryResults fetches only the first page (no NextToken loop); large result sets may be truncated — confirm
+ * @returns {Promise} resolves to the AWS result rows
+ */
+export function executeQuery(queryParams) {
+    const client = createAthenaClient(queryParams);
+
+    return new Promise(function(resolve, reject) {
+        return startQuery(client, queryParams).then(queryExecutionId => {
+
+            // Define the wait interval
+            let retryInterval = queryParams.queryInterval;
+
+            // If retry interval is not defined or less than 1, default to 1000 ms
+            if ((!retryInterval) || (retryInterval < 1)) {
+                retryInterval = 1000;
+            }
+
+            const checkQueryStatus = () => {
+                queryResultsCompleted(client, queryExecutionId).then(queryResult => {
+                    if (queryResult.queryState < 0) {
+                        return reject(new Error(queryResult.queryStatus));
+                    } else if (queryResult.queryState === 1) {
+                        return getQueryResults(client, queryExecutionId).then(rst => {
+
+                            if (rst && rst.ResultSet && rst.ResultSet.Rows) {
+                                return resolve(rst.ResultSet.Rows);
+                            }
+                            return resolve([]);
+                        });
+                    }
+                    // Still queued/running: poll again after the retry interval
+                    return setTimeout(checkQueryStatus, retryInterval);
+                }).catch(err => {
+                    return reject(err);
+                });
+            };
+
+            checkQueryStatus();
+
+        }).catch(err => {
+            return reject(err);
+        });
+    });
+}
diff --git a/package.json b/package.json
index 9ee0329f1..38e8e7ff2 100644
--- a/package.json
+++ b/package.json
@@ -37,6 +37,7 @@ "test-unit-ibmdb": "cross-env
NODE_ENV=test BABEL_DISABLE_CACHE=1 electron-mocha --full-trace --timeout 90000 --compilers js:babel-register test/backend/datastores.ibmdb.spec.js",
    "test-unit-impala": "cross-env NODE_ENV=test BABEL_DISABLE_CACHE=1 electron-mocha --full-trace --timeout 90000 --compilers js:babel-register test/backend/datastores.impala.spec.js",
    "test-unit-livy": "cross-env NODE_ENV=test BABEL_DISABLE_CACHE=1 electron-mocha --full-trace --timeout 90000 --compilers js:babel-register test/backend/datastores.livy.spec.js",
+    "test-unit-athena": "cross-env NODE_ENV=test BABEL_DISABLE_CACHE=1 electron-mocha --full-trace --timeout 90000 --compilers js:babel-register test/backend/datastores.athena.spec.js",
    "test-unit-oauth2": "cross-env NODE_ENV=test BABEL_DISABLE_CACHE=1 electron-mocha --full-trace --timeout 90000 --compilers js:babel-register test/backend/routes.oauth2.spec.js",
    "test-unit-plotly": "cross-env NODE_ENV=test BABEL_DISABLE_CACHE=1 electron-mocha --full-trace --timeout 90000 --compilers js:babel-register test/backend/PlotlyAPI.spec.js",
    "test-unit-scheduler": "cross-env NODE_ENV=test BABEL_DISABLE_CACHE=1 electron-mocha --full-trace --timeout 90000 --compilers js:babel-register test/backend/QueryScheduler.spec.js",
diff --git a/test/athena/SETUP_ATHENA.md b/test/athena/SETUP_ATHENA.md
new file mode 100644
index 000000000..a8ec852b2
--- /dev/null
+++ b/test/athena/SETUP_ATHENA.md
@@ -0,0 +1,47 @@
+# Additional Resources
+The following is a link to the AWS Athena documentation on setting up an Athena table and database
+
+[AWS Athena](https://docs.aws.amazon.com/athena/latest/ug/getting-started.html)
+
+In addition, there is a live tutorial built into the AWS Athena Console that provides
+step by step instructions with pictures on how to create a database and table (including
+sample data)
+
+# Setup
+The following are the high-level instructions for setting up an Athena Database and Table.
+
+## Requirements:
+1. A valid AWS Account and access to the AWS Console
+2.
Experience working within the AWS Console
+3. Experience creating S3 Buckets and pushing content to it
+4. Experience with AWS IAMs for creating users and roles
+
+
+## Instructions:
+1. If you don't already have an S3 bucket, create one and copy the athena-sample.csv to it
+2. In the AWS Console select the Athena Service
+3. If this is your first time creating a database with Athena, click on Getting Started
+4. Click on Create Table
+5. It will give you the option for Automatically (with Glue) or Manually. Choose Manually
+6. For Database choose Create a new database
+7. Enter in Database Name, Table Name, and the S3 Location where you uploaded the CSV file and then Next
+8. Choose CSV for the option and Click on Next
+9. Add the following 2 columns with the following information and then click on Next:
+i. location - string
+ii. alcohol - double
+10. Skip adding Partitions and click on Create Table
+11. It will generate a CREATE EXTERNAL TABLE script (see sample below). Click on Run Query (this will create the table)
+
+```sql
+CREATE EXTERNAL TABLE IF NOT EXISTS alcohol_consumption_by_country_2010 (
+  `location` string,
+  `alcohol` double
+)
+ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+WITH SERDEPROPERTIES (
+  'serialization.format' = ',',
+  'field.delim' = ','
+) LOCATION 's3://plot.ly-athena/test-csv/'
+TBLPROPERTIES ('has_encrypted_data'='false');
+```
+12. 
Run sample SQL to see the results ( select * from alcohol_consumption_by_country_2010 ) and it should return sample data \ No newline at end of file diff --git a/test/athena/athena-sample.csv b/test/athena/athena-sample.csv new file mode 100644 index 000000000..d637c43a2 --- /dev/null +++ b/test/athena/athena-sample.csv @@ -0,0 +1,192 @@ +location, alcohol +Belarus, 17.5 +Moldova, 16.8 +Lithuania, 15.4 +Russia, 15.1 +Romania, 14.4 +Ukraine, 13.9 +Andorra, 13.8 +Hungary, 13.3 +Czech Republic, 13 +Slovakia, 13 +Portugal, 12.9 +Serbia, 12.6 +Grenada, 12.5 +Poland, 12.5 +Latvia, 12.3 +Finland, 12.3 +South Korea, 12.3 +France, 12.2 +Australia, 12.2 +Croatia, 12.2 +Ireland, 11.9 +Luxembourg, 11.9 +Germany, 11.8 +Slovenia, 11.6 +United Kingdom, 11.6 +Denmark, 11.4 +Bulgaria, 11.4 +Spain, 11.2 +Belgium, 11 +South Africa, 11 +New Zealand, 10.9 +Gabon, 10.9 +Namibia, 10.8 +Switzerland, 10.7 +Saint Lucia, 10.4 +Austria, 10.3 +Estonia, 10.3 +Greece, 10.3 +Kazakhstan, 10.3 +Canada, 10.2 +Nigeria, 10.1 +Netherlands, 9.9 +Uganda, 9.8 +Rwanda, 9.8 +Chile, 9.6 +Argentina, 9.3 +Burundi, 9.3 +United States, 9.2 +Cyprus, 9.2 +Sweden, 9.2 +Venezuela, 8.9 +Paraguay, 8.8 +Brazil, 8.7 +Sierra Leone, 8.7 +Montenegro, 8.7 +Belize, 8.5 +Cameroon, 8.4 +Botswana, 8.4 +Saint Kitts and Nevis, 8.2 +Guyana, 8.1 +Peru, 8.1 +Panama, 8 +Niue, 8 +Palau, 7.9 +Norway, 7.7 +Tanzania, 7.7 +Georgia, 7.7 +Uruguay, 7.6 +Angola, 7.5 +Laos, 7.3 +Japan, 7.2 +Mexico, 7.2 +Ecuador, 7.2 +Dominica, 7.1 +Iceland, 7.1 +Thailand, 7.1 +Bosnia and Herzegovina, 7.1 +Sao Tome and Principe, 7.1 +Malta, 7 +Albania, 7 +Bahamas, 6.9 +Dominican Republic, 6.9 +Mongolia, 6.9 +Cape Verde, 6.9 +Barbados, 6.8 +Burkina Faso, 6.8 +Italy, 6.7 +Trinidad and Tobago, 6.7 +China, 6.7 +Macedonia, 6.7 +Saint Vincent and the Grenadines, 6.6 +Equatorial Guinea, 6.6 +Suriname, 6.6 +Vietnam, 6.6 +Lesotho, 6.5 +Haiti, 6.4 +Cook Islands, 6.4 +Colombia, 6.2 +Ivory Coast, 6 +Bolivia, 5.9 +Swaziland, 5.7 +Zimbabwe, 5.7 +Seychelles, 5.6 +Cambodia, 
5.5 +Puerto Rico, 5.4 +Netherlands Antilles, 5.4 +Philippines, 5.4 +Costa Rica, 5.4 +Armenia, 5.3 +Cuba, 5.2 +Nicaragua, 5 +Jamaica, 4.9 +Ghana, 4.8 +Liberia, 4.7 +Uzbekistan, 4.6 +Chad, 4.4 +United Arab Emirates, 4.3 +Kyrgyzstan, 4.3 +India, 4.3 +Turkmenistan, 4.3 +Kenya, 4.3 +Ethiopia, 4.2 +Honduras, 4 +Guinea-Bissau, 4 +Zambia, 4 +Republic of the Congo, 3.9 +Guatemala, 3.8 +Central African Republic, 3.8 +North Korea, 3.7 +Sri Lanka, 3.7 +Mauritius, 3.6 +Samoa, 3.6 +Democratic Republic of the Congo, 3.6 +Nauru, 3.5 +Gambia, 3.4 +Federated States of Micronesia, 3.3 +El Salvador, 3.2 +Fiji, 3 +Papua New Guinea, 3 +Kiribati, 3 +Tajikistan, 2.8 +Israel, 2.8 +Sudan, 2.7 +Malawi, 2.5 +Lebanon, 2.4 +Azerbaijan, 2.3 +Mozambique, 2.3 +Togo, 2.3 +Nepal, 2.2 +Brunei, 2.1 +Benin, 2.1 +Singapore, 2 +Turkey, 2 +Madagascar, 1.8 +Solomon Islands, 1.7 +Tonga, 1.6 +Tunisia, 1.5 +Tuvalu, 1.5 +Qatar, 1.5 +Vanuatu, 1.4 +Djibouti, 1.3 +Malaysia, 1.3 +Syria, 1.2 +Maldives, 1.2 +Mali, 1.1 +Eritrea, 1.1 +Algeria, 1 +Iran, 1 +Oman, 0.9 +Brunei, 0.9 +Morocco, 0.9 +Jordan, 0.7 +Bhutan, 0.7 +Guinea, 0.7 +Burma, 0.7 +Afghanistan, 0.7 +Senegal, 0.6 +Indonesia, 0.6 +Timor-Leste, 0.6 +Iraq, 0.5 +Somalia, 0.5 +Egypt, 0.4 +Niger, 0.3 +Yemen, 0.3 +Comoros, 0.2 +Saudi Arabia, 0.2 +Bangladesh, 0.2 +Kuwait, 0.1 +Libya, 0.1 +Mauritania, 0.1 +Pakistan, 0.1 diff --git a/test/backend/datastores.athena.spec.js b/test/backend/datastores.athena.spec.js new file mode 100644 index 000000000..584c489fb --- /dev/null +++ b/test/backend/datastores.athena.spec.js @@ -0,0 +1,255 @@ +// do not use import, otherwise other test units won't be able to reactivate nock +const nock = require('nock'); + +import {assert} from 'chai'; +import uuid from 'uuid'; + +import {connect, schemas, query, tables} from '../../backend/persistent/datastores/athena'; + +describe('Athena:', function () { + const URL = 'https://athena.us-east-1.amazonaws.com:443'; + const PATH = '/'; + + // Connection object shared by all the tests + const 
conn = { + region: 'us-east-1', + accessKey: 'XXXXXXXX', + secretAccessKey: 'XXXXXAAAA', + database: 'PLOT.LY-TEST', + outputS3Bucket: 's3://aws-athena-query-results-11111111-us-east-1/', + queryInterval: 1000, + maxRetries: 50 + }; + + before(function() { + // Enable nock if it has been disabled by other specs + if (!nock.isActive()) nock.activate(); + }); + + after(function() { + // Disable nock + nock.restore(); + }); + + it('connect() succeeds', function() { + const queryStatement = 'SELECT table_name FROM information_schema.columns LIMIT 1'; + const columnNames = []; + const rows = []; + + mockAthenaResponses(queryStatement, columnNames, rows); + + return connect(conn).then(function(connection) { + assert.isDefined(connection, 'connection is defined'); + }); + }); + + it('schemas() retrieves schemas of all tables', function() { + const queryStatement = ` + SELECT table_name, column_name, data_type + FROM information_schema.columns + WHERE table_schema = '${conn.database}' + `; + + const columnNames = [ + 'table_name', + 'column_name', + 'data_type' + ]; + + const rows = [ + ['table_name', 'column_name', 'data_type'], + ['clean_logs', 'serialnumber', 'varchar'], + ['clean_logs', 'email', 'varchar'], + ['clean_logs', 'company', 'varchar'], + ['clean_logs', 'platform', 'varchar'], + ['test', 'company', 'varchar'], + ['test', 'log_type', 'varchar'], + ['test', 'product', 'varchar'], + ['test', 'timestamp', 'timestamp'], + ['clean_logs_json', 'company', 'varchar'], + ['clean_logs_json', 'loglevel', 'varchar'], + ['clean_logs_json', 'type', 'varchar'], + ['clean_logs_json', 'subtype', 'varchar'], + ['glue_cleaned_logs', 'company', 'varchar'], + ['glue_cleaned_logs', 'loglevel', 'varchar'], + ['glue_cleaned_logs', 'type', 'varchar'], + ['glue_cleaned_logs', 'subtype', 'varchar'] + ]; + + mockAthenaResponses(queryStatement, columnNames, rows); + + return schemas(conn).then(function(results) { + assert.deepEqual(results.columnnames, columnNames, 'Unexpected column 
names');
+            assert.deepEqual(results.rows, rows, 'Unexpected rows');
+        });
+    });
+
+    it('query() executes a query', function() {
+        const queryStatement = "select serialnumber from clean_logs where serialnumber != '' limit 10";
+
+        const columnNames = [
+            'serialnumber'
+        ];
+
+        const rows = [
+            [ '9e2c31ec-b944-4baf-aaa5-4c672a0c048f-2017-06-22-14:49:36.458' ],
+            [ '864041030069248' ],
+            [ '3190d6df-325d-431b-aea7-a7fb8f306b9d-2017-07-18-19:00:27.284' ],
+            [ 'd72afd2e-be50-436e-88bd-60e9ee2841bf-2017-06-21-19:58:04.638' ],
+            [ '355757082591351' ],
+            [ '05c4cb35-0cec-4877-a645-f9dd8eba4940-2017-09-11-16:18:32.274' ],
+            [ '357125071914133' ],
+            [ '864041030068687' ],
+            [ '862789030222843' ],
+            [ '864875034115451' ]
+        ];
+
+        mockAthenaResponses(queryStatement, columnNames, rows);
+
+        return query(queryStatement, conn).then(function(results) {
+            assert.deepEqual(results.columnnames, columnNames, 'Unexpected column names');
+            assert.deepEqual(results.rows, rows, 'Unexpected rows');
+        });
+    });
+
+    it('tables() executes a query', function() {
+        const queryStatement = 'SHOW TABLES';
+
+        const columnNames = [
+            'table_name'
+        ];
+
+        const rows = [
+            [ 'clean_logs' ],
+            [ 'clean_logs_json' ],
+            [ 'glue_cleaned_logs' ],
+            [ 'test' ]
+        ];
+
+        mockAthenaResponses(queryStatement, columnNames, rows);
+
+        return tables(conn).then(function(results) {
+            const expectedRows = rows.map(row => row[0]);
+            assert.deepEqual(results, expectedRows, 'Unexpected rows');
+        });
+    });
+
+    function mockAthenaResponses(queryStatement, columnNames, rows) {
+        // destructure from the connection object itself (conn.database is a
+        // string, so destructuring from it would yield undefined for both keys)
+        const {database, outputS3Bucket} = conn;
+        const queryExecutionId = uuid.v4();
+        const submissionDateTime = 1522797420.024;
+
+        // mock connect response
+        nock(URL).post(PATH).reply(200, {
+            QueryExecutionId: queryExecutionId
+        });
+
+        nock(URL).post(PATH).reply(200, {
+            'QueryExecution': {
+                'Query': queryStatement,
+                'QueryExecutionContext': {'Database': database},
+                'QueryExecutionId': queryExecutionId,
+                'ResultConfiguration': {
+                    
'EncryptionConfiguration': {'EncryptionOption': 'SSE_S3'}, + 'OutputLocation': outputS3Bucket + }, + 'Statistics': {}, + 'Status': { + 'State': 'RUNNING', + 'SubmissionDateTime': submissionDateTime + } + }, + 'QueryExecutionDetail': { + 'OutputLocation': outputS3Bucket, + 'Query': queryStatement, + 'QueryExecutionContext': {'Database': database}, + 'QueryExecutionId': queryExecutionId, + 'ResultConfiguration': { + 'EncryptionConfiguration': {'EncryptionOption': 'SSE_S3'}, + 'OutputLocation': outputS3Bucket + }, + 'Stats': {}, + 'Status': { + 'State': 'RUNNING', + 'SubmissionDateTime': submissionDateTime + } + } + }); + + nock(URL).post(PATH).reply(200, { + 'QueryExecution': { + 'Query': queryStatement, + 'QueryExecutionContext': {'Database': database}, + 'QueryExecutionId': queryExecutionId, + 'ResultConfiguration': { + 'EncryptionConfiguration': {'EncryptionOption': 'SSE_S3'}, + 'OutputLocation': outputS3Bucket + }, + 'Statistics': {}, + 'Status': { + 'State': 'SUCCEEDED', + 'SubmissionDateTime': submissionDateTime + } + }, + 'QueryExecutionDetail': { + 'OutputLocation': outputS3Bucket, + 'Query': queryStatement, + 'QueryExecutionContext': {'Database': database}, + 'QueryExecutionId': queryExecutionId, + 'ResultConfiguration': { + 'EncryptionConfiguration': {'EncryptionOption': 'SSE_S3'}, + 'OutputLocation': outputS3Bucket + }, + 'Stats': {}, + 'Status': { + 'State': 'SUCCEEDED', + 'SubmissionDateTime': submissionDateTime + } + } + }); + + const headerAndRows = [columnNames].concat(rows); + + const columnInfos = columnNames.map(function(name) { + return { + 'CaseSensitive': true, + 'CatalogName': 'hive', + 'Label': name, + 'Name': name, + 'Nullable': 'UNKNOWN', + 'Precision': 2147483647, + 'Scale': 0, + 'SchemaName': '', + 'TableName': '', + 'Type': 'varchar' + }; + }); + + const resultRows = headerAndRows.map(function(row) { + return { + 'Data': row + }; + }); + + const resultRows2 = headerAndRows.map(function(row) { + return { + 'Data': row.map(function(value) { 
+ return {'VarCharValue': value}; + }) + }; + }); + + nock(URL).post(PATH).reply(200, { + 'ResultSet': { + 'ColumnInfos': columnInfos, + 'ResultRows': resultRows, + 'ResultSetMetadata': { + 'ColumnInfo': columnInfos + }, + 'Rows': resultRows2 + }, + 'UpdateCount': 0, + 'UpdateType': '' + }); + } +});