| --- |
| # ---------------------------------------------------------------------------- |
| # |
| # *** AUTO GENERATED CODE *** Type: MMv1 *** |
| # |
| # ---------------------------------------------------------------------------- |
| # |
| # This file is automatically generated by Magic Modules and manual |
| # changes will be clobbered when the file is regenerated. |
| # |
| # Please read more about how to change this file in |
| # .github/CONTRIBUTING.md. |
| # |
| # ---------------------------------------------------------------------------- |
| subcategory: "Dataplex" |
| description: |- |
| Represents a user-visible job which provides the insights for the related data source. |
| --- |
| |
| # google_dataplex_datascan |
| |
| Represents a user-visible job which provides the insights for the related data source. |
| |
| |
| To get more information about Datascan, see: |
| |
| * [API documentation](https://cloud.google.com/dataplex/docs/reference/rest) |
| * How-to Guides |
| * [Official Documentation](https://cloud.google.com/dataplex/docs) |
| |
| ## Example Usage - Dataplex Datascan Basic Profile |
| |
| |
| ```hcl |
| resource "google_dataplex_datascan" "basic_profile" { |
| location = "us-central1" |
| data_scan_id = "dataprofile-basic" |
| |
| data { |
| resource = "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare" |
| } |
| |
| execution_spec { |
| trigger { |
| on_demand {} |
| } |
| } |
| |
| data_profile_spec {} |
| |
| project = "my-project-name" |
| } |
| ``` |
| ## Example Usage - Dataplex Datascan Full Profile |
| |
| |
| ```hcl |
| resource "google_dataplex_datascan" "full_profile" { |
| location = "us-central1" |
| display_name = "Full Datascan Profile" |
| data_scan_id = "dataprofile-full" |
| description = "Example resource - Full Datascan Profile" |
| labels = { |
| author = "billing" |
| } |
| |
| data { |
| resource = "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare" |
| } |
| |
| execution_spec { |
| trigger { |
| schedule { |
| cron = "TZ=America/New_York 1 1 * * *" |
| } |
| } |
| } |
| |
| data_profile_spec { |
| sampling_percent = 80 |
| row_filter = "word_count > 10" |
| include_fields { |
| field_names = ["word_count"] |
| } |
| exclude_fields { |
| field_names = ["property_type"] |
| } |
| post_scan_actions { |
| bigquery_export { |
| results_table = "//bigquery.googleapis.com/projects/my-project-name/datasets/dataplex_dataset/tables/profile_export" |
| } |
| } |
| } |
| |
| project = "my-project-name" |
| |
| depends_on = [ |
| google_bigquery_dataset.source |
| ] |
| } |
| |
| resource "google_bigquery_dataset" "source" { |
| dataset_id = "dataplex_dataset" |
| friendly_name = "test" |
| description = "This is a test description" |
| location = "US" |
| delete_contents_on_destroy = true |
| } |
| ``` |
| ## Example Usage - Dataplex Datascan Basic Quality |
| |
| |
| ```hcl |
| resource "google_dataplex_datascan" "basic_quality" { |
| location = "us-central1" |
| data_scan_id = "dataquality-basic" |
| |
| data { |
| resource = "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare" |
| } |
| |
| execution_spec { |
| trigger { |
| on_demand {} |
| } |
| } |
| |
| data_quality_spec { |
| rules { |
| dimension = "VALIDITY" |
| name = "rule1" |
| description = "rule 1 for validity dimension" |
| table_condition_expectation { |
| sql_expression = "COUNT(*) > 0" |
| } |
| } |
| } |
| |
| project = "my-project-name" |
| } |
| ``` |
| ## Example Usage - Dataplex Datascan Full Quality |
| |
| |
| ```hcl |
| resource "google_dataplex_datascan" "full_quality" { |
| location = "us-central1" |
| display_name = "Full Datascan Quality" |
| data_scan_id = "dataquality-full" |
| description = "Example resource - Full Datascan Quality" |
| labels = { |
| author = "billing" |
| } |
| |
| data { |
| resource = "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/austin_bikeshare/tables/bikeshare_stations" |
| } |
| |
| execution_spec { |
| trigger { |
| schedule { |
| cron = "TZ=America/New_York 1 1 * * *" |
| } |
| } |
| field = "modified_date" |
| } |
| |
| data_quality_spec { |
| sampling_percent = 5 |
| row_filter = "station_id > 1000" |
| rules { |
| column = "address" |
| dimension = "VALIDITY" |
| threshold = 0.99 |
| non_null_expectation {} |
| } |
| |
| rules { |
| column = "council_district" |
| dimension = "VALIDITY" |
| ignore_null = true |
| threshold = 0.9 |
| range_expectation { |
| min_value = 1 |
| max_value = 10 |
| strict_min_enabled = true |
| strict_max_enabled = false |
| } |
| } |
| |
| rules { |
| column = "power_type" |
| dimension = "VALIDITY" |
| ignore_null = false |
| regex_expectation { |
| regex = ".*solar.*" |
| } |
| } |
| |
| rules { |
| column = "property_type" |
| dimension = "VALIDITY" |
| ignore_null = false |
| set_expectation { |
| values = ["sidewalk", "parkland"] |
| } |
| } |
| |
| |
| rules { |
| column = "address" |
| dimension = "UNIQUENESS" |
| uniqueness_expectation {} |
| } |
| |
| rules { |
| column = "number_of_docks" |
| dimension = "VALIDITY" |
| statistic_range_expectation { |
| statistic = "MEAN" |
| min_value = 5 |
| max_value = 15 |
| strict_min_enabled = true |
| strict_max_enabled = true |
| } |
| } |
| |
| rules { |
| column = "footprint_length" |
| dimension = "VALIDITY" |
| row_condition_expectation { |
| sql_expression = "footprint_length > 0 AND footprint_length <= 10" |
| } |
| } |
| |
| rules { |
| dimension = "VALIDITY" |
| table_condition_expectation { |
| sql_expression = "COUNT(*) > 0" |
| } |
| } |
| |
| rules { |
| dimension = "VALIDITY" |
| sql_assertion { |
| sql_statement = "select * from bigquery-public-data.austin_bikeshare.bikeshare_stations where station_id is null" |
| } |
| } |
| } |
| |
| |
| project = "my-project-name" |
| } |
| ``` |
| |
| ## Argument Reference |
| |
| The following arguments are supported: |
| |
| |
| * `data` - |
| (Required) |
| The data source for DataScan. |
| Structure is [documented below](#nested_data). |
| |
| * `execution_spec` - |
| (Required) |
| DataScan execution settings. |
| Structure is [documented below](#nested_execution_spec). |
| |
| * `location` - |
| (Required) |
| The location where the data scan should reside. |
| |
| * `data_scan_id` - |
| (Required) |
| DataScan identifier. Must contain only lowercase letters, numbers and hyphens. Must start with a letter. Must end with a number or a letter. |
| |
| |
| <a name="nested_data"></a>The `data` block supports: |
| |
| * `entity` - |
| (Optional) |
| The Dataplex entity that represents the data source(e.g. BigQuery table) for Datascan. |
| |
| * `resource` - |
| (Optional) |
The service-qualified full resource name of the cloud resource for a DataScan job to scan against. The field could be:
Cloud Storage bucket for DataDiscoveryScan, or BigQuery table of type "TABLE" for DataProfileScan/DataQualityScan.
| |
| <a name="nested_execution_spec"></a>The `execution_spec` block supports: |
| |
| * `trigger` - |
| (Required) |
| Spec related to how often and when a scan should be triggered. |
| Structure is [documented below](#nested_trigger). |
| |
| * `field` - |
| (Optional) |
| The unnested field (of type Date or Timestamp) that contains values which monotonically increase over time. If not specified, a data scan will run for all data in the table. |
| |
| |
| <a name="nested_trigger"></a>The `trigger` block supports: |
| |
| * `on_demand` - |
| (Optional) |
| The scan runs once via dataScans.run API. |
| |
| * `schedule` - |
| (Optional) |
| The scan is scheduled to run periodically. |
| Structure is [documented below](#nested_schedule). |
| |
| |
| <a name="nested_schedule"></a>The `schedule` block supports: |
| |
| * `cron` - |
| (Required) |
| Cron schedule for running scans periodically. This field is required for Schedule scans. |
| |
| - - - |
| |
| |
| * `description` - |
| (Optional) |
| Description of the scan. |
| |
| * `display_name` - |
| (Optional) |
| User friendly display name. |
| |
| * `labels` - |
| (Optional) |
| User-defined labels for the scan. A list of key->value pairs. |
| |
| **Note**: This field is non-authoritative, and will only manage the labels present in your configuration. |
| Please refer to the field `effective_labels` for all of the labels present on the resource. |
| |
| * `data_quality_spec` - |
| (Optional) |
| DataQualityScan related setting. |
| Structure is [documented below](#nested_data_quality_spec). |
| |
| * `data_profile_spec` - |
| (Optional) |
| DataProfileScan related setting. |
| Structure is [documented below](#nested_data_profile_spec). |
| |
| * `project` - (Optional) The ID of the project in which the resource belongs. |
| If it is not provided, the provider project is used. |
| |
| |
| <a name="nested_data_quality_spec"></a>The `data_quality_spec` block supports: |
| |
| * `sampling_percent` - |
| (Optional) |
| The percentage of the records to be selected from the dataset for DataScan. |
| Value can range between 0.0 and 100.0 with up to 3 significant decimal digits. |
| Sampling is not applied if `sampling_percent` is not specified, 0 or 100. |
| |
| * `row_filter` - |
| (Optional) |
| A filter applied to all rows in a single DataScan job. The filter needs to be a valid SQL expression for a WHERE clause in BigQuery standard SQL syntax. Example: col1 >= 0 AND col2 < 10 |
| |
| * `post_scan_actions` - |
| (Optional) |
| Actions to take upon job completion. |
| Structure is [documented below](#nested_post_scan_actions). |
| |
| * `rules` - |
| (Optional) |
| The list of rules to evaluate against a data source. At least one rule is required. |
| Structure is [documented below](#nested_rules). |
| |
| |
| <a name="nested_post_scan_actions"></a>The `post_scan_actions` block supports: |
| |
| * `bigquery_export` - |
| (Optional) |
| If set, results will be exported to the provided BigQuery table. |
| Structure is [documented below](#nested_bigquery_export). |
| |
| |
| <a name="nested_bigquery_export"></a>The `bigquery_export` block supports: |
| |
| * `results_table` - |
| (Optional) |
| The BigQuery table to export DataQualityScan results to. |
  Format: //bigquery.googleapis.com/projects/PROJECT_ID/datasets/DATASET_ID/tables/TABLE_ID
| |
| <a name="nested_rules"></a>The `rules` block supports: |
| |
| * `column` - |
| (Optional) |
| The unnested column which this rule is evaluated against. |
| |
| * `ignore_null` - |
| (Optional) |
| Rows with null values will automatically fail a rule, unless ignoreNull is true. In that case, such null rows are trivially considered passing. Only applicable to ColumnMap rules. |
| |
| * `dimension` - |
| (Required) |
| The dimension a rule belongs to. Results are also aggregated at the dimension level. Supported dimensions are ["COMPLETENESS", "ACCURACY", "CONSISTENCY", "VALIDITY", "UNIQUENESS", "INTEGRITY"] |
| |
| * `threshold` - |
| (Optional) |
| The minimum ratio of passing_rows / total_rows required to pass this rule, with a range of [0.0, 1.0]. 0 indicates default value (i.e. 1.0). |
| |
| * `name` - |
| (Optional) |
| A mutable name for the rule. |
| The name must contain only letters (a-z, A-Z), numbers (0-9), or hyphens (-). |
| The maximum length is 63 characters. |
| Must start with a letter. |
| Must end with a number or a letter. |
| |
| * `description` - |
| (Optional) |
| Description of the rule. |
| The maximum length is 1,024 characters. |
| |
| * `range_expectation` - |
| (Optional) |
| ColumnMap rule which evaluates whether each column value lies between a specified range. |
| Structure is [documented below](#nested_range_expectation). |
| |
| * `non_null_expectation` - |
| (Optional) |
| ColumnMap rule which evaluates whether each column value is null. |
| |
| * `set_expectation` - |
| (Optional) |
| ColumnMap rule which evaluates whether each column value is contained by a specified set. |
| Structure is [documented below](#nested_set_expectation). |
| |
| * `regex_expectation` - |
| (Optional) |
| ColumnMap rule which evaluates whether each column value matches a specified regex. |
| Structure is [documented below](#nested_regex_expectation). |
| |
| * `uniqueness_expectation` - |
| (Optional) |
| Row-level rule which evaluates whether each column value is unique. |
| |
| * `statistic_range_expectation` - |
| (Optional) |
| ColumnAggregate rule which evaluates whether the column aggregate statistic lies between a specified range. |
| Structure is [documented below](#nested_statistic_range_expectation). |
| |
| * `row_condition_expectation` - |
| (Optional) |
| Table rule which evaluates whether each row passes the specified condition. |
| Structure is [documented below](#nested_row_condition_expectation). |
| |
| * `table_condition_expectation` - |
| (Optional) |
| Table rule which evaluates whether the provided expression is true. |
| Structure is [documented below](#nested_table_condition_expectation). |
| |
| * `sql_assertion` - |
| (Optional) |
| Table rule which evaluates whether any row matches invalid state. |
| Structure is [documented below](#nested_sql_assertion). |
| |
| |
| <a name="nested_range_expectation"></a>The `range_expectation` block supports: |
| |
| * `min_value` - |
| (Optional) |
| The minimum column value allowed for a row to pass this validation. At least one of minValue and maxValue need to be provided. |
| |
| * `max_value` - |
| (Optional) |
| The maximum column value allowed for a row to pass this validation. At least one of minValue and maxValue need to be provided. |
| |
| * `strict_min_enabled` - |
| (Optional) |
| Whether each value needs to be strictly greater than ('>') the minimum, or if equality is allowed. |
| Only relevant if a minValue has been defined. Default = false. |
| |
| * `strict_max_enabled` - |
| (Optional) |
| Whether each value needs to be strictly lesser than ('<') the maximum, or if equality is allowed. |
| Only relevant if a maxValue has been defined. Default = false. |
| |
| <a name="nested_set_expectation"></a>The `set_expectation` block supports: |
| |
| * `values` - |
| (Required) |
| Expected values for the column value. |
| |
| <a name="nested_regex_expectation"></a>The `regex_expectation` block supports: |
| |
| * `regex` - |
| (Required) |
| A regular expression the column value is expected to match. |
| |
| <a name="nested_statistic_range_expectation"></a>The `statistic_range_expectation` block supports: |
| |
| * `statistic` - |
  (Required)
  The aggregate statistic of the column to evaluate.
  Possible values are: `STATISTIC_UNDEFINED`, `MEAN`, `MIN`, `MAX`.
| |
| * `min_value` - |
| (Optional) |
| The minimum column statistic value allowed for a row to pass this validation. |
| At least one of minValue and maxValue need to be provided. |
| |
| * `max_value` - |
| (Optional) |
| The maximum column statistic value allowed for a row to pass this validation. |
| At least one of minValue and maxValue need to be provided. |
| |
| * `strict_min_enabled` - |
| (Optional) |
| Whether column statistic needs to be strictly greater than ('>') the minimum, or if equality is allowed. |
| Only relevant if a minValue has been defined. Default = false. |
| |
| * `strict_max_enabled` - |
| (Optional) |
| Whether column statistic needs to be strictly lesser than ('<') the maximum, or if equality is allowed. |
| Only relevant if a maxValue has been defined. Default = false. |
| |
| <a name="nested_row_condition_expectation"></a>The `row_condition_expectation` block supports: |
| |
| * `sql_expression` - |
| (Required) |
| The SQL expression. |
| |
| <a name="nested_table_condition_expectation"></a>The `table_condition_expectation` block supports: |
| |
| * `sql_expression` - |
| (Required) |
| The SQL expression. |
| |
| <a name="nested_sql_assertion"></a>The `sql_assertion` block supports: |
| |
| * `sql_statement` - |
| (Required) |
| The SQL statement. |
| |
| <a name="nested_data_profile_spec"></a>The `data_profile_spec` block supports: |
| |
| * `sampling_percent` - |
| (Optional) |
| The percentage of the records to be selected from the dataset for DataScan. |
| Value can range between 0.0 and 100.0 with up to 3 significant decimal digits. |
| Sampling is not applied if `sampling_percent` is not specified, 0 or 100. |
| |
| * `row_filter` - |
| (Optional) |
| A filter applied to all rows in a single DataScan job. The filter needs to be a valid SQL expression for a WHERE clause in BigQuery standard SQL syntax. Example: col1 >= 0 AND col2 < 10 |
| |
| * `post_scan_actions` - |
| (Optional) |
| Actions to take upon job completion. |
| Structure is [documented below](#nested_post_scan_actions). |
| |
| * `include_fields` - |
| (Optional) |
| The fields to include in data profile. |
| If not specified, all fields at the time of profile scan job execution are included, except for ones listed in `exclude_fields`. |
| Structure is [documented below](#nested_include_fields). |
| |
| * `exclude_fields` - |
| (Optional) |
| The fields to exclude from data profile. |
| If specified, the fields will be excluded from data profile, regardless of `include_fields` value. |
| Structure is [documented below](#nested_exclude_fields). |
| |
| |
| <a name="nested_post_scan_actions"></a>The `post_scan_actions` block supports: |
| |
| * `bigquery_export` - |
| (Optional) |
| If set, results will be exported to the provided BigQuery table. |
| Structure is [documented below](#nested_bigquery_export). |
| |
| |
| <a name="nested_bigquery_export"></a>The `bigquery_export` block supports: |
| |
| * `results_table` - |
| (Optional) |
| The BigQuery table to export DataProfileScan results to. |
  Format: //bigquery.googleapis.com/projects/PROJECT_ID/datasets/DATASET_ID/tables/TABLE_ID
| |
| <a name="nested_include_fields"></a>The `include_fields` block supports: |
| |
| * `field_names` - |
| (Optional) |
| Expected input is a list of fully qualified names of fields as in the schema. |
| Only top-level field names for nested fields are supported. |
| For instance, if 'x' is of nested field type, listing 'x' is supported but 'x.y.z' is not supported. Here 'y' and 'y.z' are nested fields of 'x'. |
| |
| <a name="nested_exclude_fields"></a>The `exclude_fields` block supports: |
| |
| * `field_names` - |
| (Optional) |
| Expected input is a list of fully qualified names of fields as in the schema. |
| Only top-level field names for nested fields are supported. |
| For instance, if 'x' is of nested field type, listing 'x' is supported but 'x.y.z' is not supported. Here 'y' and 'y.z' are nested fields of 'x'. |
| |
| ## Attributes Reference |
| |
| In addition to the arguments listed above, the following computed attributes are exported: |
| |
| * `id` - an identifier for the resource with format `projects/{{project}}/locations/{{location}}/dataScans/{{data_scan_id}}` |
| |
| * `name` - |
| The relative resource name of the scan, of the form: projects/{project}/locations/{locationId}/dataScans/{datascan_id}, where project refers to a project_id or project_number and locationId refers to a GCP region. |
| |
| * `uid` - |
| System generated globally unique ID for the scan. This ID will be different if the scan is deleted and re-created with the same name. |
| |
| * `state` - |
| Current state of the DataScan. |
| |
| * `create_time` - |
| The time when the scan was created. |
| |
| * `update_time` - |
| The time when the scan was last updated. |
| |
| * `execution_status` - |
| Status of the data scan execution. |
| Structure is [documented below](#nested_execution_status). |
| |
| * `type` - |
| The type of DataScan. |
| |
| * `terraform_labels` - |
| The combination of labels configured directly on the resource |
| and default labels configured on the provider. |
| |
| * `effective_labels` - |
| All of labels (key/value pairs) present on the resource in GCP, including the labels configured through Terraform, other clients and services. |
| |
| |
| <a name="nested_execution_status"></a>The `execution_status` block contains: |
| |
| * `latest_job_end_time` - |
| (Output) |
  The time when the latest DataScanJob ended.
| |
| * `latest_job_start_time` - |
| (Output) |
  The time when the latest DataScanJob started.
| |
| ## Timeouts |
| |
| This resource provides the following |
| [Timeouts](https://developer.hashicorp.com/terraform/plugin/sdkv2/resources/retries-and-customizable-timeouts) configuration options: |
| |
| - `create` - Default is 5 minutes. |
| - `update` - Default is 5 minutes. |
| - `delete` - Default is 5 minutes. |
| |
| ## Import |
| |
| |
| Datascan can be imported using any of these accepted formats: |
| |
| * `projects/{{project}}/locations/{{location}}/dataScans/{{data_scan_id}}` |
| * `{{project}}/{{location}}/{{data_scan_id}}` |
| * `{{location}}/{{data_scan_id}}` |
| * `{{data_scan_id}}` |
| |
| |
| In Terraform v1.5.0 and later, use an [`import` block](https://developer.hashicorp.com/terraform/language/import) to import Datascan using one of the formats above. For example: |
| |
| ```tf |
| import { |
| id = "projects/{{project}}/locations/{{location}}/dataScans/{{data_scan_id}}" |
| to = google_dataplex_datascan.default |
| } |
| ``` |
| |
| When using the [`terraform import` command](https://developer.hashicorp.com/terraform/cli/commands/import), Datascan can be imported using one of the formats above. For example: |
| |
| ``` |
| $ terraform import google_dataplex_datascan.default projects/{{project}}/locations/{{location}}/dataScans/{{data_scan_id}} |
| $ terraform import google_dataplex_datascan.default {{project}}/{{location}}/{{data_scan_id}} |
| $ terraform import google_dataplex_datascan.default {{location}}/{{data_scan_id}} |
| $ terraform import google_dataplex_datascan.default {{data_scan_id}} |
| ``` |
| |
| ## User Project Overrides |
| |
| This resource supports [User Project Overrides](https://registry.terraform.io/providers/hashicorp/google/latest/docs/guides/provider_reference#user_project_override). |