| --- |
| # ---------------------------------------------------------------------------- |
| # |
| # *** AUTO GENERATED CODE *** Type: MMv1 *** |
| # |
| # ---------------------------------------------------------------------------- |
| # |
| # This file is automatically generated by Magic Modules and manual |
| # changes will be clobbered when the file is regenerated. |
| # |
| # Please read more about how to change this file in |
| # .github/CONTRIBUTING.md. |
| # |
| # ---------------------------------------------------------------------------- |
| subcategory: "Dataplex" |
| description: |- |
| A Dataplex task represents the work that you want Dataplex to do on a schedule. |
| --- |
| |
| # google\_dataplex\_task |
| |
| A Dataplex task represents the work that you want Dataplex to do on a schedule. It encapsulates code, parameters, and the schedule. |
| |
| |
| To get more information about Task, see: |
| |
| * [API documentation](https://cloud.google.com/dataplex/docs/reference/rest/v1/projects.locations.lakes.tasks) |
| * How-to Guides |
| * [Official Documentation](https://cloud.google.com/dataplex/docs) |
| |
| ## Example Usage - Dataplex Task Basic |
| |
| |
| ```hcl |
| data "google_project" "project" { |
| |
| } |
| |
| resource "google_dataplex_lake" "example" { |
| name = "tf-test-lake%{random_suffix}" |
| location = "us-central1" |
| project = "my-project-name" |
| } |
| |
| |
| resource "google_dataplex_task" "example" { |
| |
| task_id = "tf-test-task%{random_suffix}" |
| location = "us-central1" |
| lake = google_dataplex_lake.example.name |
| |
| description = "Test Task Basic" |
| |
| display_name = "task-basic" |
| |
| labels = { "count": "3" } |
| |
| trigger_spec { |
| type = "RECURRING" |
| disabled = false |
| max_retries = 3 |
| start_time = "2023-10-02T15:01:23Z" |
| schedule = "1 * * * *" |
| } |
| |
| execution_spec { |
| service_account = "${data.google_project.project.number}-compute@developer.gserviceaccount.com" |
| project = "my-project-name" |
| max_job_execution_lifetime = "100s" |
| kms_key = "234jn2kjn42k3n423" |
| } |
| |
| spark { |
| python_script_file = "gs://dataproc-examples/pyspark/hello-world/hello-world.py" |
| |
| } |
| |
| project = "my-project-name" |
| |
| } |
| ``` |
| ## Example Usage - Dataplex Task Spark |
| |
| |
| ```hcl |
| # VPC network |
| resource "google_compute_network" "default" { |
| name = "tf-test-workstation-cluster%{random_suffix}" |
| auto_create_subnetworks = true |
| } |
| |
| data "google_project" "project" { |
| |
| } |
| |
| resource "google_dataplex_lake" "example_spark" { |
| name = "tf-test-lake%{random_suffix}" |
| location = "us-central1" |
| project = "my-project-name" |
| } |
| |
| |
| resource "google_dataplex_task" "example_spark" { |
| |
| task_id = "tf-test-task%{random_suffix}" |
| location = "us-central1" |
| lake = google_dataplex_lake.example_spark.name |
| trigger_spec { |
| type = "ON_DEMAND" |
| } |
| |
| description = "task-spark-terraform" |
| |
| execution_spec { |
| service_account = "${data.google_project.project.number}-compute@developer.gserviceaccount.com" |
| args = { |
| TASK_ARGS = "--output_location,gs://spark-job/task-result, --output_format, json" |
| } |
| |
| } |
| |
| spark { |
| infrastructure_spec { |
| batch { |
| executors_count = 2 |
| max_executors_count = 100 |
| } |
| container_image { |
| image = "test-image" |
| java_jars = ["test-java-jars.jar"] |
| python_packages = ["gs://bucket-name/my/path/to/lib.tar.gz"] |
| properties = { "name": "wrench", "mass": "1.3kg", "count": "3" } |
| } |
| vpc_network { |
| network_tags = ["test-network-tag"] |
| sub_network = google_compute_network.default.id |
| } |
| } |
| file_uris = ["gs://terrafrom-test/test.csv"] |
| archive_uris = ["gs://terraform-test/test.csv"] |
| sql_script = "show databases" |
| } |
| |
| project = "my-project-name" |
| |
| } |
| ``` |
| ## Example Usage - Dataplex Task Notebook |
| |
| |
| ```hcl |
| # VPC network |
| resource "google_compute_network" "default" { |
| name = "tf-test-workstation-cluster%{random_suffix}" |
| auto_create_subnetworks = true |
| } |
| |
| |
| data "google_project" "project" { |
| |
| } |
| |
| resource "google_dataplex_lake" "example_notebook" { |
| name = "tf-test-lake%{random_suffix}" |
| location = "us-central1" |
| project = "my-project-name" |
| } |
| |
| |
| resource "google_dataplex_task" "example_notebook" { |
| |
| task_id = "tf-test-task%{random_suffix}" |
| location = "us-central1" |
| lake = google_dataplex_lake.example_notebook.name |
| trigger_spec { |
| type = "RECURRING" |
| schedule = "1 * * * *" |
| } |
| |
| execution_spec { |
| service_account = "${data.google_project.project.number}-compute@developer.gserviceaccount.com" |
| args = { |
| TASK_ARGS = "--output_location,gs://spark-job-jars-anrajitha/task-result, --output_format, json" |
| } |
| } |
| notebook { |
| notebook = "gs://terraform-test/test-notebook.ipynb" |
| infrastructure_spec { |
| batch { |
| executors_count = 2 |
| max_executors_count = 100 |
| } |
| container_image { |
| image = "test-image" |
| java_jars = ["test-java-jars.jar"] |
| python_packages = ["gs://bucket-name/my/path/to/lib.tar.gz"] |
| properties = { "name": "wrench", "mass": "1.3kg", "count": "3" } |
| } |
| vpc_network { |
| network_tags = ["test-network-tag"] |
| network = google_compute_network.default.id |
| } |
| } |
| file_uris = ["gs://terraform-test/test.csv"] |
| archive_uris = ["gs://terraform-test/test.csv"] |
| |
| } |
| project = "my-project-name" |
| |
| |
| } |
| ``` |
| |
| ## Argument Reference |
| |
| The following arguments are supported: |
| |
| |
| * `trigger_spec` - |
| (Required) |
  Spec related to how often and when a task should be triggered.
| Structure is [documented below](#nested_trigger_spec). |
| |
| * `execution_spec` - |
| (Required) |
  Spec related to how a task is executed.
| Structure is [documented below](#nested_execution_spec). |
| |
| |
| <a name="nested_trigger_spec"></a>The `trigger_spec` block supports: |
| |
| * `type` - |
| (Required) |
| Trigger type of the user-specified Task |
| Possible values are: `ON_DEMAND`, `RECURRING`. |
| |
| * `start_time` - |
| (Optional) |
| The first run of the task will be after this time. If not specified, the task will run shortly after being submitted if ON_DEMAND and based on the schedule if RECURRING. |
| |
| * `disabled` - |
| (Optional) |
| Prevent the task from executing. This does not cancel already running tasks. It is intended to temporarily disable RECURRING tasks. |
| |
| * `max_retries` - |
| (Optional) |
| Number of retry attempts before aborting. Set to zero to never attempt to retry a failed task. |
| |
| * `schedule` - |
| (Optional) |
| Cron schedule (https://en.wikipedia.org/wiki/Cron) for running tasks periodically. To explicitly set a timezone to the cron tab, apply a prefix in the cron tab: 'CRON_TZ=${IANA_TIME_ZONE}' or 'TZ=${IANA_TIME_ZONE}'. The ${IANA_TIME_ZONE} may only be a valid string from IANA time zone database. For example, CRON_TZ=America/New_York 1 * * * *, or TZ=America/New_York 1 * * * *. This field is required for RECURRING tasks. |
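
For example, a `trigger_spec` for a recurring task that runs at minute 1 of every hour in the America/New_York time zone might look like the following fragment, placed inside a `google_dataplex_task` resource (the start time shown is an illustrative value):

```hcl
trigger_spec {
  type        = "RECURRING"
  # The CRON_TZ prefix pins the cron schedule to an IANA time zone.
  schedule    = "CRON_TZ=America/New_York 1 * * * *"
  start_time  = "2024-01-01T00:00:00Z"
  max_retries = 3
}
```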
| |
| <a name="nested_execution_spec"></a>The `execution_spec` block supports: |
| |
| * `args` - |
| (Optional) |
  The arguments to pass to the task. The args can use placeholders of the format ${placeholder} as part of a key/value string; these are interpolated before the args are passed to the driver. Currently supported placeholders: ${taskId} and ${job_time}. To pass positional args, set the key to TASK_ARGS; the value should be a comma-separated string of all the positional arguments. To use a delimiter other than a comma, refer to https://cloud.google.com/sdk/gcloud/reference/topic/escaping. If other keys are present in the args, TASK_ARGS is passed as the last argument. An object containing a list of 'key': value pairs. Example: { 'name': 'wrench', 'mass': '1.3kg', 'count': '3' }. See the fragment after this list for how to write these placeholders in HCL.
| |
| * `service_account` - |
| (Required) |
| Service account to use to execute a task. If not provided, the default Compute service account for the project is used. |
| |
| * `project` - |
| (Optional) |
| The project in which jobs are run. By default, the project containing the Lake is used. If a project is provided, the ExecutionSpec.service_account must belong to this project. |
| |
| * `max_job_execution_lifetime` - |
| (Optional) |
| The maximum duration after which the job execution is expired. A duration in seconds with up to nine fractional digits, ending with 's'. Example: '3.5s'. |
| |
| * `kms_key` - |
| (Optional) |
| The Cloud KMS key to use for encryption, of the form: projects/{project_number}/locations/{locationId}/keyRings/{key-ring-name}/cryptoKeys/{key-name}. |
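
As a sketch, an `execution_spec` that passes positional arguments, caps the job lifetime, and uses a customer-managed key could look like the fragment below; the service account, bucket, and key names are placeholders. Note that a literal `${job_time}` placeholder has to be written as `$${job_time}` in HCL so Terraform does not treat it as its own interpolation:

```hcl
execution_spec {
  service_account = "my-task-runner@my-project-name.iam.gserviceaccount.com"
  args = {
    # $${job_time} renders as the literal ${job_time} placeholder for Dataplex.
    TASK_ARGS = "--output_location,gs://my-bucket/task-result/$${job_time},--output_format,json"
  }
  max_job_execution_lifetime = "3600s"
  kms_key                    = "projects/my-project-name/locations/us-central1/keyRings/my-key-ring/cryptoKeys/my-key"
}
```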
| |
| - - - |
| |
| |
| * `description` - |
| (Optional) |
| User-provided description of the task. |
| |
| * `display_name` - |
| (Optional) |
  User-friendly display name.
| |
| * `labels` - |
| (Optional) |
| User-defined labels for the task. |
| |
| **Note**: This field is non-authoritative, and will only manage the labels present in your configuration. |
| Please refer to the field `effective_labels` for all of the labels present on the resource. |
| |
| * `spark` - |
| (Optional) |
  Config related to running custom Spark tasks.
| Structure is [documented below](#nested_spark). |
| |
| * `notebook` - |
| (Optional) |
  Config related to running scheduled Notebooks.
| Structure is [documented below](#nested_notebook). |
| |
| * `location` - |
| (Optional) |
  The location in which the task will be created.
| |
| * `lake` - |
| (Optional) |
  The lake in which the task will be created.
| |
| * `task_id` - |
| (Optional) |
  The ID of the task.
| |
| * `project` - (Optional) The ID of the project in which the resource belongs. |
| If it is not provided, the provider project is used. |
| |
| |
| <a name="nested_spark"></a>The `spark` block supports: |
| |
| * `file_uris` - |
| (Optional) |
| Cloud Storage URIs of files to be placed in the working directory of each executor. |
| |
| * `archive_uris` - |
| (Optional) |
| Cloud Storage URIs of archives to be extracted into the working directory of each executor. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip. |
| |
| * `infrastructure_spec` - |
| (Optional) |
| Infrastructure specification for the execution. |
| Structure is [documented below](#nested_infrastructure_spec). |
| |
| * `main_jar_file_uri` - |
| (Optional) |
| The Cloud Storage URI of the jar file that contains the main class. The execution args are passed in as a sequence of named process arguments (--key=value). |
| |
| * `main_class` - |
| (Optional) |
| The name of the driver's main class. The jar file that contains the class must be in the default CLASSPATH or specified in jar_file_uris. The execution args are passed in as a sequence of named process arguments (--key=value). |
| |
| * `python_script_file` - |
| (Optional) |
  The Cloud Storage URI of the main Python file to use as the driver. Must be a .py file. The execution args are passed in as a sequence of named process arguments (--key=value).
| |
| * `sql_script_file` - |
| (Optional) |
  A reference to a query file. This can be the Cloud Storage URI of the query file or it can be the path to a SqlScript Content. The execution args are used to declare a set of script variables (set key='value';).
| |
| * `sql_script` - |
| (Optional) |
| The query text. The execution args are used to declare a set of script variables (set key='value';). |
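
The basic example above drives the task with `python_script_file`; a JAR-based Spark task could instead use a fragment like the one below (the bucket paths are placeholders, and typically only one of the script, JAR, class, or SQL fields is set):

```hcl
spark {
  main_jar_file_uri = "gs://my-bucket/jars/my-spark-job.jar"
  file_uris         = ["gs://my-bucket/config/job.conf"]
  archive_uris      = ["gs://my-bucket/deps/dependencies.zip"]
}
```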
| |
| |
| <a name="nested_infrastructure_spec"></a>The `infrastructure_spec` block supports: |
| |
| * `batch` - |
| (Optional) |
| Compute resources needed for a Task when using Dataproc Serverless. |
| Structure is [documented below](#nested_batch). |
| |
| * `container_image` - |
| (Optional) |
| Container Image Runtime Configuration. |
| Structure is [documented below](#nested_container_image). |
| |
| * `vpc_network` - |
| (Optional) |
  VPC network.
| Structure is [documented below](#nested_vpc_network). |
| |
| |
| <a name="nested_batch"></a>The `batch` block supports: |
| |
| * `executors_count` - |
| (Optional) |
| Total number of job executors. Executor Count should be between 2 and 100. [Default=2] |
| |
| * `max_executors_count` - |
| (Optional) |
| Max configurable executors. If maxExecutorsCount > executorsCount, then auto-scaling is enabled. Max Executor Count should be between 2 and 1000. [Default=1000] |
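
For instance, a `batch` block that starts with two executors and allows auto-scaling up to ten could be written as:

```hcl
batch {
  executors_count     = 2
  max_executors_count = 10 # Greater than executors_count, so auto-scaling is enabled.
}
```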
| |
| <a name="nested_container_image"></a>The `container_image` block supports: |
| |
| * `image` - |
| (Optional) |
| Container image to use. |
| |
| * `java_jars` - |
| (Optional) |
| A list of Java JARS to add to the classpath. Valid input includes Cloud Storage URIs to Jar binaries. For example, gs://bucket-name/my/path/to/file.jar |
| |
| * `python_packages` - |
| (Optional) |
| A list of python packages to be installed. Valid formats include Cloud Storage URI to a PIP installable library. For example, gs://bucket-name/my/path/to/lib.tar.gz |
| |
| * `properties` - |
| (Optional) |
| Override to common configuration of open source components installed on the Dataproc cluster. The properties to set on daemon config files. Property keys are specified in prefix:property format, for example core:hadoop.tmp.dir. For more information, see Cluster properties. |
| |
| <a name="nested_vpc_network"></a>The `vpc_network` block supports: |
| |
| * `network_tags` - |
| (Optional) |
| List of network tags to apply to the job. |
| |
| * `network` - |
| (Optional) |
| The Cloud VPC network in which the job is run. By default, the Cloud VPC network named Default within the project is used. |
| |
| * `sub_network` - |
| (Optional) |
| The Cloud VPC sub-network in which the job is run. |
| |
| <a name="nested_notebook"></a>The `notebook` block supports: |
| |
| * `notebook` - |
| (Required) |
| Path to input notebook. This can be the Cloud Storage URI of the notebook file or the path to a Notebook Content. The execution args are accessible as environment variables (TASK_key=value). |
| |
| * `infrastructure_spec` - |
| (Optional) |
| Infrastructure specification for the execution. |
| Structure is [documented below](#nested_infrastructure_spec). |
| |
| * `file_uris` - |
| (Optional) |
| Cloud Storage URIs of files to be placed in the working directory of each executor. |
| |
| * `archive_uris` - |
| (Optional) |
| Cloud Storage URIs of archives to be extracted into the working directory of each executor. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip. |
| |
| |
| <a name="nested_infrastructure_spec"></a>The `infrastructure_spec` block supports: |
| |
| * `batch` - |
| (Optional) |
| Compute resources needed for a Task when using Dataproc Serverless. |
| Structure is [documented below](#nested_batch). |
| |
| * `container_image` - |
| (Optional) |
| Container Image Runtime Configuration. |
| Structure is [documented below](#nested_container_image). |
| |
| * `vpc_network` - |
| (Optional) |
  VPC network.
| Structure is [documented below](#nested_vpc_network). |
| |
| |
| <a name="nested_batch"></a>The `batch` block supports: |
| |
| * `executors_count` - |
| (Optional) |
| Total number of job executors. Executor Count should be between 2 and 100. [Default=2] |
| |
| * `max_executors_count` - |
| (Optional) |
| Max configurable executors. If maxExecutorsCount > executorsCount, then auto-scaling is enabled. Max Executor Count should be between 2 and 1000. [Default=1000] |
| |
| <a name="nested_container_image"></a>The `container_image` block supports: |
| |
| * `image` - |
| (Optional) |
| Container image to use. |
| |
| * `java_jars` - |
| (Optional) |
| A list of Java JARS to add to the classpath. Valid input includes Cloud Storage URIs to Jar binaries. For example, gs://bucket-name/my/path/to/file.jar |
| |
| * `python_packages` - |
| (Optional) |
| A list of python packages to be installed. Valid formats include Cloud Storage URI to a PIP installable library. For example, gs://bucket-name/my/path/to/lib.tar.gz |
| |
| * `properties` - |
| (Optional) |
| Override to common configuration of open source components installed on the Dataproc cluster. The properties to set on daemon config files. Property keys are specified in prefix:property format, for example core:hadoop.tmp.dir. For more information, see Cluster properties. |
| |
| <a name="nested_vpc_network"></a>The `vpc_network` block supports: |
| |
| * `network_tags` - |
| (Optional) |
| List of network tags to apply to the job. |
| |
| * `network` - |
| (Optional) |
| The Cloud VPC network in which the job is run. By default, the Cloud VPC network named Default within the project is used. |
| |
| * `sub_network` - |
| (Optional) |
| The Cloud VPC sub-network in which the job is run. |
| |
| ## Attributes Reference |
| |
| In addition to the arguments listed above, the following computed attributes are exported: |
| |
| * `id` - an identifier for the resource with format `projects/{{project}}/locations/{{location}}/lakes/{{lake}}/tasks/{{task_id}}` |
| |
| * `name` - |
  The relative resource name of the task, of the form: projects/{project_number}/locations/{locationId}/lakes/{lakeId}/tasks/{name}.
| |
| * `uid` - |
| System generated globally unique ID for the task. This ID will be different if the task is deleted and re-created with the same name. |
| |
| * `create_time` - |
| The time when the task was created. |
| |
| * `update_time` - |
| The time when the task was last updated. |
| |
| * `state` - |
| Current state of the task. |
| |
| * `execution_status` - |
  Status of the task execution.
| Structure is [documented below](#nested_execution_status). |
| |
| * `terraform_labels` - |
| The combination of labels configured directly on the resource |
| and default labels configured on the provider. |
| |
| * `effective_labels` - |
  All labels (key/value pairs) present on the resource in GCP, including the labels configured through Terraform, other clients and services.
| |
| |
| <a name="nested_execution_status"></a>The `execution_status` block contains: |
| |
| * `update_time` - |
| (Output) |
| Last update time of the status. |
| |
| * `latest_job` - |
| (Output) |
  Latest job execution.
| Structure is [documented below](#nested_latest_job). |
| |
| |
| <a name="nested_latest_job"></a>The `latest_job` block contains: |
| |
| * `name` - |
| (Output) |
| The relative resource name of the job, of the form: projects/{project_number}/locations/{locationId}/lakes/{lakeId}/tasks/{taskId}/jobs/{jobId}. |
| |
| * `uid` - |
| (Output) |
| System generated globally unique ID for the job. |
| |
| * `start_time` - |
| (Output) |
| The time when the job was started. |
| |
| * `end_time` - |
| (Output) |
| The time when the job ended. |
| |
| * `state` - |
| (Output) |
| Execution state for the job. |
| |
| * `retry_count` - |
| (Output) |
| The number of times the job has been retried (excluding the initial attempt). |
| |
| * `service` - |
| (Output) |
| The underlying service running a job. |
| |
| * `service_job` - |
| (Output) |
| The full resource name for the job run under a particular service. |
| |
| * `message` - |
| (Output) |
| Additional information about the current state. |
| |
| ## Timeouts |
| |
| This resource provides the following |
| [Timeouts](https://developer.hashicorp.com/terraform/plugin/sdkv2/resources/retries-and-customizable-timeouts) configuration options: |
| |
| - `create` - Default is 5 minutes. |
| - `update` - Default is 5 minutes. |
| - `delete` - Default is 5 minutes. |
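
If the defaults are too short for your environment, they can be overridden with a `timeouts` block inside the resource, for example:

```hcl
resource "google_dataplex_task" "example" {
  # ... task configuration ...

  timeouts {
    create = "10m"
    update = "10m"
    delete = "10m"
  }
}
```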
| |
| ## Import |
| |
| |
| Task can be imported using any of these accepted formats: |
| |
| * `projects/{{project}}/locations/{{location}}/lakes/{{lake}}/tasks/{{task_id}}` |
| * `{{project}}/{{location}}/{{lake}}/{{task_id}}` |
| * `{{location}}/{{lake}}/{{task_id}}` |
| |
| |
| In Terraform v1.5.0 and later, use an [`import` block](https://developer.hashicorp.com/terraform/language/import) to import Task using one of the formats above. For example: |
| |
| ```tf |
| import { |
| id = "projects/{{project}}/locations/{{location}}/lakes/{{lake}}/tasks/{{task_id}}" |
| to = google_dataplex_task.default |
| } |
| ``` |
| |
| When using the [`terraform import` command](https://developer.hashicorp.com/terraform/cli/commands/import), Task can be imported using one of the formats above. For example: |
| |
| ``` |
| $ terraform import google_dataplex_task.default projects/{{project}}/locations/{{location}}/lakes/{{lake}}/tasks/{{task_id}} |
| $ terraform import google_dataplex_task.default {{project}}/{{location}}/{{lake}}/{{task_id}} |
| $ terraform import google_dataplex_task.default {{location}}/{{lake}}/{{task_id}} |
| ``` |
| |
| ## User Project Overrides |
| |
| This resource supports [User Project Overrides](https://registry.terraform.io/providers/hashicorp/google/latest/docs/guides/provider_reference#user_project_override). |
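
A minimal provider configuration with the override enabled might look like the following (the billing project is a placeholder):

```hcl
provider "google" {
  user_project_override = true
  billing_project       = "my-project-name"
}
```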