-
Notifications
You must be signed in to change notification settings - Fork 14.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
AIP-72: Add support to get Variables in task SDK to author tasks #45458
base: main
Are you sure you want to change the base?
Changes from all commits
f87beb8
8b7c8ee
79287f5
609cc9e
be9f2aa
e05c8fc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -31,8 +31,11 @@ | |
TaskCallbackRequest, | ||
) | ||
from airflow.configuration import conf | ||
from airflow.models import Variable | ||
from airflow.models.dagbag import DagBag | ||
from airflow.sdk.execution_time.comms import GetConnection, GetVariable | ||
from airflow.sdk.api.datamodels._generated import VariableResponse | ||
from airflow.sdk.execution_time import task_runner | ||
from airflow.sdk.execution_time.comms import GetConnection, GetVariable, VariableResult | ||
from airflow.sdk.execution_time.supervisor import WatchedSubprocess | ||
from airflow.serialization.serialized_objects import LazyDeserializedDAG, SerializedDAG | ||
from airflow.stats import Stats | ||
|
@@ -43,26 +46,27 @@ | |
from airflow.typing_compat import Self | ||
from airflow.utils.context import Context | ||
|
||
COMMS_DECODER: task_runner.CommsDecoder[ToChild, ToParent] | ||
|
||
|
||
def _parse_file_entrypoint(): | ||
import os | ||
|
||
import structlog | ||
|
||
from airflow.sdk.execution_time import task_runner | ||
# Parse DAG file, send JSON back up! | ||
|
||
comms_decoder = task_runner.CommsDecoder[DagFileParseRequest, DagFileParsingResult]( | ||
global COMMS_DECODER | ||
COMMS_DECODER = task_runner.CommsDecoder[ToChild, ToParent]( | ||
input=sys.stdin, | ||
decoder=TypeAdapter[DagFileParseRequest](DagFileParseRequest), | ||
decoder=TypeAdapter[ToChild](ToChild), | ||
) | ||
msg = comms_decoder.get_message() | ||
comms_decoder.request_socket = os.fdopen(msg.requests_fd, "wb", buffering=0) | ||
msg = COMMS_DECODER.get_message() | ||
COMMS_DECODER.request_socket = os.fdopen(msg.requests_fd, "wb", buffering=0) | ||
|
||
log = structlog.get_logger(logger_name="task") | ||
|
||
result = _parse_file(msg, log) | ||
comms_decoder.send_request(log, result) | ||
COMMS_DECODER.send_request(log, result) | ||
|
||
|
||
def _parse_file(msg: DagFileParseRequest, log: FilteringBoundLogger) -> DagFileParsingResult: | ||
|
@@ -180,6 +184,11 @@ class DagFileParsingResult(BaseModel): | |
Field(discriminator="type"), | ||
] | ||
|
||
ToChild = Annotated[ | ||
Union[DagFileParseRequest, VariableResult], | ||
Field(discriminator="type"), | ||
] | ||
|
||
|
||
@attrs.define() | ||
class DagFileProcessorProcess(WatchedSubprocess): | ||
|
@@ -234,8 +243,16 @@ def _handle_request(self, msg: ToParent, log: FilteringBoundLogger) -> None: # | |
if isinstance(msg, DagFileParsingResult): | ||
self.parsing_result = msg | ||
return | ||
# GetVariable etc -- parsing a dag can run top level code that asks for an Airflow Variable | ||
super()._handle_request(msg, log) | ||
Comment on lines
-237
to
-238
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We won't really need this, because for cases such as variables and connections, we will have to interact with the DB model directly. If we go to |
||
elif isinstance(msg, GetVariable): | ||
key = msg.key | ||
try: | ||
value = Variable.get(key) | ||
except KeyError: | ||
log.exception("Variable: %s does not exist", key) | ||
raise | ||
var_result = VariableResult.from_variable_response(VariableResponse(key=key, value=value)) | ||
resp = var_result.model_dump_json(exclude_unset=True).encode() | ||
self.stdin.write(resp + b"\n") | ||
|
||
@property | ||
def is_ready(self) -> bool: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
|
||
from __future__ import annotations | ||
|
||
from airflow import DAG | ||
from airflow.models.baseoperator import BaseOperator | ||
from airflow.sdk import Variable | ||
|
||
value = Variable.get(key="my_var") | ||
|
||
|
||
class CustomOperator(BaseOperator):
    """Operator that prints the module-level ``value`` fetched via ``Variable.get``."""

    def execute(self, context):
        # ``value`` is the module-level name assigned at the top of this DAG file.
        message = f"Variable defined at top level of dag has value: {value}"
        print(message)
|
||
|
||
with DAG(dag_id="example_get_variable_using_task_sdk") as dag: | ||
CustomOperator(task_id="print_top_level_variable") |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,12 +21,12 @@ | |
import structlog | ||
|
||
from airflow.sdk.exceptions import AirflowRuntimeError, ErrorType | ||
from airflow.sdk.execution_time.variable import _get_variable | ||
from airflow.sdk.types import NOTSET | ||
|
||
if TYPE_CHECKING: | ||
from airflow.sdk.definitions.connection import Connection | ||
from airflow.sdk.definitions.variable import Variable | ||
from airflow.sdk.execution_time.comms import ConnectionResult, VariableResult | ||
from airflow.sdk.execution_time.comms import ConnectionResult | ||
|
||
|
||
def _convert_connection_result_conn(conn_result: ConnectionResult) -> Connection: | ||
|
@@ -36,16 +36,6 @@ def _convert_connection_result_conn(conn_result: ConnectionResult) -> Connection | |
return Connection(**conn_result.model_dump(exclude={"type"}, by_alias=True)) | ||
|
||
|
||
def _convert_variable_result_to_variable(var_result: VariableResult, deserialize_json: bool) -> Variable: | ||
from airflow.sdk.definitions.variable import Variable | ||
|
||
if deserialize_json: | ||
import json | ||
|
||
var_result.value = json.loads(var_result.value) # type: ignore | ||
return Variable(**var_result.model_dump(exclude={"type"})) | ||
|
||
|
||
def _get_connection(conn_id: str) -> Connection: | ||
# TODO: This should probably be moved to a separate module like `airflow.sdk.execution_time.comms` | ||
# or `airflow.sdk.execution_time.connection` | ||
|
@@ -66,26 +56,6 @@ def _get_connection(conn_id: str) -> Connection: | |
return _convert_connection_result_conn(msg) | ||
|
||
|
||
def _get_variable(key: str, deserialize_json: bool) -> Variable: | ||
# TODO: This should probably be moved to a separate module like `airflow.sdk.execution_time.comms` | ||
# or `airflow.sdk.execution_time.variable` | ||
# A reason to not move it to `airflow.sdk.execution_time.comms` is that it | ||
# will make that module depend on Task SDK, which is not ideal because we intend to | ||
# keep Task SDK as a separate package than execution time mods. | ||
from airflow.sdk.execution_time.comms import ErrorResponse, GetVariable | ||
from airflow.sdk.execution_time.task_runner import SUPERVISOR_COMMS | ||
|
||
log = structlog.get_logger(logger_name="task") | ||
SUPERVISOR_COMMS.send_request(log=log, msg=GetVariable(key=key)) | ||
msg = SUPERVISOR_COMMS.get_message() | ||
if isinstance(msg, ErrorResponse): | ||
raise AirflowRuntimeError(msg) | ||
|
||
if TYPE_CHECKING: | ||
assert isinstance(msg, VariableResult) | ||
return _convert_variable_result_to_variable(msg, deserialize_json) | ||
Comment on lines
-69
to
-86
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this is the right time to move these helpers to |
||
|
||
|
||
class ConnectionAccessor: | ||
"""Wrapper to access Connection entries in template.""" | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
from __future__ import annotations | ||
|
||
from typing import TYPE_CHECKING | ||
|
||
import structlog | ||
|
||
from airflow.sdk.exceptions import AirflowRuntimeError | ||
|
||
if TYPE_CHECKING: | ||
from airflow.sdk.definitions.variable import Variable | ||
from airflow.sdk.execution_time.comms import VariableResult | ||
|
||
|
||
def _convert_variable_result_to_variable(var_result: VariableResult, deserialize_json: bool) -> Variable:
    """
    Build a ``Variable`` from a ``VariableResult`` message.

    The message is dumped to a dict (without its ``type`` discriminator) and the
    resulting fields are passed to the ``Variable`` constructor. The incoming
    ``var_result`` is left untouched: when JSON decoding is requested, the decoded
    value replaces the ``value`` entry of the dumped dict rather than being
    written back onto the message.

    :param var_result: the message carrying the variable's fields.
    :param deserialize_json: when true, ``var_result.value`` is parsed with
        ``json.loads`` and the parsed object is used as the variable's value.
    :return: a ``Variable`` populated from the message fields.
    :raises json.JSONDecodeError: if ``deserialize_json`` is true and the value
        is not valid JSON.
    """
    from airflow.sdk.definitions.variable import Variable

    fields = var_result.model_dump(exclude={"type"})
    if deserialize_json:
        import json

        # Substitute the decoded value in the dumped fields instead of assigning it
        # to ``var_result.value``; that keeps the message object unchanged for the
        # caller and avoids storing a non-string in the message's value field.
        fields["value"] = json.loads(var_result.value)  # type: ignore[arg-type]
    return Variable(**fields)
|
||
|
||
def _get_variable(key: str, deserialize_json: bool = False) -> Variable:
    """
    Request the variable ``key`` over the active comms channel and return it.

    A ``GetVariable`` request is sent on the channel and the next message is read
    back. An ``ErrorResponse`` is raised as ``AirflowRuntimeError``; any other
    message is converted with ``_convert_variable_result_to_variable``.

    :param key: name of the variable to request.
    :param deserialize_json: forwarded to the conversion helper; when true the
        returned value is JSON-decoded.
    :return: the requested ``Variable``.
    :raises AirflowRuntimeError: if the reply is an ``ErrorResponse``.
    """
    from airflow.sdk.execution_time.comms import ErrorResponse, GetVariable

    try:
        # First assume the request originates from a running task and use the
        # task runner's supervisor channel.
        from airflow.sdk.execution_time.task_runner import SUPERVISOR_COMMS as channel  # type: ignore
    except ImportError:
        # Otherwise the request comes from DAG-level code, so fall back to the
        # DAG processor's channel.
        from airflow.dag_processing.processor import COMMS_DECODER as channel  # type: ignore

    request = GetVariable(key=key)
    logger = structlog.get_logger(logger_name="task")
    channel.send_request(log=logger, msg=request)

    response = channel.get_message()
    if not isinstance(response, ErrorResponse):
        if TYPE_CHECKING:
            assert isinstance(response, VariableResult)
        return _convert_variable_result_to_variable(response, deserialize_json)
    raise AirflowRuntimeError(response)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We won't really need this, because for cases such as variables and connections, we will have to interact with the DB model directly. If we go to `super()._handle_request`, it brings the SDK API client into the picture, which shouldn't be needed for DAG-level stuff.