flytekit.types.schema.types
| Property |
Type |
Description |
MESSAGEPACK |
str |
|
T |
TypeVar |
|
def generate_ordered_files(
directory: os.PathLike,
n: int,
) -> typing.Generator[str, None, None]
| Parameter |
Type |
Description |
directory |
os.PathLike |
|
n |
int |
|
class FlyteSchema(
local_path: typing.Optional[str],
remote_path: typing.Optional[str],
supported_mode: SchemaOpenMode,
downloader: typing.Optional[typing.Callable],
)
| Parameter |
Type |
Description |
local_path |
typing.Optional[str] |
|
remote_path |
typing.Optional[str] |
|
supported_mode |
SchemaOpenMode |
|
downloader |
typing.Optional[typing.Callable] |
|
| Property |
Type |
Description |
local_path |
None |
|
supported_mode |
None |
|
def deserialize_flyte_schema(
info,
) -> FlyteSchema
| Parameter |
Type |
Description |
info |
|
|
def from_dict(
d,
dialect,
)
| Parameter |
Type |
Description |
d |
|
|
dialect |
|
|
def from_json(
data: typing.Union[str, bytes, bytearray],
decoder: collections.abc.Callable[[typing.Union[str, bytes, bytearray]], dict[typing.Any, typing.Any]],
from_dict_kwargs: typing.Any,
) -> ~T
| Parameter |
Type |
Description |
data |
typing.Union[str, bytes, bytearray] |
|
decoder |
collections.abc.Callable[[typing.Union[str, bytes, bytearray]], dict[typing.Any, typing.Any]] |
|
from_dict_kwargs |
typing.Any |
|
def open(
dataframe_fmt: typing.Optional[type],
override_mode: typing.Optional[SchemaOpenMode],
) -> typing.Union[SchemaReader, SchemaWriter]
Returns a reader or writer depending on the mode of the object when created. This mode can be
overridden, but only if the override is permitted. For example, if the object was
created in read mode, a “write mode” override is not allowed.
If the object was created in write mode, a read override is allowed.
| Parameter |
Type |
Description |
dataframe_fmt |
typing.Optional[type] |
Type of the dataframe, for example pandas.DataFrame. |
override_mode |
typing.Optional[SchemaOpenMode] |
Overrides the default mode. Valid values are SchemaOpenMode.READ and SchemaOpenMode.WRITE. For example, if you have written to a schema and want to re-open it for reading, you can use this override. A read-only schema object cannot be opened in write mode. |
def serialize_flyte_schema()
def to_json(
encoder: collections.abc.Callable[[typing.Any], typing.Union[str, bytes, bytearray]],
to_dict_kwargs: typing.Any,
) -> typing.Union[str, bytes, bytearray]
| Parameter |
Type |
Description |
encoder |
collections.abc.Callable[[typing.Any], typing.Union[str, bytes, bytearray]] |
|
to_dict_kwargs |
typing.Any |
|
class FlyteSchemaTransformer()
| Property |
Type |
Description |
is_async |
None |
|
name |
None |
|
python_type |
None |
This returns the python type |
type_assertions_enabled |
None |
Indicates if the transformer wants type assertions to be enabled at the core type engine layer |
def assert_type(
t: Type[FlyteSchema],
v: typing.Any,
)
| Parameter |
Type |
Description |
t |
Type[FlyteSchema] |
|
v |
typing.Any |
|
def async_to_literal(
ctx: FlyteContext,
python_val: FlyteSchema,
python_type: Type[FlyteSchema],
expected: LiteralType,
) -> Literal
Converts a given python_val to a Flyte Literal, assuming the given python_val matches the declared python_type.
Implementers should refrain from using type(python_val) and instead rely on the passed-in python_type. If these
do not match (or are not allowed), the Transformer implementer should raise an AssertionError that clearly states
what the mismatch was.
| Parameter |
Type |
Description |
ctx |
FlyteContext |
A FlyteContext, useful in accessing the filesystem and other attributes |
python_val |
FlyteSchema |
The actual value to be transformed |
python_type |
Type[FlyteSchema] |
The assumed type of the value (this matches the declared type on the function) |
expected |
LiteralType |
Expected Literal Type |
def async_to_python_value(
ctx: FlyteContext,
lv: Literal,
expected_python_type: Type[FlyteSchema],
) -> FlyteSchema
Converts the given Literal to a Python Type. If the conversion cannot be done, an AssertionError should be raised.
| Parameter |
Type |
Description |
ctx |
FlyteContext |
FlyteContext |
lv |
Literal |
The received literal Value |
expected_python_type |
Type[FlyteSchema] |
Expected native python type that should be returned |
def dict_to_flyte_schema(
dict_obj: typing.Dict[str, str],
expected_python_type: Type[FlyteSchema],
) -> FlyteSchema
| Parameter |
Type |
Description |
dict_obj |
typing.Dict[str, str] |
|
expected_python_type |
Type[FlyteSchema] |
|
def from_binary_idl(
binary_idl_object: Binary,
expected_python_type: Type[FlyteSchema],
) -> FlyteSchema
If the input is from flytekit, the Life Cycle will be as follows:
Life Cycle:
binary IDL (flytekit customized serialization) -> resolved binary (propeller processing) -> bytes (flytekit binary IDL) -> expected Python object (flytekit customized deserialization)
Example Code:
@dataclass
class DC:
fs: FlyteSchema
@workflow
def wf(dc: DC):
t_fs(dc.fs)
Note:
- The deserialization is the same as when a FlyteSchema is placed in a dataclass, which is deserialized by mashumaro’s API.
Related PR:
| Parameter |
Type |
Description |
binary_idl_object |
Binary |
|
expected_python_type |
Type[FlyteSchema] |
|
def from_generic_idl(
generic: Struct,
expected_python_type: Type[FlyteSchema],
) -> FlyteSchema
If the input is from Flyte Console, the Life Cycle will be as follows:
Life Cycle:
json str (console user input) -> protobuf struct (console output) -> resolved protobuf struct (propeller) -> expected Python object (flytekit customized deserialization)
Example Code:
@dataclass
class DC:
fs: FlyteSchema
@workflow
def wf(dc: DC):
t_fs(dc.fs)
Note:
- The deserialization is the same as when a FlyteSchema is placed in a dataclass, which is deserialized by mashumaro’s API.
Related PR:
| Parameter |
Type |
Description |
generic |
Struct |
|
expected_python_type |
Type[FlyteSchema] |
|
def get_literal_type(
t: Type[FlyteSchema],
) -> LiteralType
Converts the python type to a Flyte LiteralType
| Parameter |
Type |
Description |
t |
Type[FlyteSchema] |
|
def guess_python_type(
literal_type: LiteralType,
) -> Type[FlyteSchema]
Converts the Flyte LiteralType to a python object type.
| Parameter |
Type |
Description |
literal_type |
LiteralType |
|
def isinstance_generic(
obj,
generic_alias,
)
| Parameter |
Type |
Description |
obj |
|
|
generic_alias |
|
|
def schema_match(
schema: dict,
) -> bool
Check if a JSON schema fragment matches this transformer’s python_type.
For BaseModel subclasses, automatically compares the schema’s title, type, and
required fields against the type’s own JSON schema. For other types, returns
False by default — override if needed.
| Parameter |
Type |
Description |
schema |
dict |
|
def to_html(
ctx: FlyteContext,
python_val: T,
expected_python_type: Type[T],
) -> str
Converts any Python value (dataframe, int, float) to an HTML string, which will be wrapped in an HTML div.
| Parameter |
Type |
Description |
ctx |
FlyteContext |
|
python_val |
T |
|
expected_python_type |
Type[T] |
|
def to_literal(
ctx: FlyteContext,
python_val: typing.Any,
python_type: Type[T],
expected: LiteralType,
) -> Literal
Converts a given python_val to a Flyte Literal, assuming the given python_val matches the declared python_type.
Implementers should refrain from using type(python_val) and instead rely on the passed-in python_type. If these
do not match (or are not allowed), the Transformer implementer should raise an AssertionError that clearly states
what the mismatch was.
| Parameter |
Type |
Description |
ctx |
FlyteContext |
A FlyteContext, useful in accessing the filesystem and other attributes |
python_val |
typing.Any |
The actual value to be transformed |
python_type |
Type[T] |
The assumed type of the value (this matches the declared type on the function) |
expected |
LiteralType |
Expected Literal Type |
def to_python_value(
ctx: FlyteContext,
lv: Literal,
expected_python_type: Type[T],
) -> Optional[T]
Converts the given Literal to a Python Type. If the conversion cannot be done, an AssertionError should be raised.
| Parameter |
Type |
Description |
ctx |
FlyteContext |
FlyteContext |
lv |
Literal |
The received literal Value |
expected_python_type |
Type[T] |
Expected native python type that should be returned |
class LocalIOSchemaReader(
from_path: str,
cols: typing.Optional[typing.Dict[str, type]],
fmt: SchemaFormat,
)
| Parameter |
Type |
Description |
from_path |
str |
|
cols |
typing.Optional[typing.Dict[str, type]] |
|
fmt |
SchemaFormat |
|
| Property |
Type |
Description |
column_names |
None |
|
from_path |
None |
|
| Parameter |
Type |
Description |
kwargs |
**kwargs |
|
def iter(
kwargs,
) -> typing.Generator[T, None, None]
| Parameter |
Type |
Description |
kwargs |
**kwargs |
|
class LocalIOSchemaWriter(
to_local_path: str,
cols: typing.Optional[typing.Dict[str, type]],
fmt: SchemaFormat,
)
| Parameter |
Type |
Description |
to_local_path |
str |
|
cols |
typing.Optional[typing.Dict[str, type]] |
|
fmt |
SchemaFormat |
|
| Property |
Type |
Description |
column_names |
None |
|
to_path |
None |
|
def write(
dfs,
kwargs,
)
| Parameter |
Type |
Description |
dfs |
|
|
kwargs |
**kwargs |
|
This is the core Engine that handles all schema sub-systems. All schema types need to be registered with this
engine to allow direct support for that type in FlyteSchema.
Examples of types that could be supported are Pandas.DataFrame, Spark.DataFrame, Vaex.DataFrame, etc.
def get_handler(
t: Type,
) -> SchemaHandler
| Parameter |
Type |
Description |
t |
Type |
|
def register_handler(
h: SchemaHandler,
)
Register a new handler that can create a SchemaReader and SchemaWriter for the expected type.
| Parameter |
Type |
Description |
h |
SchemaHandler |
|
Represents the schema storage format (at rest).
Currently, only parquet is supported.
class SchemaHandler(
name: str,
object_type: Type,
reader: Type[SchemaReader],
writer: Type[SchemaWriter],
handles_remote_io: bool,
)
| Parameter |
Type |
Description |
name |
str |
|
object_type |
Type |
|
reader |
Type[SchemaReader] |
|
writer |
Type[SchemaWriter] |
|
handles_remote_io |
bool |
|
Base SchemaReader to handle any readers (that can manage their own IO or otherwise).
Use the simplified base LocalIOSchemaReader for non-distributed dataframes.
class SchemaReader(
from_path: str,
cols: typing.Optional[typing.Dict[str, type]],
fmt: SchemaFormat,
)
| Parameter |
Type |
Description |
from_path |
str |
|
cols |
typing.Optional[typing.Dict[str, type]] |
|
fmt |
SchemaFormat |
|
| Property |
Type |
Description |
column_names |
None |
|
from_path |
None |
|
| Parameter |
Type |
Description |
kwargs |
**kwargs |
|
def iter(
kwargs,
) -> typing.Generator[T, None, None]
| Parameter |
Type |
Description |
kwargs |
**kwargs |
|
class SchemaWriter(
to_path: str,
cols: typing.Optional[typing.Dict[str, type]],
fmt: SchemaFormat,
)
| Parameter |
Type |
Description |
to_path |
str |
|
cols |
typing.Optional[typing.Dict[str, type]] |
|
fmt |
SchemaFormat |
|
| Property |
Type |
Description |
column_names |
None |
|
to_path |
None |
|
def write(
dfs,
kwargs,
)
| Parameter |
Type |
Description |
dfs |
|
|
kwargs |
**kwargs |
|