File List HWM#

class etl_entities.hwm.file_list_hwm.FileListHWM(*, source: etl_entities.instance.path.remote_folder.RemoteFolder, value: typing.FrozenSet[etl_entities.instance.path.relative_path.RelativePath] = None, modified_time: datetime.datetime = None, process: etl_entities.process.process.Process = None)#

File List HWM type

Parameters:
source : etl_entities.instance.path.remote_folder.RemoteFolder

Folder instance

value : frozenset of pathlib.PosixPath, default: empty set

HWM value

modified_time : datetime.datetime, default: current datetime

HWM value modification time

process : etl_entities.process.process.Process, default: current process

Process instance

Examples

from etl_entities import FileListHWM, RemoteFolder

folder = RemoteFolder(name="/absolute/path", instance="ftp://ftp.server:21")

hwm = FileListHWM(
    source=folder,
    value=["some/path", "another.file"],
)
Attributes:
name

Name of HWM

qualified_name

Unique name of HWM

Methods

copy(*[, include, exclude, update, deep])

Duplicate a model, optionally choose which fields to include, exclude and change.

covers(value)

Return True if input value is already covered by HWM

deserialize(inp)

Return HWM from dict representation

dict(*[, include, exclude, by_alias, ...])

Generate a dictionary representation of the model, optionally specifying which fields to include or exclude.

json(*[, include, exclude, by_alias, ...])

Generate a JSON representation of the model, include and exclude arguments as per dict().

serialize()

Return dict representation of HWM

set_value(value)

Replaces current HWM value with the passed one, and returns HWM.

update(value)

Updates current HWM value with some implementation-specific logic, and returns HWM.

__abs__() frozenset[AbsolutePath]#

Returns set of files with absolute paths

Returns:
result : frozenset of pathlib.PosixPath

Set of absolute paths of the files in the HWM

Examples

from etl_entities import FileListHWM, Folder, AbsolutePath

hwm = FileListHWM(value=["some/path"], source=Folder(name="/absolute/path", ...), ...)

assert abs(hwm) == frozenset({AbsolutePath("/absolute/path/some/path")})
__add__(value: str | os.PathLike | Iterable[str | os.PathLike])#

Adds path or paths to HWM value, and returns a copy of HWM

Returns:
result : FileListHWM

HWM copy with new value

Examples

from etl_entities import FileListHWM

hwm1 = FileListHWM(value=["some/path"], ...)
hwm2 = FileListHWM(value=["some/path", "another.file"], ...)

assert hwm1 + "another.file" == hwm2
# same as FileListHWM(value=hwm1.value + "another.file", ...)
__bool__()#

Check if HWM value is set

Returns:
result : bool

False if value is empty, True otherwise

Examples

from etl_entities import FileListHWM

hwm = FileListHWM(value=["some/path.py"], ...)
assert hwm  # same as bool(hwm.value)

hwm = FileListHWM(value=[], ...)
assert not hwm
__iter__()#

Iterate over files in FileListHWM.

Returns:
result : Iterator[RelativePath]

Files in HWM, order is not preserved

Examples

from etl_entities import FileListHWM, RelativePath

hwm1 = FileListHWM(value=["some", "another"], ...)
hwm2 = FileListHWM(value=[], ...)

assert set(hwm1) == {RelativePath("some"), RelativePath("another")}
assert set(hwm2) == set()
__len__()#

Return number of files in the HWM.

Returns:
result : int

Number of files

Examples

from etl_entities import FileListHWM

hwm1 = FileListHWM(value=["some", "another"], ...)
hwm2 = FileListHWM(value=[], ...)

assert len(hwm1) == 2
assert len(hwm2) == 0
__sub__(value: str | os.PathLike | Iterable[str | os.PathLike])#

Removes path or paths from HWM value, and returns a copy of HWM

Returns:
result : FileListHWM

HWM copy with new value

Examples

from etl_entities import FileListHWM

hwm1 = FileListHWM(value=["some/path"], ...)
hwm2 = FileListHWM(value=["some/path", "another.file"], ...)

assert hwm2 - "another.file" == hwm1
# same as FileListHWM(value=hwm2.value - "another.file", ...)
copy(*, include: AbstractSetIntStr | MappingIntStrAny | None = None, exclude: AbstractSetIntStr | MappingIntStrAny | None = None, update: DictStrAny | None = None, deep: bool = False) Model#

Duplicate a model, optionally choose which fields to include, exclude and change.

Parameters:
  • include – fields to include in new model

  • exclude – fields to exclude from new model, as with values this takes precedence over include

  • update – values to change/add in the new model. Note: the data is not validated before creating the new model: you should trust this data

  • deep – set to True to make a deep copy of the model

Returns:

new model instance

covers(value: str | os.PathLike) bool#

Return True if input value is already covered by HWM

Examples

from etl_entities import FileListHWM

hwm = FileListHWM(value=["some/path.py"], ...)

assert hwm.covers("some/path.py")  # path in HWM
assert not hwm.covers("another/path.py")  # path not in HWM
classmethod deserialize(inp: dict)#

Return HWM from dict representation

Returns:
result : HWM

Deserialized HWM

Examples

from etl_entities import IntHWM

assert IntHWM.deserialize(
    {
        "value": "1",
        "type": "int",
        "column": {"name": ..., "partition": ...},
        "source": ...,
        "process": ...,
    }
) == IntHWM(value=1, ...)

IntHWM.deserialize({"type": "date"})  # raises ValueError
dict(*, include: AbstractSetIntStr | MappingIntStrAny | None = None, exclude: AbstractSetIntStr | MappingIntStrAny | None = None, by_alias: bool = False, skip_defaults: bool | None = None, exclude_unset: bool = False, exclude_defaults: bool = False, exclude_none: bool = False) DictStrAny#

Generate a dictionary representation of the model, optionally specifying which fields to include or exclude.

json(*, include: AbstractSetIntStr | MappingIntStrAny | None = None, exclude: AbstractSetIntStr | MappingIntStrAny | None = None, by_alias: bool = False, skip_defaults: bool | None = None, exclude_unset: bool = False, exclude_defaults: bool = False, exclude_none: bool = False, encoder: Callable[[Any], Any] | None = None, models_as_dict: bool = True, **dumps_kwargs: Any) unicode#

Generate a JSON representation of the model, include and exclude arguments as per dict().

encoder is an optional function to supply as default to json.dumps(), other arguments as per json.dumps().

property name: str#

Name of HWM

Returns:
value : str

Static value "file_list"

property qualified_name: str#

Unique name of HWM

serialize() dict#

Return dict representation of HWM

Returns:
result : dict

Serialized HWM

Examples

from etl_entities import IntHWM

hwm = IntHWM(value=1, ...)
assert hwm.serialize() == {
    "value": "1",
    "type": "int",
    "column": {"name": ..., "partition": ...},
    "source": ...,
    "process": ...,
}
set_value(value: ValueType) HWM#

Replaces current HWM value with the passed one, and returns HWM.

Note

Changes HWM value in place instead of returning new one

Returns:
result : HWM

Self

Examples

from etl_entities import IntHWM

hwm = IntHWM(value=1, ...)

hwm.set_value(2)
assert hwm.value == 2
update(value: str | os.PathLike | Iterable[str | os.PathLike])#

Updates current HWM value with some implementation-specific logic, and returns HWM.

Note

Changes HWM value in place

Returns:
result : FileListHWM

Self

Examples

from etl_entities import FileListHWM

hwm = FileListHWM(value=["some/existing_path.py"], ...)

# new paths are appended
hwm.update("some/new_path.py")
assert hwm.value == [
    "some/existing_path.py",
    "some/new_path.py",
]

# existing paths do nothing
hwm.update("some/existing_path.py")
assert hwm.value == [
    "some/existing_path.py",
    "some/new_path.py",
]