File List HWM#

class etl_entities.old_hwm.file_list_hwm.FileListHWM(*, source: RemoteFolder, value: FrozenSet[RelativePath] = None, modified_time: datetime = None, process: Process = None)#

File List HWM type

Deprecated since version 2.0.0: Use etl_entities.hwm.file.file_list_hwm.FileListHWM instead

Parameters:
sourceetl_entities.instance.path.remote_folder.RemoteFolder

Folder instance

valuefrozenset of pathlib.PosixPath, default: empty set

HWM value

modified_timedatetime.datetime, default: current datetime

HWM value modification time

processetl_entities.process.process.Process, default: current process

Process instance

Examples

from etl_entities.old_hwm import FileListHWM
from etl_entities.source import RemoteFolder

folder = RemoteFolder(name="/absolute/path", instance="ftp://ftp.server:21")

old_hwm = FileListHWM(
    source=folder,
    value=["some/path", "another.file"],
)
__abs__() frozenset[AbsolutePath]#

Returns set of files with absolute paths

Returns:
resultfrozenset of pathlib.PosixPath

Set of files from HWM value, as absolute paths

Examples

from etl_entities.old_hwm import FileListHWM
from etl_entities.source import RemoteFolder
from etl_entities.instance import AbsolutePath

old_hwm = FileListHWM(
    value=["some/path"], source=RemoteFolder(name="/absolute/path", ...), ...
)

assert abs(old_hwm) == frozenset({AbsolutePath("/absolute/path/some/path")})
__add__(value: str | PathLike | Iterable[str | PathLike])#

Adds path or paths to HWM value, and returns a copy of HWM

Parameters:
valuestr or pathlib.PosixPath or typing.Iterable of them

Path or collection of paths to be added to value

Returns:
resultFileListHWM

HWM copy with new value

Examples

from etl_entities.old_hwm import FileListHWM

hwm1 = FileListHWM(value=["some/path"], ...)
hwm2 = FileListHWM(value=["some/path", "another.file"], ...)

assert hwm1 + "another.file" == hwm2
# same as FileListHWM(value=hwm1.value + "another.file", ...)
__bool__()#

Check if HWM value is set

Returns:
resultbool

False if value is empty, True otherwise

Examples

from etl_entities.old_hwm import FileListHWM

old_hwm = FileListHWM(value=["some/path.py"], ...)
assert old_hwm  # same as bool(old_hwm.value)

old_hwm = FileListHWM(value=[], ...)
assert not old_hwm
__contains__(item)#

Checks if path is present in value

Returns:
resultbool

True if path is present in value, False otherwise

Examples

from etl_entities.old_hwm import FileListHWM
from etl_entities.source import RemoteFolder
from etl_entities.instance import AbsolutePath

old_hwm = FileListHWM(
    value=["some/path"], source=RemoteFolder(name="/absolute/path", ...), ...
)

assert "some/path" in old_hwm
assert "/absolute/path/some/path" in old_hwm
__iter__()#

Iterate over files in FileListHWM.

Returns:
resultIterator[RelativePath]

Files in HWM, order is not preserved

Examples

from etl_entities.old_hwm import FileListHWM
from etl_entities.instance import RelativePath

hwm1 = FileListHWM(value=["some", "another"], ...)
hwm2 = FileListHWM(value=[], ...)

assert set(hwm1) == {RelativePath("some"), RelativePath("another")}
assert set(hwm2) == set()
__len__()#

Return number of files in the HWM.

Returns:
resultint

Number of files

Examples

from etl_entities.old_hwm import FileListHWM

hwm1 = FileListHWM(value=["some", "another"], ...)
hwm2 = FileListHWM(value=[], ...)

assert len(hwm1) == 2
assert len(hwm2) == 0
__sub__(value: str | PathLike | Iterable[str | PathLike])#

Remove path or paths from HWM value, and return copy of HWM

Parameters:
valuestr or pathlib.PosixPath or typing.Iterable of them

Path or collection of paths to be removed from value

Returns:
resultFileListHWM

HWM copy with new value

Examples

from etl_entities.old_hwm import FileListHWM

hwm1 = FileListHWM(value=["some/path"], ...)
hwm2 = FileListHWM(value=["some/path", "another.file"], ...)

assert hwm2 - "another.file" == hwm1
# same as FileListHWM(value=hwm2.value - "another.file", ...)
copy(*, include: AbstractSetIntStr | MappingIntStrAny | None = None, exclude: AbstractSetIntStr | MappingIntStrAny | None = None, update: DictStrAny | None = None, deep: bool = False) Model#

Duplicate a model, optionally choose which fields to include, exclude and change.

Parameters:
  • include – fields to include in new model

  • exclude – fields to exclude from new model, as with values this takes precedence over include

  • update – values to change/add in the new model. Note: the data is not validated before creating the new model: you should trust this data

  • deep – set to True to make a deep copy of the model

Returns:

new model instance

covers(value: str | PathLike) bool#

Return True if input value is already covered by HWM

Examples

from etl_entities.old_hwm import FileListHWM

old_hwm = FileListHWM(value=["some/path.py"], ...)

assert old_hwm.covers("some/path.py")  # path in HWM
assert not old_hwm.covers("another/path.py")  # path not in HWM
classmethod deserialize(inp: dict)#

Return HWM from dict representation

Returns:
resultHWM

Deserialized HWM

Examples

from etl_entities.old_hwm import IntHWM

assert IntHWM.deserialize(
    {
        "value": "1",
        "type": "int",
        "column": {"name": ..., "partition": ...},
        "source": ...,
        "process": ...,
    }
) == IntHWM(value=1, ...)

IntHWM.deserialize({"type": "date"})  # raises ValueError
dict(*, include: AbstractSetIntStr | MappingIntStrAny | None = None, exclude: AbstractSetIntStr | MappingIntStrAny | None = None, by_alias: bool = False, skip_defaults: bool | None = None, exclude_unset: bool = False, exclude_defaults: bool = False, exclude_none: bool = False) DictStrAny#

Generate a dictionary representation of the model, optionally specifying which fields to include or exclude.

json(*, include: AbstractSetIntStr | MappingIntStrAny | None = None, exclude: AbstractSetIntStr | MappingIntStrAny | None = None, by_alias: bool = False, skip_defaults: bool | None = None, exclude_unset: bool = False, exclude_defaults: bool = False, exclude_none: bool = False, encoder: Callable[[Any], Any] | None = None, models_as_dict: bool = True, **dumps_kwargs: Any) str#

Generate a JSON representation of the model, include and exclude arguments as per dict().

encoder is an optional function to supply as default to json.dumps(), other arguments as per json.dumps().

property name: str#

Name of HWM

Returns:
valuestr

Static value "file_list"

property qualified_name: str#

Unique name of HWM

serialize() dict#

Return dict representation of HWM

Returns:
resultdict

Serialized HWM

Examples

from etl_entities.old_hwm import IntHWM

old_hwm = IntHWM(value=1, ...)
assert old_hwm.serialize() == {
    "value": "1",
    "type": "int",
    "column": {"name": ..., "partition": ...},
    "source": ...,
    "process": ...,
}
set_value(value: ValueType) HWM#

Replaces current HWM value with the passed one, and returns HWM.

Note

Changes HWM value in place instead of returning new one

Returns:
resultHWM

Self

Examples

from etl_entities.old_hwm import IntHWM

old_hwm = IntHWM(value=1, ...)

old_hwm.set_value(2)
assert old_hwm.value == 2
update(value: str | PathLike | Iterable[str | PathLike])#

Updates current HWM value with some implementation-specific logic, and returns HWM.

Note

Changes HWM value in place

Returns:
resultFileListHWM

Self

Examples

from etl_entities.old_hwm import FileListHWM

old_hwm = FileListHWM(value=["some/existing_path.py"], ...)

# new paths are appended
old_hwm.update("some/new_path.py")
assert old_hwm.value == [
    "some/existing_path.py",
    "some/new_path.py",
]

# existing paths do nothing
old_hwm.update("some/existing_path.py")
assert old_hwm.value == [
    "some/existing_path.py",
    "some/new_path.py",
]