File List HWM#
- class etl_entities.hwm.file_list_hwm.FileListHWM(*, source: RemoteFolder, value: FrozenSet[RelativePath] = None, modified_time: datetime = None, process: Process = None)#
File List HWM type
- Parameters:
- source
etl_entities.instance.path.remote_folder.RemoteFolder
Folder instance
- value
frozenset of pathlib.PosixPath, default: empty set
HWM value
- modified_time
datetime.datetime, default: current datetime
HWM value modification time
- process
etl_entities.process.process.Process, default: current process
Process instance
- source
Examples
from etl_entities import FileListHWM, RemoteFolder

folder = RemoteFolder(name="/absolute/path", instance="ftp://ftp.server:21")

hwm = FileListHWM(
    source=folder,
    value=["some/path", "another.file"],
)
- Attributes:
name
Name of HWM
qualified_name
Unique name of HWM
Methods
copy
(*[, include, exclude, update, deep]) Duplicate a model, optionally choose which fields to include, exclude and change.
covers
(value) Return True if input value is already covered by HWM
deserialize
(inp) Return HWM from dict representation
dict
(*[, include, exclude, by_alias, ...]) Generate a dictionary representation of the model, optionally specifying which fields to include or exclude.
json
(*[, include, exclude, by_alias, ...]) Generate a JSON representation of the model, include and exclude arguments as per dict().
serialize
() Return dict representation of HWM
set_value
(value) Replaces current HWM value with the passed one, and return HWM.
update
(value) Updates current HWM value with some implementation-specific logic, and return HWM.
- __abs__() frozenset[AbsolutePath] #
Returns set of files with absolute paths
- Returns:
- result
frozenset of pathlib.PosixPath
Set of files from the HWM value, converted to absolute paths
- result
Examples
from etl_entities import FileListHWM, Folder, AbsolutePath

hwm = FileListHWM(value=["some/path"], source=Folder(name="/absolute/path", ...), ...)

assert abs(hwm) == frozenset({AbsolutePath("/absolute/path/some/path")})
- __add__(value: str | os.PathLike | Iterable[str | os.PathLike])#
Adds path or paths to HWM value, and return copy of HWM
- Returns:
- result: FileListHWM
HWM copy with new value
Examples
from etl_entities import FileListHWM

hwm1 = FileListHWM(value=["some/path"], ...)
hwm2 = FileListHWM(value=["some/path", "another.file"], ...)

assert hwm1 + "another.file" == hwm2
# same as FileListHWM(value=hwm1.value + "another.file", ...)
- __bool__()#
Check if HWM value is set
- Returns:
- result: bool
False if value is empty, True otherwise
Examples
from etl_entities import FileListHWM

hwm = FileListHWM(value=["some/path.py"], ...)
assert hwm  # same as bool(hwm.value)

hwm = FileListHWM(value=[], ...)
assert not hwm
- __iter__()#
Iterate over files in FileListHWM.
- Returns:
- result: Iterator[RelativePath]
Files in HWM, order is not preserved
Examples
from etl_entities import FileListHWM, RelativePath

hwm1 = FileListHWM(value=["some", "another"], ...)
hwm2 = FileListHWM(value=[], ...)

assert set(hwm1) == {RelativePath("some"), RelativePath("another")}
assert set(hwm2) == set()
- __len__()#
Return number of files in the HWM.
- Returns:
- result: int
Number of files
Examples
from etl_entities import FileListHWM

hwm1 = FileListHWM(value=["some", "another"], ...)
hwm2 = FileListHWM(value=[], ...)

assert len(hwm1) == 2
assert len(hwm2) == 0
- __sub__(value: str | os.PathLike | Iterable[str | os.PathLike])#
Remove path or paths from HWM value, and return copy of HWM
- Returns:
- result: FileListHWM
HWM copy with new value
Examples
from etl_entities import FileListHWM

hwm1 = FileListHWM(value=["some/path"], ...)
hwm2 = FileListHWM(value=["some/path", "another.file"], ...)

assert hwm2 - "another.file" == hwm1
# same as FileListHWM(value=hwm2.value - "another.file", ...)
- copy(*, include: AbstractSetIntStr | MappingIntStrAny | None = None, exclude: AbstractSetIntStr | MappingIntStrAny | None = None, update: DictStrAny | None = None, deep: bool = False) Model #
Duplicate a model, optionally choose which fields to include, exclude and change.
- Parameters:
include – fields to include in new model
exclude – fields to exclude from new model, as with values this takes precedence over include
update – values to change/add in the new model. Note: the data is not validated before creating the new model: you should trust this data
deep – set to True to make a deep copy of the model
- Returns:
new model instance
- covers(value: str | os.PathLike) bool #
Return True if input value is already covered by HWM
Examples
from etl_entities import FileListHWM

hwm = FileListHWM(value=["some/path.py"], ...)

assert hwm.covers("some/path.py")  # path in HWM
assert not hwm.covers("another/path.py")  # path not in HWM
- classmethod deserialize(inp: dict)#
Return HWM from dict representation
- Returns:
- result: HWM
Deserialized HWM
Examples
from etl_entities import IntHWM

assert IntHWM.deserialize(
    {
        "value": "1",
        "type": "int",
        "column": {"name": ..., "partition": ...},
        "source": ...,
        "process": ...,
    }
) == IntHWM(value=1, ...)

IntHWM.deserialize({"type": "date"})  # raises ValueError
- dict(*, include: AbstractSetIntStr | MappingIntStrAny | None = None, exclude: AbstractSetIntStr | MappingIntStrAny | None = None, by_alias: bool = False, skip_defaults: bool | None = None, exclude_unset: bool = False, exclude_defaults: bool = False, exclude_none: bool = False) DictStrAny #
Generate a dictionary representation of the model, optionally specifying which fields to include or exclude.
- json(*, include: AbstractSetIntStr | MappingIntStrAny | None = None, exclude: AbstractSetIntStr | MappingIntStrAny | None = None, by_alias: bool = False, skip_defaults: bool | None = None, exclude_unset: bool = False, exclude_defaults: bool = False, exclude_none: bool = False, encoder: Callable[[Any], Any] | None = None, models_as_dict: bool = True, **dumps_kwargs: Any) unicode #
Generate a JSON representation of the model, include and exclude arguments as per dict().
encoder is an optional function to supply as default to json.dumps(), other arguments as per json.dumps().
- property name: str#
Name of HWM
- Returns:
- value: str
Static value
"file_list"
- property qualified_name: str#
Unique name of HWM
- serialize() dict #
Return dict representation of HWM
- Returns:
- result: dict
Serialized HWM
Examples
from etl_entities import IntHWM

hwm = IntHWM(value=1, ...)

assert hwm.serialize() == {
    "value": "1",
    "type": "int",
    "column": {"name": ..., "partition": ...},
    "source": ...,
    "process": ...,
}
- set_value(value: ValueType) HWM #
Replaces current HWM value with the passed one, and return HWM.
Note
Changes HWM value in place instead of returning new one
- Returns:
- result: HWM
Self
Examples
from etl_entities import IntHWM

hwm = IntHWM(value=1, ...)

hwm.set_value(2)
assert hwm.value == 2
- update(value: str | os.PathLike | Iterable[str | os.PathLike])#
Updates current HWM value with some implementation-specific logic, and return HWM.
Note
Changes HWM value in place
- Returns:
- result: FileListHWM
Self
Examples
from etl_entities import FileListHWM

hwm = FileListHWM(value=["some/existing_path.py"], ...)

# new paths are appended
hwm.update("some/new_path.py")
assert hwm.value == [
    "some/existing_path.py",
    "some/new_path.py",
]

# existing paths do nothing
hwm.update("some/existing_path.py")
assert hwm.value == [
    "some/existing_path.py",
    "some/new_path.py",
]