-
Notifications
You must be signed in to change notification settings - Fork 49
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fix materialize + S3 not working. (#734)
* Fix materialize + S3 not working. The existing code relied on Python's pathlib for most operations instead of routing them through the pyarrow filesystem. Fix that, and add a test with a fake pyarrow filesystem to make sure we run with a path that does not exist in the local filesystem.
* Also make AutoMaterialize work with non-local pyarrow filesystems.
- Loading branch information
1 parent
c989a63
commit 72c33ae
Showing
4 changed files
with
255 additions
and
55 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
import io | ||
|
||
from pyarrow.fs import FileSystem, FileSelector, FileInfo, FileType | ||
|
||
|
||
class InMemPyArrowFileSystem(FileSystem):
    """Minimal in-memory stand-in for a pyarrow ``FileSystem``, for tests.

    Files live in a flat dict keyed by their full path string, so the store
    behaves like a blob store: there are no real directories, and a
    "directory" is simply a path prefix followed by ``/``.

    Only a handful of operations are implemented; the rest raise
    ``NotImplementedError``. Preconditions are checked with ``assert`` so
    that unsupported usage fails loudly inside a test run.
    """

    def __init__(self):
        # NOTE(review): super().__init__() is not called here, same as the
        # original; presumably because pyarrow's abstract FileSystem refuses
        # direct initialisation -- confirm before changing.
        #
        # Maps path -> bytes for a finished file, or -> the still-open writer
        # object returned by open_output_stream().
        self._fs = {}

    def copy_file(self, src, dest):
        """Not implemented."""
        raise NotImplementedError()

    def create_dir(self, path, *, recursive=True):
        """Accept the call and do nothing.

        We're blob-like, so creating a directory is a no-op.
        """
        assert isinstance(path, str)

    def delete_dir(self, path):
        """Not implemented."""
        raise NotImplementedError()

    def delete_dir_contents(self, path, missing_dir_ok=False):
        """Delete every stored entry whose key starts with ``path + "/"``.

        Only ``missing_dir_ok=True`` is supported; nothing is raised when no
        entry matches.
        """
        assert isinstance(path, str)
        assert missing_dir_ok, "unimplemented"
        prefix = path + "/"
        # Snapshot the matching keys first: a dict must not be mutated while
        # it is being iterated.
        doomed = [key for key in self._fs if key.startswith(prefix)]
        for key in doomed:
            del self._fs[key]

    def delete_file(self, path):
        """Remove the entry stored under ``path``; it must exist."""
        assert isinstance(path, str)
        assert path in self._fs
        del self._fs[path]

    def equals(self, other):
        """Not implemented."""
        raise NotImplementedError()

    def _path_info(self, path):
        """Return the ``FileInfo`` for one path string."""
        assert isinstance(path, str)
        if path not in self._fs:
            # FileInfo's type defaults to Unknown; pass NotFound explicitly so
            # a missing path looks the way pyarrow filesystems report it.
            return FileInfo(path, type=FileType.NotFound)
        # API docs claim we can leave mtime & size as None.
        return FileInfo(path, type=FileType.File)

    def get_file_info(self, p):
        """Describe a path, a list of paths, or everything under a selector.

        Args:
            p: a path string, a list/tuple of path strings, or a
                ``FileSelector``. Selectors are only supported with
                ``allow_not_found=True`` and ``recursive=True``.

        Returns:
            A single ``FileInfo`` for a path string; a list of ``FileInfo``
            for a list of paths (one per path, in order) or for a selector
            (one per stored key starting with ``base_dir + "/"``).
        """
        if isinstance(p, str):
            return self._path_info(p)
        if isinstance(p, (list, tuple)):
            return [self._path_info(item) for item in p]

        assert isinstance(p, FileSelector)
        assert p.allow_not_found, "unimplemented"
        assert p.recursive, "unimplemented"
        prefix = p.base_dir + "/"
        return [
            FileInfo(str(key), type=FileType.File)
            for key in self._fs
            if key.startswith(prefix)
        ]

    def move(self, src, dest):
        """Not implemented."""
        raise NotImplementedError()

    def normalize_path(self, path):
        """Not implemented."""
        raise NotImplementedError()

    def open_append_stream(self, path):
        """Not implemented."""
        raise NotImplementedError()

    def open_input_file(self, path):
        """Not implemented."""
        raise NotImplementedError()

    def open_input_stream(self, path):
        """Return a fresh ``io.BytesIO`` over the bytes stored at ``path``.

        The path must exist and its writer must already have been closed
        (i.e. the stored value must be ``bytes``).
        """
        assert isinstance(path, str)
        assert path in self._fs
        content = self._fs[path]
        assert isinstance(content, bytes)
        return io.BytesIO(content)

    def open_output_stream(self, path):
        """Return a writable buffer whose contents are stored on close.

        While the buffer is open, the buffer object itself occupies the
        entry for ``path``; closing it replaces that entry with the written
        bytes. Overwriting an existing path is not supported.
        """

        class OpenFile(io.BytesIO):
            """BytesIO that publishes its contents into the store on close."""

            def __init__(self, fs, name):
                self._fs = fs
                self._name = name
                super().__init__()

            def close(self):
                # Closing twice (e.g. a ``with`` block followed by an explicit
                # close()) must be harmless: by then the entry already holds
                # bytes and getvalue() would raise on the closed buffer.
                if self.closed:
                    return
                assert isinstance(self._fs[self._name], OpenFile)
                self._fs[self._name] = self.getvalue()
                super().close()

        assert isinstance(path, str)
        assert path not in self._fs, "overwrite unimplemented"
        writer = OpenFile(self._fs, path)
        self._fs[path] = writer
        return writer
Oops, something went wrong.