Skip to content

Commit

Permalink
/vsiaz/: add BLOB_TYPE=BLOCK and CHUNK_SIZE options to VSIFOpenEx2L()
Browse files Browse the repository at this point in the history
to be able to overcome the 195 GiB file size limit when using append
blobs, which is the default mode.
  • Loading branch information
rouault committed May 20, 2024
1 parent d4e5d81 commit 6d91bc9
Show file tree
Hide file tree
Showing 8 changed files with 434 additions and 132 deletions.
124 changes: 124 additions & 0 deletions autotest/gcore/vsiaz.py
Original file line number Diff line number Diff line change
Expand Up @@ -831,6 +831,130 @@ def test_vsiaz_write_appendblob_retry():
gdal.VSIFCloseL(f)


###############################################################################
# Test writing a block blob with CHUNK_SIZE=1 (1 MiB blocks)


def test_vsiaz_write_blockblob_chunk_size_1():
    """Write 1 MiB + 1 byte to a block blob opened with CHUNK_SIZE=1.

    A first block of 1 MiB is expected once a full MiB has been written; the
    trailing 1-byte block and the block list are expected when closing.
    """

    if gdaltest.webserver_port == 0:
        pytest.skip()

    gdal.VSICurlClearCache()

    open_options = ["BLOB_TYPE=BLOCK", "CHUNK_SIZE=1"]
    fp = gdal.VSIFOpenExL("/vsiaz/test_create/file.bin", "wb", False, open_options)
    assert fp is not None

    one_mib = 1024 * 1024

    # First byte: no HTTP handler is installed around this write.
    assert gdal.VSIFWriteL(b"x", 1, 1, fp) == 1

    # Writing the rest of the first MiB is expected to upload block #1.
    first_block_handler = webserver.SequentialHandler()
    first_block_handler.add(
        "PUT",
        "/azure/blob/myaccount/test_create/file.bin?blockid=000000000001&comp=block",
        201,
        expected_headers={"Content-Length": str(one_mib)},
    )
    remainder = b"x" * (one_mib - 1)
    with webserver.install_http_handler(first_block_handler):
        assert gdal.VSIFWriteL(remainder, one_mib - 1, 1, fp) == 1

    # One more byte: again no HTTP handler is installed around this write.
    assert gdal.VSIFWriteL(b"x", 1, 1, fp) == 1

    # Closing is expected to send the 1-byte block #2, then the block list.
    close_handler = webserver.SequentialHandler()
    close_handler.add(
        "PUT",
        "/azure/blob/myaccount/test_create/file.bin?blockid=000000000002&comp=block",
        201,
        expected_headers={"Content-Length": "1"},
    )
    close_handler.add(
        "PUT",
        "/azure/blob/myaccount/test_create/file.bin?comp=blocklist",
        201,
        expected_body=b'<?xml version="1.0" encoding="utf-8"?>\n<BlockList>\n<Latest>000000000001</Latest>\n<Latest>000000000002</Latest>\n</BlockList>\n',
    )
    with webserver.install_http_handler(close_handler):
        gdal.VSIFCloseL(fp)


###############################################################################
# Test writing a block blob with the default chunk size


def test_vsiaz_write_blockblob_default_chunk_size():
    """Write 50 MiB + 1 byte to a block blob opened without CHUNK_SIZE.

    The expected requests are a 50 MiB block, a 1-byte block and the block
    list, in that order.
    """

    if gdaltest.webserver_port == 0:
        pytest.skip()

    gdal.VSICurlClearCache()

    fp = gdal.VSIFOpenExL(
        "/vsiaz/test_create/file.bin", "wb", False, ["BLOB_TYPE=BLOCK"]
    )
    assert fp is not None

    default_chunk_bytes = 50 * 1024 * 1024
    payload_size = default_chunk_bytes + 1
    payload = b"x" * payload_size

    expected_requests = webserver.SequentialHandler()
    # Block #1: a full chunk of the default size.
    expected_requests.add(
        "PUT",
        "/azure/blob/myaccount/test_create/file.bin?blockid=000000000001&comp=block",
        201,
        expected_headers={"Content-Length": str(default_chunk_bytes)},
    )
    # Block #2: the single remaining byte.
    expected_requests.add(
        "PUT",
        "/azure/blob/myaccount/test_create/file.bin?blockid=000000000002&comp=block",
        201,
        expected_headers={"Content-Length": "1"},
    )
    # Block list referencing both blocks.
    expected_requests.add(
        "PUT",
        "/azure/blob/myaccount/test_create/file.bin?comp=blocklist",
        201,
        expected_body=b'<?xml version="1.0" encoding="utf-8"?>\n<BlockList>\n<Latest>000000000001</Latest>\n<Latest>000000000002</Latest>\n</BlockList>\n',
    )

    with webserver.install_http_handler(expected_requests):
        written = gdal.VSIFWriteL(payload, payload_size, 1, fp)
        assert written == 1
        gdal.VSIFCloseL(fp)


###############################################################################
# Test writing a small block blob, uploaded with a single PUT


def test_vsiaz_write_blockblob_single_put():
    """Write a single byte to a block blob.

    The only expected request is one PUT on the blob URL itself, without any
    blockid/blocklist query.
    """

    if gdaltest.webserver_port == 0:
        pytest.skip()

    gdal.VSICurlClearCache()

    fp = gdal.VSIFOpenExL(
        "/vsiaz/test_create/file.bin", "wb", False, ["BLOB_TYPE=BLOCK"]
    )
    assert fp is not None

    single_put = webserver.SequentialHandler()
    single_put.add(
        "PUT",
        "/azure/blob/myaccount/test_create/file.bin",
        201,
        expected_headers={"Content-Length": "1"},
    )

    with webserver.install_http_handler(single_put):
        written = gdal.VSIFWriteL(b"x", 1, 1, fp)
        assert written == 1
        gdal.VSIFCloseL(fp)


###############################################################################
# Test Unlink()

Expand Down
2 changes: 1 addition & 1 deletion port/cpl_azure.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ class VSIAzureBlobHandleHelper final : public IVSIS3LikeHandleHelper

namespace cpl
{
int GetAzureBufferSize();
int GetAzureAppendBufferSize();
}

#endif /* HAVE_CURL */
Expand Down
24 changes: 18 additions & 6 deletions port/cpl_vsil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2023,10 +2023,10 @@ bool VSIFilesystemHandler::SetFileMetadata(const char * /* pszFilename*/,
/************************************************************************/

/**
* \brief Open file.
* \brief Open/create file.
*
* This function opens a file with the desired access. Large files (larger
* than 2GB) should be supported. Binary access is always implied and
* This function opens (or creates) a file with the desired access.
* Binary access is always implied and
* the "b" does not need to be included in the pszAccess string.
*
* Note that the "VSILFILE *" returned by this function is
Expand Down Expand Up @@ -2064,10 +2064,10 @@ VSILFILE *VSIFOpenExL(const char *pszFilename, const char *pszAccess,
/************************************************************************/

/**
* \brief Open file.
* \brief Open/create file.
*
* This function opens a file with the desired access. Large files (larger
* than 2GB) should be supported. Binary access is always implied and
* This function opens (or creates) a file with the desired access.
* Binary access is always implied and
* the "b" does not need to be included in the pszAccess string.
*
* Note that the "VSILFILE *" returned by this function is
Expand All @@ -2094,6 +2094,18 @@ VSILFILE *VSIFOpenExL(const char *pszFilename, const char *pszAccess,
* delay.</li>
* </ul>
*
* Options specific to /vsiaz/ in "w" mode:
* <ul>
* <li>BLOB_TYPE=APPEND/BLOCK. (GDAL >= 3.10) Type of blob. Defaults to APPEND.
* Append blobs are limited to 195 GiB
* (however if the file size is below 4 MiB, a block blob will be created in a
* single PUT operation)
* </li>
* <li>CHUNK_SIZE=val in MiB. (GDAL >= 3.10) Size of a block, only taken into
* account when BLOB_TYPE=BLOCK. Default is 50 MiB. Azure Blob Storage accepts
* blocks of at most 4000 MiB.</li>
* </ul>
*
* Analog of the POSIX fopen() function.
*
* @param pszFilename the file to open. UTF-8 encoded.
Expand Down
2 changes: 1 addition & 1 deletion port/cpl_vsil_adls.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1159,7 +1159,7 @@ VSIADLSWriteHandle::VSIADLSWriteHandle(VSIADLSFSHandler *poFS,
const char *pszFilename,
VSIAzureBlobHandleHelper *poHandleHelper)
: VSIAppendWriteHandle(poFS, poFS->GetFSPrefix().c_str(), pszFilename,
GetAzureBufferSize()),
GetAzureAppendBufferSize()),
m_poHandleHelper(poHandleHelper)
{
}
Expand Down
56 changes: 48 additions & 8 deletions port/cpl_vsil_az.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -660,6 +660,32 @@ class VSIAzureFSHandler final : public IVSIS3LikeFSHandler
{
return new VSIAzureFSHandler(pszPrefix);
}

//! Maximum number of parts (blocks) for a multipart upload.
// Cf https://learn.microsoft.com/en-us/rest/api/storageservices/understanding-block-blobs--append-blobs--and-page-blobs
int GetMaximumPartCount() override
{
    constexpr int nMaxPartCount = 50000;
    return nMaxPartCount;
}

//! Minimum size, in MiB, of every part of a multipart upload but the last.
// Zero here: this handler reports no lower bound on the part size.
int GetMinimumPartSizeInMiB() override
{
    constexpr int nMinPartSizeInMiB = 0;
    return nMinPartSizeInMiB;
}

//! Maximum size of a part for multipart upload, in MiB.
// Azure Blob Storage documents a maximum block size of 4000 MiB (not
// 4096 MiB) for service versions 2019-12-12 and later, so that is the
// value returned on 64-bit builds.
// Cf https://learn.microsoft.com/en-us/rest/api/storageservices/understanding-block-blobs--append-blobs--and-page-blobs
int GetMaximumPartSizeInMiB() override
{
#if SIZEOF_VOIDP == 8
    return 4000;
#else
    // Must stay below 4 GiB (presumably the 32-bit size limit), otherwise
    // integer overflow would occur.
    // 1 GiB is the maximum reasonable value on a 32-bit machine
    return 1 * 1024;
#endif
}
};

/************************************************************************/
Expand Down Expand Up @@ -735,13 +761,27 @@ VSIAzureFSHandler::CreateWriteHandle(const char *pszFilename,
pszFilename + GetFSPrefix().size(), GetFSPrefix().c_str());
if (poHandleHelper == nullptr)
return nullptr;
auto poHandle = std::make_unique<VSIAzureWriteHandle>(
this, pszFilename, poHandleHelper, papszOptions);
if (!poHandle->IsOK())
const char *pszBlobType = CSLFetchNameValue(papszOptions, "BLOB_TYPE");
if (pszBlobType && EQUAL(pszBlobType, "BLOCK"))
{
return nullptr;
auto poHandle = std::make_unique<VSIS3LikeWriteHandle>(
this, pszFilename, poHandleHelper, false, papszOptions);
if (!poHandle->IsOK())
{
return nullptr;
}
return VSIVirtualHandleUniquePtr(poHandle.release());
}
else
{
auto poHandle = std::make_unique<VSIAzureWriteHandle>(
this, pszFilename, poHandleHelper, papszOptions);
if (!poHandle->IsOK())
{
return nullptr;
}
return VSIVirtualHandleUniquePtr(poHandle.release());
}
return VSIVirtualHandleUniquePtr(poHandle.release());
}

/************************************************************************/
Expand Down Expand Up @@ -1162,10 +1202,10 @@ VSIAzureFSHandler::GetStreamingFilename(const std::string &osFilename) const
}

/************************************************************************/
/* GetAzureBufferSize() */
/* GetAzureAppendBufferSize() */
/************************************************************************/

int GetAzureBufferSize()
int GetAzureAppendBufferSize()
{
int nBufferSize;
int nChunkSizeMB = atoi(CPLGetConfigOption("VSIAZ_CHUNK_SIZE", "4"));
Expand All @@ -1192,7 +1232,7 @@ VSIAzureWriteHandle::VSIAzureWriteHandle(
VSIAzureFSHandler *poFS, const char *pszFilename,
VSIAzureBlobHandleHelper *poHandleHelper, CSLConstList papszOptions)
: VSIAppendWriteHandle(poFS, poFS->GetFSPrefix().c_str(), pszFilename,
GetAzureBufferSize()),
GetAzureAppendBufferSize()),
m_poHandleHelper(poHandleHelper), m_aosOptions(papszOptions),
m_aosHTTPOptions(CPLHTTPGetOptionsFromEnv(pszFilename))
{
Expand Down
47 changes: 43 additions & 4 deletions port/cpl_vsil_curl_class.h
Original file line number Diff line number Diff line change
Expand Up @@ -672,8 +672,47 @@ class IVSIS3LikeFSHandler : public VSICurlFilesystemHandlerBaseWritable

bool AbortPendingUploads(const char *pszFilename) override;

int GetUploadChunkSizeInBytes(const char *pszFilename,
const char *pszSpecifiedValInBytes);
size_t GetUploadChunkSizeInBytes(const char *pszFilename,
const char *pszSpecifiedValInBytes);

//! Maximum number of parts for multipart upload.
// The default is the limit currently used by S3 and GS.
// Cf https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html
// and https://cloud.google.com/storage/quotas#requests
virtual int GetMaximumPartCount()
{
    constexpr int nDefaultMaxPartCount = 10000;
    return nDefaultMaxPartCount;
}

//! Minimum size, in MiB, of every part of a multipart upload but the last.
// The default is the limit currently used by S3 and GS.
// Cf https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html
// and https://cloud.google.com/storage/quotas#requests
virtual int GetMinimumPartSizeInMiB()
{
    constexpr int nDefaultMinPartSizeInMiB = 5;
    return nDefaultMinPartSizeInMiB;
}

//! Maximum size of a part for multipart upload, in MiB.
// The default is the limit currently used by S3 and GS.
// Cf https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html
// and https://cloud.google.com/storage/quotas#requests
virtual int GetMaximumPartSizeInMiB()
{
    constexpr int nMiBPerGiB = 1024;
#if SIZEOF_VOIDP != 8
    // Cannot be larger than 4 (GiB, presumably), otherwise integer overflow
    // would occur.
    // 1 GiB is the maximum reasonable value on a 32-bit machine
    return nMiBPerGiB;
#else
    return 5 * nMiBPerGiB;
#endif
}

//! Part size, in MiB, used for a multipart upload by default.
virtual int GetDefaultPartSizeInMiB()
{
    constexpr int nDefaultPartSizeInMiB = 50;
    return nDefaultPartSizeInMiB;
}
};

/************************************************************************/
Expand Down Expand Up @@ -731,8 +770,8 @@ class VSIS3LikeWriteHandle final : public VSIVirtualHandle
CPLStringList m_aosHTTPOptions{};

vsi_l_offset m_nCurOffset = 0;
int m_nBufferOff = 0;
int m_nBufferSize = 0;
size_t m_nBufferOff = 0;
size_t m_nBufferSize = 0;
bool m_bClosed = false;
GByte *m_pabyBuffer = nullptr;
std::string m_osUploadID{};
Expand Down
Loading

0 comments on commit 6d91bc9

Please sign in to comment.