我有以下代碼用于使用 MultipartUpload 上傳 s3。
import logging
import boto3
class UploadS3:
def __init__(self, bucket, prefix):
self.s3 = boto3.resource('s3')
self.bucket = bucket
self.prefix = prefix
def start(self, key):
'''Start to upload a new file'''
self.part_no = 1
self.parts = []
key_path = f'{self.prefix}/{key}'
self.s3obj = self.s3.Object(self.bucket, key_path)
self.mpu = self.s3obj.initiate_multipart_upload()
self.buffer = bytearray()
def upload(self, chunk):
'''Upload a chunk'''
if len(self.buffer) >= 5_000_000:
self._upload_buffer()
self.buffer = chunk
def end(self, part_info={}):
if len(self.buffer):
self._upload_buffer()
part_info['Parts'] = self.parts
mpu_result = self.mpu.complete(MultipartUpload=part_info)
logging.info(f'Upload result: {mpu_result}')
def _upload_buffer(self):
self.part = self.mpu.Part(self.part_no)
print(f'buffer len: {len(self.buffer)}')
resp = self.part.upload(Body=self.buffer)
print({'PartNumber': self.part_no, 'ETag': resp['ETag']})
self.parts.append({'PartNumber': self.part_no, 'ETag': resp['ETag']})
self.part_no = 1
self.buffer = bytearray()
我創建了以下測驗代碼:
upload_s3 = UploadS3(BUCKET, PREFIX)
key = 'key2'
upload_s3.start(key)
upload_s3.upload(b'0' * 1_000_000)
upload_s3.upload(b'1' * 1_000_000)
upload_s3.upload(b'2' * 1_000_000)
upload_s3.upload(b'3' * 1_000_000)
upload_s3.upload(b'4' * 999_999)
upload_s3.upload(b'abcde')
upload_s3.upload(b'12345')
upload_s3.end({})
但是,它收到以下錯誤。第一部分的長度是5000004,第二(最后)部分的長度是5,哪個不需要超過5M?
buffer len: 5000004
{'PartNumber': 1, 'ETag': '"e616f253def9510e3be2af0854e4c992"'}
buffer len: 5
{'PartNumber': 2, 'ETag': '"db44331bface5c8678770426baf73bc2"'}
Traceback (most recent call last):
File "test1.py", line 35, in <module>
main()
File "test1.py", line 31, in main
upload_s3.end({})
File "/home/x/upload_s3.py", line 31, in end
mpu_result = self.mpu.complete(MultipartUpload=part_info)
File "/apps/external/4/anaconda3/lib/python3.6/site-packages/boto3/resources/factory.py", line 520, in do_action
response = action(self, *args, **kwargs)
File "/apps/external/4/anaconda3/lib/python3.6/site-packages/boto3/resources/action.py", line 83, in __call__
response = getattr(parent.meta.client, operation_name)(*args, **params)
File "/apps/external/4/anaconda3/lib/python3.6/site-packages/botocore/client.py", line 386, in _api_call
return self._make_api_call(operation_name, kwargs)
File "/apps/external/4/anaconda3/lib/python3.6/site-packages/botocore/client.py", line 705, in _make_api_call
raise error_class(parsed_response, operation_name)
botocore.exceptions.ClientError: An error occurred (EntityTooSmall) when calling the CompleteMultipartUpload operation: Your proposed upload is smaller than the minimum allowed size
uj5u.com熱心網友回復:
在撰寫此答案時,S3 分段上傳限制頁面具有下表:
| 物品 | 規格 |
|---|---|
| 最大物件大小 | 5 TB |
| 每次上傳的最大部分數 | 10,000 |
| 零件號 | 1至10,000(含) |
| 零件尺寸 | 5 MB 到 5 GB。分段上傳的最后一部分沒有最小尺寸限制。 |
| 為串列零件請求回傳的最大零件數 | 1000 |
| 串列分段上傳請求中回傳的分段上傳的最大數量 | 1000 |
然而,有一個微妙的錯誤。它說 5 MB 而不是 5 MiB(可能 5 GB 實際上應該是 5 GiB)。
由于您每5 000 000位元組拆分部分(5 MB,但“僅”~4.77 MiB),因此第一部分和第二部分都小于最小大小。
相反,您應該每5 242 880( 5 * 1024 ** 2) 個位元組拆分這些部分(或者甚至有點 [沒有雙關語意] 只是為了安全起見)。
我在 S3 檔案頁面上提交了一個拉取請求。
轉載請註明出處,本文鏈接:https://www.uj5u.com/ruanti/367211.html
下一篇:如何訪問私有子網中的EC2實體?
