我最近一直想知道確保資料庫記錄的創建是冪等的通常最好的方法是什么。我能想到的兩種方法是:
- 在執行 INSERT 之前檢查記錄是否已經存在
- 在相關列上使用唯一約束以確保不存在具有相同值的兩條記錄
這似乎是一個先看再跳/更容易要求寬恕比許可二分法的例子。在 Python 社區中,我知道后一種方法是可以接受的,甚至是首選方法。我想知道這是否同樣適用于使用關系資料庫。
越快越好嗎?
根據下面的一些測驗,似乎具有唯一約束的 EAFP 方法對于插入新記錄和優雅地處理重復記錄都更快。但是,我可以想象在每個 INSERT 之前使用 SELECT 的 LBYL 方法可能更可取的情況。
- 如果表架構發生更改,則更新約束以包含新列可能會很棘手。在生產環境中更改代碼肯定比遷移資料庫更容易。
- 如果表包含數百萬條記錄,那么在生產環境中添加和洗掉索引可能會很棘手。
- 我用來避免
create_permission_EAFP在我的 Django 示例中的錯誤例外上靜默失敗的字串搜索方法看起來很笨拙。(盡管這可以比一般方法更多地說明我的實作)。
性能測驗
下面的測驗是在我筆記本電腦上的 Docker 容器中使用 Postgres 14 和 Django 3.2 運行的。我決定為此使用 Django 測驗框架,因為每次運行測驗都從一個空資料庫開始。
創建 10,000 條記錄的結果
運行測驗tests.py一萬條記錄的輸出:
======================================================================
FAIL: test_look_before_you_leap_faster_existing_records (idempodentinserts.tests.TestPermissionCreation)
----------------------------------------------------------------------
Traceback (most recent call last):
File "/code/mainapp/idempodentinserts/tests.py", line 82, in test_look_before_you_leap_faster_existing_records
self.assertLess(duration_LBYL, duration_EAFP, f"LBYL took longer with existing records. {report}")
AssertionError: 4.998060464859009 not less than 2.5420615673065186 : LBYL took longer with existing records.
For 10000 create calls...
The Look-before-you-leap strategy took 4.998 seconds (average: 0.500 milliseconds).
The Ask-forgiveness-not-permission strategy took 2.542 seconds (average: 0.254 milliseconds).
======================================================================
FAIL: test_look_before_you_leap_faster_new_records (idempodentinserts.tests.TestPermissionCreation)
----------------------------------------------------------------------
Traceback (most recent call last):
File "/code/mainapp/idempodentinserts/tests.py", line 103, in test_look_before_you_leap_faster_new_records
self.assertLess(duration_LBYL, duration_EAFP, f"LBYL took longer with new records. {report}")
AssertionError: 31.07089853286743 not less than 20.387959241867065 : LBYL took longer with new records.
For 10000 create calls...
The Look-before-you-leap strategy took 31.071 seconds (average: 3.107 milliseconds).
The Ask-forgiveness-not-permission strategy took 20.388 seconds (average: 2.039 milliseconds).
----------------------------------------------------------------------
Ran 4 tests in 122.848s
FAILED (failures=2)
創建 1,000,000 條記錄的結果
即使在創建一百萬條記錄時,該測驗也支持 EAFP 作為更快的方法。雖然所有插入都變慢了,但首先檢查記錄的存在并沒有幫助。
======================================================================
FAIL: test_look_before_you_leap_faster_existing_records (idempodentinserts.tests.TestPermissionCreation)
----------------------------------------------------------------------
Traceback (most recent call last):
File "/code/mainapp/idempodentinserts/tests.py", line 82, in test_look_before_you_leap_faster_existing_records
self.assertLess(duration_LBYL, duration_EAFP, f"LBYL took longer with existing records. {report}")
AssertionError: 445.97691440582275 not less than 247.20186638832092 : LBYL took longer with existing records.
For 1000000 create calls...
The Look-before-you-leap strategy took 445.977 seconds (average: 0.446 milliseconds).
The Ask-forgiveness-not-permission strategy took 247.202 seconds (average: 0.247 milliseconds).
======================================================================
FAIL: test_look_before_you_leap_faster_new_records (idempodentinserts.tests.TestPermissionCreation)
----------------------------------------------------------------------
Traceback (most recent call last):
File "/code/mainapp/idempodentinserts/tests.py", line 103, in test_look_before_you_leap_faster_new_records
self.assertLess(duration_LBYL, duration_EAFP, f"LBYL took longer with new records. {report}")
AssertionError: 6323.6987335681915 not less than 4435.961817026138 : LBYL took longer with new records.
For 1000000 create calls...
The Look-before-you-leap strategy took 6323.699 seconds (average: 6.324 milliseconds).
The Ask-forgiveness-not-permission strategy took 4435.962 seconds (average: 4.436 milliseconds).
----------------------------------------------------------------------
Ran 4 tests in 23372.856s
FAILED (failures=2)
代碼
models.py
from django.db import models
class Permission(models.Model):
subject_uuid = models.UUIDField(db_index=True)
object_uuid = models.UUIDField(db_index=True)
verb = models.CharField(max_length=255)
class UniquePermission(models.Model):
subject_uuid = models.UUIDField(db_index=True)
object_uuid = models.UUIDField(db_index=True)
verb = models.CharField(max_length=255)
class Meta:
unique_together = ("subject_uuid", "object_uuid","verb")
permissions.py
from django import db
from . import models
def create_permission_LBYL(subject_uuid, object_uuid, verb):
if not models.Permission.objects.filter(
subject_uuid=subject_uuid, object_uuid=object_uuid, verb=verb
).exists():
models.Permission.objects.create(
subject_uuid=subject_uuid, object_uuid=object_uuid, verb=verb
)
def create_permission_EAFP(subject_uuid, object_uuid, verb):
try:
models.UniquePermission.objects.create(
subject_uuid=subject_uuid, object_uuid=object_uuid, verb=verb
)
except db.IntegrityError as e:
# Hack to reduce the chance that we mask an unexpected IntegrityError
if "duplicate key value violates unique constraint" not in str(e):
raise
tests.py
from django import test
from . import permissions
from . import models
VERB_LENGTH = 10
THOUSAND = 1000
VERB_COUNT = 10 * THOUSAND
class TestPermissionCreation(test.TransactionTestCase):
"""
Compares performance between the LBYL (look before you leap)
and EAFP (it's easier to ask forgiveness than permission) approaches to idempotent
database inserts.
"""
@classmethod
def setUpClass(cls):
super().setUpClass()
letter_combinations = itertools.combinations(string.ascii_lowercase, VERB_LENGTH)
unique_words = ("".join(combination) for combination in letter_combinations)
cls.verbs = list(itertools.islice(unique_words, VERB_COUNT))
cls.existing_subject_uuids = [uuid.uuid4() for _ in cls.verbs]
cls.existing_object_uuids = [uuid.uuid4() for _ in cls.verbs]
cls.new_subject_uuids = [uuid.uuid4() for _ in cls.verbs]
cls.new_object_uuids = [uuid.uuid4() for _ in cls.verbs]
def setUp(self):
models.Permission.objects.bulk_create(
models.Permission(subject_uuid=sub, object_uuid=obj, verb=verb)
for sub, obj, verb in zip(self.existing_subject_uuids,
self.existing_object_uuids,
self.verbs)
)
models.UniquePermission.objects.bulk_create(
models.UniquePermission(subject_uuid=sub, object_uuid=obj, verb=verb)
for sub, obj, verb in zip(self.existing_subject_uuids,
self.existing_object_uuids,
self.verbs)
)
def report_durations(self, duration_LBYL, duration_EAFP):
verb_count = len(self.verbs)
LBYL_ave_ms = (duration_LBYL / verb_count) * 1000
EAFP_ave_ms = (duration_EAFP / verb_count) * 1000
return (
f"For {verb_count} create calls... "
f"The Look-before-you-leap strategy took {duration_LBYL:.3f} seconds "
f"(average: {LBYL_ave_ms:.3f} milliseconds). "
f"The Ask-forgiveness-not-permission strategy took {duration_EAFP:.3f} seconds "
f"(average: {EAFP_ave_ms:.3f} milliseconds)."
)
def test_look_before_you_leap_faster_existing_records(self):
start_LBYL = time.time()
for sub, obj, verb in zip(self.existing_subject_uuids,
self.existing_object_uuids,
self.verbs):
permissions.create_permission_LBYL(subject_uuid=sub, object_uuid=obj, verb=verb)
duration_LBYL = time.time() - start_LBYL
start_EAFP = time.time()
for sub, obj, verb in zip(self.existing_subject_uuids,
self.existing_object_uuids,
self.verbs):
permissions.create_permission_EAFP(subject_uuid=sub, object_uuid=obj, verb=verb)
duration_EAFP = time.time() - start_EAFP
report = self.report_durations(duration_EAFP=duration_EAFP,duration_LBYL=duration_LBYL)
self.assertLess(duration_LBYL, duration_EAFP, f"LBYL took longer with existing records. {report}")
def test_look_before_you_leap_faster_new_records(self):
start_LBYL = time.time()
for sub, obj, verb in zip(self.new_subject_uuids,
self.new_object_uuids,
self.verbs):
permissions.create_permission_LBYL(subject_uuid=sub, object_uuid=obj, verb=verb)
duration_LBYL = time.time() - start_LBYL
start_EAFP = time.time()
for sub, obj, verb in zip(self.new_subject_uuids,
self.new_object_uuids,
self.verbs):
permissions.create_permission_EAFP(subject_uuid=sub, object_uuid=obj, verb=verb)
duration_EAFP = time.time() - start_EAFP
report = self.report_durations(duration_EAFP=duration_EAFP,duration_LBYL=duration_LBYL)
self.assertLess(duration_LBYL, duration_EAFP, f"LBYL took longer with new records. {report}")
def test_ask_forgiveness_not_permission_faster_existing_records(self):
start_LBYL = time.time()
for sub, obj, verb in zip(self.existing_subject_uuids,
self.existing_object_uuids,
self.verbs):
permissions.create_permission_LBYL(subject_uuid=sub, object_uuid=obj, verb=verb)
duration_LBYL = time.time() - start_LBYL
start_EAFP = time.time()
for sub, obj, verb in zip(self.existing_subject_uuids,
self.existing_object_uuids,
self.verbs,
):
permissions.create_permission_EAFP(subject_uuid=sub, object_uuid=obj, verb=verb)
duration_EAFP = time.time() - start_EAFP
report = self.report_durations(duration_EAFP=duration_EAFP,duration_LBYL=duration_LBYL)
self.assertLess(duration_EAFP, duration_LBYL, f"LBYL took longer with existing records. {report}")
def test_ask_forgiveness_not_permission_faster_new_records(self):
start_LBYL = time.time()
for sub, obj, verb in zip(self.new_subject_uuids,
self.new_object_uuids,
self.verbs):
permissions.create_permission_LBYL(subject_uuid=sub, object_uuid=obj, verb=verb)
duration_LBYL = time.time() - start_LBYL
start_EAFP = time.time()
for sub, obj, verb in zip(self.new_subject_uuids,
self.new_object_uuids,
self.verbs):
permissions.create_permission_EAFP(subject_uuid=sub, object_uuid=obj, verb=verb)
duration_EAFP = time.time() - start_EAFP
report = self.report_durations(duration_EAFP=duration_EAFP,duration_LBYL=duration_LBYL)
self.assertLess(duration_EAFP, duration_LBYL, f"LBYL took longer with new records. {report}")
uj5u.com熱心網友回復:
關系資料庫都是關于保證的。如果您選擇不使用它們的功能(在這種情況下為 UNIQUE CONSTRAINT),則您選擇不擁有該保證。
越快越好嗎?
比正確性更好?不,您想要一個運行盡可能快的正確應用程式,而不是“大多數時間”正確的快速應用程式。使用資料庫保證可以輕松撰寫正確的應用程式。
如果表包含數百萬條記錄,那么在生產環境中添加和洗掉索引可能會很棘手。
洗掉索引完全沒有問題。創建索引可能會鎖定表以進行寫入,是的。但是您可以同時構建索引以避免這種情況。
如果表架構發生更改,則更新約束以包含新列可能會很棘手。
只需創建一個新約束,然后洗掉舊約束。完成后更新應用程式代碼以使用新列。
轉載請註明出處,本文鏈接:https://www.uj5u.com/houduan/420072.html
標籤:
