model_dump
model_dump(
*,
mode: Literal["json", "python"] | str = "python",
include: IncEx = None,
exclude: IncEx = None,
context: dict[str, Any] | None = None,
by_alias: bool = False,
exclude_unset: bool = False,
exclude_defaults: bool = False,
exclude_none: bool = False,
round_trip: bool = False,
warnings: bool
| Literal["none", "warn", "error"] = True,
serialize_as_any: bool = False
) -> dict[str, Any]
▌Type Coercion
本講座在說明,Pydantic model 會將資料型別強制轉換(能力所及的情況下)。
預設情況下,資料型別強制轉換稱為 lax,它會嘗試各種類型轉換。
我們也可以設定為嚴格模式(strict),限制資料型別強制轉換的範圍(下方有對照表)。
以下連結( Conversion Table)提供了有關 Pydantic 在 嚴格(strict)模式 和 寬鬆(lax)模式 下,如何轉換資料的詳細資訊。由於表格非常長,請直接參考連結。
Conversion Table(lax mode vs. strict mode)
Field Type | Input | Strict | Input Source | Conditions |
---|---|---|---|---|
bool | bool | ✓ | Python & JSON | |
bool | float | Python & JSON | Allowed values: 0.0, 1.0. | |
bool | int | Python & JSON | Allowed values: 0, 1. | |
bool | str | Python & JSON | Allowed values: ‘f’, ‘n’, ‘no’, ‘off’, ‘false’, ‘False’, ‘t’, ‘y’, ‘on’, ‘yes’, ‘true’, ‘True’. | |
bool | Decimal | Python | Allowed values: Decimal(0), Decimal(1). | |
bytes | bytearray | Python | ||
bytes | bytes | ✓ | Python | |
bytes | str | Python | ||
callable | Any | ✓ | Python | callable() check must return True. |
date | bytes | Python | Format: YYYY-MM-DD (UTF-8). | |
date | date | ✓ | Python | |
date | datetime | Python | Must be exact date, eg. no H, M, S, f. | |
date | float | Python & JSON | Interpreted as seconds or ms from epoch. See speedate. Must be exact date. | |
date | int | Python & JSON | Interpreted as seconds or ms from epoch. See speedate. Must be exact date. | |
date | str | Python & JSON | Format: YYYY-MM-DD. | |
date | Decimal | Python | Interpreted as seconds or ms from epoch. See speedate. Must be exact date. | |
datetime | bytes | Python | Format: YYYY-MM-DDTHH:MM:SS.f or YYYY-MM-DD. See speedate, (UTF-8). | |
datetime | date | Python | ||
datetime | datetime | ✓ | Python | |
datetime | float | Python & JSON | Interpreted as seconds or ms from epoch, see speedate. | |
datetime | int | Python & JSON | Interpreted as seconds or ms from epoch, see speedate. | |
datetime | str | Python & JSON | Format: YYYY-MM-DDTHH:MM:SS.f or YYYY-MM-DD. See speedate. | |
datetime | Decimal | Python | Interpreted as seconds or ms from epoch, see speedate. | |
deque | deque | ✓ | Python | |
deque | frozenset | Python | ||
deque | list | Python | ||
deque | set | Python | ||
deque | tuple | Python | ||
dict | dict | ✓ | Python | |
dict | Mapping | Python | Must implement the mapping interface and have an items() method. | |
float | bool | Python & JSON | ||
float | bytes | Python | Must match [0-9]+(.[0-9]+)?. | |
float | float | ✓ | Python & JSON | bool is explicitly forbidden. |
float | int | ✓ | Python & JSON | |
float | str | Python & JSON | Must match [0-9]+(.[0-9]+)?. | |
float | Decimal | Python | ||
frozenset | deque | Python | ||
frozenset | dict_keys | Python | ||
frozenset | dict_values | Python | ||
frozenset | frozenset | ✓ | Python | |
frozenset | list | Python | ||
frozenset | set | Python | ||
frozenset | tuple | Python | ||
int | bool | Python & JSON | ||
int | bytes | Python | Must be numeric only, e.g. [0-9]+. | |
int | float | Python & JSON | Must be exact int, e.g. val % 1 == 0, raises error for nan, inf. | |
int | int | ✓ | Python & JSON | bool is explicitly forbidden. |
int | int | Python & JSON | ||
int | str | Python & JSON | Must be numeric only, e.g. [0-9]+. | |
int | Decimal | Python | Must be exact int, e.g. val % 1 == 0. | |
list | deque | Python | ||
list | dict_keys | Python | ||
list | dict_values | Python | ||
list | frozenset | Python | ||
list | list | ✓ | Python | |
list | set | Python | ||
list | tuple | Python | ||
namedtuple | dict | ✓ | Python | |
namedtuple | list | ✓ | Python | |
namedtuple | namedtuple | ✓ | Python | |
namedtuple | tuple | ✓ | Python | |
namedtuple | NamedTuple | ✓ | Python | |
set | deque | Python | ||
set | dict_keys | Python | ||
set | dict_values | Python | ||
set | frozenset | Python | ||
set | list | Python | ||
set | set | ✓ | Python | |
set | tuple | Python | ||
str | bytearray | Python | Assumes UTF-8, error on unicode decoding error. | |
str | bytes | Python | Assumes UTF-8, error on unicode decoding error. | |
str | str | ✓ | Python & JSON | |
time | bytes | Python | Format: HH:MM:SS.FFFFFF. See speedate. | |
time | float | Python & JSON | Interpreted as seconds, range 0 - 86399.9*. | |
time | int | Python & JSON | Interpreted as seconds, range 0 - 86399. | |
time | str | Python & JSON | Format: HH:MM:SS.FFFFFF. See speedate. | |
time | time | ✓ | Python | |
time | Decimal | Python | Interpreted as seconds, range 0 - 86399.9*. | |
timedelta | bytes | Python | Format: ISO8601. See speedate, (UTF-8). | |
timedelta | float | Python & JSON | Interpreted as seconds. | |
timedelta | int | Python & JSON | Interpreted as seconds. | |
timedelta | str | Python & JSON | Format: ISO8601. See speedate. | |
timedelta | timedelta | ✓ | Python | |
timedelta | Decimal | Python | Interpreted as seconds. | |
tuple | deque | Python | ||
tuple | dict_keys | Python | ||
tuple | dict_values | Python | ||
tuple | frozenset | Python | ||
tuple | list | Python | ||
tuple | set | Python | ||
tuple | tuple | ✓ | Python | |
Any | Any | ✓ | Python & JSON | |
ByteSize | float | ✓ | Python & JSON | |
ByteSize | int | ✓ | Python & JSON | |
ByteSize | str | ✓ | Python & JSON | |
ByteSize | Decimal | ✓ | Python | |
Decimal | float | Python & JSON | ||
Decimal | int | Python & JSON | ||
Decimal | str | Python & JSON | Must match [0-9]+(.[0-9]+)?. | |
Decimal | Decimal | ✓ | Python | |
Enum | Any | Python | Input value must be convertible to enum values. | |
Enum | Enum | ✓ | Python | |
IPv4Address | bytes | Python | ||
IPv4Address | int | Python | integer representing the IP address, must be less than 2**32 | |
IPv4Address | str | Python & JSON | ||
IPv4Address | IPv4Address | ✓ | Python | |
IPv4Address | IPv4Interface | ✓ | Python | |
IPv4Interface | bytes | Python | ||
IPv4Interface | int | Python | integer representing the IP address, must be less than 2**32 | |
IPv4Interface | str | Python & JSON | ||
IPv4Interface | tuple | Python | ||
IPv4Interface | IPv4Address | Python | ||
IPv4Interface | IPv4Interface | ✓ | Python | |
IPv4Network | bytes | Python | ||
IPv4Network | int | Python | integer representing the IP network, must be less than 2**32 | |
IPv4Network | str | Python & JSON | ||
IPv4Network | IPv4Address | Python | ||
IPv4Network | IPv4Interface | Python | ||
IPv4Network | IPv4Network | ✓ | Python | |
IPv6Address | bytes | Python | ||
IPv6Address | int | Python | integer representing the IP address, must be less than 2**128 | |
IPv6Address | str | Python & JSON | ||
IPv6Address | IPv6Address | ✓ | Python | |
IPv6Address | IPv6Interface | ✓ | Python | |
IPv6Interface | bytes | Python | ||
IPv6Interface | int | Python | integer representing the IP address, must be less than 2**128 | |
IPv6Interface | str | Python & JSON | ||
IPv6Interface | tuple | Python | ||
IPv6Interface | IPv6Address | Python | ||
IPv6Interface | IPv6Interface | ✓ | Python | |
IPv6Network | bytes | Python | ||
IPv6Network | int | Python | integer representing the IP address, must be less than 2**128 | |
IPv6Network | str | Python & JSON | ||
IPv6Network | IPv6Address | Python | ||
IPv6Network | IPv6Interface | Python | ||
IPv6Network | IPv6Network | ✓ | Python | |
InstanceOf | Any | ✓ | Python | isinstance() check must return True. |
IntEnum | Any | Python | Input value must be convertible to enum values. | |
IntEnum | IntEnum | ✓ | Python | |
Iterable | deque | ✓ | Python | |
Iterable | frozenset | ✓ | Python | |
Iterable | list | ✓ | Python | |
Iterable | set | ✓ | Python | |
Iterable | tuple | ✓ | Python | |
NamedTuple | dict | ✓ | Python | |
NamedTuple | list | ✓ | Python | |
NamedTuple | namedtuple | ✓ | Python | |
NamedTuple | tuple | ✓ | Python | |
NamedTuple | NamedTuple | ✓ | Python | |
None | None | ✓ | Python & JSON | |
Path | str | Python | ||
Path | Path | ✓ | Python | |
Pattern | bytes | ✓ | Python | Input must be a valid pattern. |
Pattern | str | ✓ | Python & JSON | Input must be a valid pattern. |
Sequence | deque | Python | ||
Sequence | list | ✓ | Python | |
Sequence | tuple | Python | ||
Type | Type | ✓ | Python | |
TypedDict | dict | ✓ | Python | |
TypedDict | Any | ✓ | Python | |
TypedDict | Mapping | Python | Must implement the mapping interface and have an items() method. | |
UUID | str | Python | ||
UUID | UUID | ✓ | Python |
from pydantic import BaseModel, Field, ValidationError
# Float -> bool coercion, compared in lax mode and strict mode.
#
# NOTE: `strict` is not a constructor argument. Writing
# `FloatToBool(x=0.0, strict=True)` makes `strict` an unknown extra field,
# which is ignored by default, so validation would silently stay in lax mode.
# The mode is therefore selected through `model_validate(..., strict=...)`.
class FloatToBool(BaseModel):
    x: bool


# Lax mode (the default): 0.0 is one of the allowed float values for bool.
try:
    float_to_bool_lax = FloatToBool.model_validate({"x": 0.0}, strict=False)
    print(float_to_bool_lax)  # x=False
except ValidationError as e:
    print(f"浮點轉換為布林在 lax mode 下會拋出錯誤: {e}")

# Strict mode: only a real bool is accepted, so a float raises ValidationError.
try:
    float_to_bool_strict = FloatToBool.model_validate({"x": 0.0}, strict=True)
    print(float_to_bool_strict)  # not reached
except ValidationError as e:
    print(f"浮點轉換為布林在 strict mode 下會拋出錯誤: {e}")

# Expected output:
#   x=False
#   <the strict-mode message above, followed by a bool_type validation error for x>
lax mode 範例
lax mode (
strict=False
):
如果輸入資料在欄位類型中具有 SINGLE 和 INTUITIVE 表示形式,且轉換期間沒有資料遺失,則資料將被轉換;否則會引發驗證錯誤。
此規則有一個例外:字串欄位 - 實際上所有資料都具有字串形式的直覺表示(例如repr()
和str()
),因此需要自訂規則:僅 str
、bytes
和 bytearray
作為字串欄位的輸入有效。
Field Type | Input | Single & Intuitive R. | All Data Preserved | Result |
---|---|---|---|---|
int | 123 | ![]() |
![]() |
Convert |
int | 123.0 | ![]() |
![]() |
Convert |
int | 123.1 | ![]() |
![]() |
Error |
date | “2020-01-01” | ![]() |
![]() |
Convert |
date | “2020-01-01T00:00:00” | ![]() |
![]() |
Convert |
date | “2020-01-01T12:00:00” | ![]() |
![]() |
Error |
int | b"1" | ![]() |
![]() |
Error |
Lax Mode vs. Strict Mode
由 claude 製作的對照表
資料型別轉換 | lax mode | strict mode | 程式碼範例 |
---|---|---|---|
字串 → 整數 | ![]() |
![]() |
from pydantic import BaseModel, Field class Model(BaseModel): x: int = Field(…) Model(x=‘42’) |
字串 → 浮點數 | ![]() |
![]() |
class Model(BaseModel): x: float = Field(…) Model(x=‘3.14’) |
字串 → 布林值 | ![]() |
![]() |
class Model(BaseModel): x: bool = Field(…) Model(x=‘true’) |
數字 → 布林值 | ![]() |
![]() |
class Model(BaseModel): x: bool = Field(…) Model(x=1) |
整數 → 浮點數 | ![]() |
![]() |
class Model(BaseModel): x: float = Field(…) Model(x=42) |
浮點數 → 整數 | ![]() |
![]() |
class Model(BaseModel): x: int = Field(…) Model(x=3.00) ![]() Model(x=3.14) ![]() |
字串 → 日期 | ![]() ![]() |
![]() ![]() |
from datetime import date; class Model(BaseModel): x: date = Field(…) Model(x=‘2023-04-25’) |
字串 → 日期時間 | ![]() ![]() |
![]() ![]() |
from datetime import datetime class Model(BaseModel): x: datetime = Field(…) Model(x=‘2023-04-25T12:34:56’) |
列表/集合 → 自定義列表/集合類型 |
![]() ![]() |
![]() ![]() |
from typing import List class Model(BaseModel): x: List[int] = Field(…) Model(x=[1, ‘2’, 3.4]) |
由於表格中程式碼不易表示,請參考整合後的程式碼(上為 Pydantic V2):
from pydantic import BaseModel, Field, ValidationError
from typing import List
from datetime import date, datetime
# Lax-mode coercion examples (Pydantic V2).
#
# Lax is the default mode. It is requested explicitly through
# `model_validate(..., strict=False)` because a `strict=` keyword handed to the
# model constructor is treated as an unknown field and ignored.
# Every error message names "lax mode", since that is the mode being run.

# str -> int
class StringToIntModel(BaseModel):
    x: int


try:
    string_to_int_lax = StringToIntModel.model_validate({"x": "42"}, strict=False)
    print(string_to_int_lax)  # x=42
except ValidationError as e:
    print(f"字串轉換為整數在 lax mode 下會拋出錯誤: {e}")


# str -> float
class StringToFloatModel(BaseModel):
    x: float


try:
    string_to_float_lax = StringToFloatModel.model_validate({"x": "3.14"}, strict=False)
    print(string_to_float_lax)  # x=3.14
except ValidationError as e:
    print(f"字串轉換為浮點數在 lax mode 下會拋出錯誤: {e}")


# str -> bool
class StringToBoolModel(BaseModel):
    x: bool


try:
    string_to_bool_lax = StringToBoolModel.model_validate({"x": "true"}, strict=False)
    print(string_to_bool_lax)  # x=True
except ValidationError as e:
    print(f"字串轉換為布林值在 lax mode 下會拋出錯誤: {e}")


# number -> bool
class NumberToBoolModel(BaseModel):
    x: bool


try:
    number_to_bool_lax = NumberToBoolModel.model_validate({"x": 1}, strict=False)
    print(number_to_bool_lax)  # x=True
except ValidationError as e:
    print(f"數字轉換為布林值在 lax mode 下會拋出錯誤: {e}")


# int -> float
class IntToFloatModel(BaseModel):
    x: float


try:
    int_to_float_lax = IntToFloatModel.model_validate({"x": 42}, strict=False)
    print(int_to_float_lax)  # x=42.0
except ValidationError as e:
    print(f"整數轉換為浮點數在 lax mode 下會拋出錯誤: {e}")


# float -> int: fails even in lax mode because 3.14 has a fractional part
# (only exact values such as 3.0 are converted).
class FloatToIntModel(BaseModel):
    x: int


try:
    float_to_int_lax = FloatToIntModel.model_validate({"x": 3.14}, strict=False)
    print(float_to_int_lax)
except ValidationError as e:
    print(f"浮點數轉換為整數在 lax mode 下會拋出錯誤: {e}")


# str -> date: an ISO "YYYY-MM-DD" string is converted in lax mode.
class StringToDateModel(BaseModel):
    x: date


try:
    string_to_date_lax = StringToDateModel.model_validate({"x": "2023-04-25"}, strict=False)
    print(string_to_date_lax)  # x=datetime.date(2023, 4, 25)
except ValidationError as e:
    print(f"字串轉換為日期在 lax mode 下會拋出錯誤: {e}")


# str -> datetime: an ISO date-time string is converted in lax mode.
class StringToDatetimeModel(BaseModel):
    x: datetime


try:
    string_to_datetime_lax = StringToDatetimeModel.model_validate(
        {"x": "2023-04-25T12:34:56"}, strict=False
    )
    print(string_to_datetime_lax)  # x=datetime.datetime(2023, 4, 25, 12, 34, 56)
except ValidationError as e:
    print(f"字串轉換為日期時間在 lax mode 下會拋出錯誤: {e}")


# Heterogeneous list -> List[int]: '2' is convertible, but 3.4 is not an exact
# int, so validation fails even in lax mode.
class CustomListModel(BaseModel):
    x: List[int]


try:
    list_to_custom_lax = CustomListModel.model_validate({"x": [1, "2", 3.4]}, strict=False)
    print(list_to_custom_lax)
except ValidationError as e:
    print(f"列表/集合轉換為自定義列表/集合類型在 lax mode 下會拋出錯誤: {e}")
# Pydantic example program.
# Demonstrates implicit type coercion in lax mode, which is the default when a
# model is built through its constructor. (Pydantic has no `trusted_data`
# switch; strict mode is enabled with `model_validate(..., strict=True)`,
# `ConfigDict(strict=True)` or `Field(strict=True)`.)
from pydantic import BaseModel, Field, ValidationError
from typing import List
from datetime import date, datetime


# str -> int
class StringToIntModel(BaseModel):
    x: int = Field(...)


# In lax mode a numeric string is implicitly converted to int.
string_to_int_lax = StringToIntModel(x='42')
print(string_to_int_lax)  # x=42


# str -> float
class StringToFloatModel(BaseModel):
    x: float = Field(...)


# In lax mode a numeric string is implicitly converted to float.
string_to_float_lax = StringToFloatModel(x='3.14')
print(string_to_float_lax)  # x=3.14


# str -> bool
class StringToBoolModel(BaseModel):
    x: bool = Field(...)


# In lax mode a recognised string such as 'true' is converted to bool.
string_to_bool_lax = StringToBoolModel(x='true')
print(string_to_bool_lax)  # x=True


# number -> bool
class NumberToBoolModel(BaseModel):
    x: bool = Field(...)


# In lax mode the integers 0 and 1 are converted to bool.
number_to_bool_lax = NumberToBoolModel(x=1)
print(number_to_bool_lax)  # x=True


# int -> float
class IntToFloatModel(BaseModel):
    x: float = Field(...)


# An int is accepted for a float field.
int_to_float_lax = IntToFloatModel(x=42)
print(int_to_float_lax)  # x=42.0


# float -> int: fails even in lax mode because 3.14 is not an exact integer.
class FloatToIntModel(BaseModel):
    x: int = Field(...)


try:
    float_to_int_lax = FloatToIntModel(x=3.14)
    print(float_to_int_lax)
except ValidationError as e:
    print(f"浮點數轉換為整數在 lax mode 下會拋出錯誤: {e}")


# str -> date: converted in lax mode.
class StringToDateModel(BaseModel):
    x: date = Field(...)


try:
    string_to_date_lax = StringToDateModel(x='2023-04-25')
    print(string_to_date_lax)
except ValidationError as e:
    print(f"字串轉換為日期在 lax mode 下會拋出錯誤: {e}")


# str -> datetime: converted in lax mode.
class StringToDatetimeModel(BaseModel):
    x: datetime = Field(...)


try:
    string_to_datetime_lax = StringToDatetimeModel(x='2023-04-25T12:34:56')
    print(string_to_datetime_lax)
except ValidationError as e:
    print(f"字串轉換為日期時間在 lax mode 下會拋出錯誤: {e}")


# Heterogeneous list -> List[int]: fails even in lax mode because 3.4 is not
# an exact integer.
class CustomListModel(BaseModel):
    x: List[int] = Field(...)


try:
    list_to_custom_lax = CustomListModel(x=[1, '2', 3.4])
    print(list_to_custom_lax)
except ValidationError as e:
    print(f"列表/集合轉換為自定義列表/集合類型在 lax mode 下會拋出錯誤: {e}")
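另外,經過反覆測試,我初步的結論(不確定是否正確,我找時間向老師提問):Pydantic V2 中,lax mode & strict mode 的結果相同,V1 中不同(但寫法不同,所以略過)。補充:結果相同很可能是因為把 strict 當成建構子參數(例如 Model(x='42', strict=True))時,它只會被視為未定義的額外欄位而被忽略,並不會切換驗證模式;要真正啟用嚴格模式,需使用 Model.model_validate(data, strict=True)、model_config = ConfigDict(strict=True) 或 Field(strict=True)。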
from pydantic import BaseModel, ValidationError
from typing import List
from datetime import date
# Lax mode vs. strict mode, side by side.
#
# The mode is chosen with `model_validate(..., strict=...)`. Passing `strict=`
# to the model constructor does not work: it is treated as an unknown field and
# ignored, which makes both runs behave as lax mode.

# 1. str -> int
class StringToIntModel(BaseModel):
    x: int


print("1. 字串轉換為整數")
# Lax mode: succeeds.
string_to_int_lax = StringToIntModel.model_validate({"x": "42"}, strict=False)
print(string_to_int_lax)  # x=42
# Strict mode: fails, a str is not accepted for an int field.
try:
    string_to_int_strict = StringToIntModel.model_validate({"x": "42"}, strict=True)
    print(string_to_int_strict)
except ValidationError as e:
    print(f"字串轉換為整數在 strict mode 下失敗: {e}")


# 2. float -> int
class FloatToIntModel(BaseModel):
    x: int


print("2. 浮點數轉換為整數")
# Lax mode: fails, 3.14 has a fractional part.
try:
    float_to_int_lax = FloatToIntModel.model_validate({"x": 3.14}, strict=False)
    print(float_to_int_lax)
except ValidationError as e:
    print(f"浮點數轉換為整數在 lax mode 下失敗: {e}")
# Strict mode: fails, a float is not accepted for an int field.
try:
    float_to_int_strict = FloatToIntModel.model_validate({"x": 3.14}, strict=True)
    print(float_to_int_strict)
except ValidationError as e:
    print(f"浮點數轉換為整數在 strict mode 下失敗: {e}")


# 4. heterogeneous list -> List[int]
class CustomListModel(BaseModel):
    x: List[int]


print("4. 異構列表轉換為自定義列表類型")
# Lax mode: fails because of 3.4 ('2' alone would be converted).
try:
    list_to_custom_lax = CustomListModel.model_validate({"x": [1, "2", 3.4]}, strict=False)
    print(list_to_custom_lax)
except ValidationError as e:
    print(f"異構列表轉換為自定義列表類型在 lax mode 下失敗: {e}")
# Strict mode: fails for both '2' and 3.4.
try:
    list_to_custom_strict = CustomListModel.model_validate({"x": [1, "2", 3.4]}, strict=True)
    print(list_to_custom_strict)
except ValidationError as e:
    print(f"異構列表轉換為自定義列表類型在 strict mode 下失敗: {e}")
非 Pydantic 的 lax mode vs. strict mode
轉換 | lax mode | strict mode | 示例程式碼 |
---|---|---|---|
數字 → 布林值 | ![]() |
![]() |
result = bool(0) # lax: False, strict: False |
非零數字 → 布林值 | ![]() |
![]() |
result = bool(1) # lax: True, strict: True |
字串 → 數字 | ![]() |
![]() |
result = int("42") # lax: 42, strict: 42 |
布林值 → 數字 | ![]() |
![]() |
result = int(True) # lax: 1, strict: 1 |
None → 數字 | ![]() |
![]() |
result = int(None) # 實際上會引發 TypeError:Python 不會把 None 轉成 0 |
字符串 → 布林值 | ![]() |
![]() |
result = bool("") # lax: False, strict: False |
物件 → 布林值 | ![]() |
![]() |
result = bool(object()) # lax: True, strict: TypeError |
非空字串 → 布林值 | ![]() |
![]() |
result = bool("hello") # lax: True, strict: True |
字串 → 數字 (非數字字串) | ![]() |
![]() |
result = int("hello") # lax: ValueError, strict: TypeError |
字串 → 數字 (包含非數字字元) | ![]() |
![]() |
result = int("42.3a") # lax: ValueError, strict: TypeError |
字串 → 物件 | ![]() |
![]() |
result = object("hello") # lax: <object obj>, strict: TypeError |
數字 → 物件 | ![]() |
![]() |
result = object(42) # lax: <object obj>, strict: TypeError |
布林值 → 物件 | ![]() |
![]() |
result = object(True) # lax: <object obj>, strict: TypeError |
JavaScript 的 lax mode vs. strict mode
轉換 | lax mode | strict mode | 示例程式碼 |
---|---|---|---|
數字 → 布林值 | ![]() |
![]() |
const result = 0 ? true : false; // lax: false, strict: false |
非零數字 → 布林值 | ![]() |
![]() |
const result = 1 ? true : false; // lax: true, strict: true |
字串 → 數字 | ![]() |
![]() |
const result = +'42'; // lax: 42, strict: 42 |
布林值 → 數字 | ![]() |
![]() |
const result = +true; // lax: 1, strict: 1 |
null → 數字 | ![]() |
![]() |
const result = +null; // lax: 0, strict: 0 |
undefined → 數字 | ![]() |
![]() |
const result = +undefined; // lax: NaN, strict: NaN |
物件 → 數字 | ![]() |
![]() |
const result = +{}; // lax: NaN, strict: NaN |
字符串 → 布林值 | ![]() |
![]() |
const result = !!''; // lax: false, strict: false |
物件 → 布林值 | ![]() |
![]() |
const result = !!{}; // lax: true, strict: true |
非空字串 → 布林值 | ![]() |
![]() |
const result = !!'hello'; // lax: true, strict: true |
undefined/null → 布林值 | ![]() |
![]() |
const result = !!undefined; // lax: false, strict: false |
字串 → 物件 (使用包裝類Wrapper Class) | ![]() |
![]() |
const result = new String('hello'); // lax: String {'hello'}, strict: TypeError |
數字 → 物件 (使用包裝類Wrapper Class) | ![]() |
![]() |
const result = new Number(42); // lax: Number {42}, strict: TypeError |
布林值 → 物件 (使用包裝類Wrapper Class) | ![]() |
![]() |
const result = new Boolean(true); // lax: Boolean {true}, strict: TypeError |
ChatGPT 的答案有誤,隱藏起來
資料型別 | Lax Mode 轉換 | Strict Mode 轉換 | 說明 |
---|---|---|---|
整數 (int) | ![]() ExampleModel(value=10) |
![]() StrictExampleModel(value=10) |
整數可以強制轉換成整數。 |
整數 (int) | ![]() ValidationError: 1 validation error for ExampleModel |
![]() ValidationError: 1 validation error for StrictExampleModel |
非整數無法強制轉換成整數,除非是浮點數且值相同。 |
浮點數 (float) | ![]() ExampleModel(value=10.5) |
![]() StrictExampleModel(value=10.5) |
浮點數可以強制轉換成浮點數。 |
浮點數 (float) | ![]() ExampleModel(value=10.5) |
![]() ValidationError: 1 validation error for StrictExampleModel |
非浮點數無法強制轉換成浮點數。 |
字串 (str) | ![]() ExampleModel(value='hello') |
![]() StrictExampleModel(value='hello') |
字串可以強制轉換成字串。 |
字串 (str) | ![]() ExampleModel(value='hello') |
![]() ValidationError: 1 validation error for StrictExampleModel |
非字串無法強制轉換成字串。 |
布林值 (bool) | ![]() ExampleModel(value=True) |
![]() StrictExampleModel(value=True) |
布林值可以強制轉換成布林值。 |
布林值 (bool) | ![]() ExampleModel(value=True) |
![]() ValidationError: 1 validation error for StrictExampleModel |
非布林值無法強制轉換成布林值。 |
列表 (list) | ![]() ExampleModel(value=[1, 2, 3]) |
![]() StrictExampleModel(value=[1, 2, 3]) |
列表可以強制轉換成列表。 |
列表 (list) | ![]() ExampleModel(value=[1, 2, 3]) |
![]() ValidationError: 1 validation error for StrictExampleModel |
非列表無法強制轉換成列表。 |
字典 (dict) | ![]() ExampleModel(value={'key': 'value'}) |
![]() StrictExampleModel(value={'key': 'value'}) |
字典可以強制轉換成字典。 |
字典 (dict) | ![]() ExampleModel(value={'key': 'value'}) |
![]() ValidationError: 1 validation error for StrictExampleModel |
非字典無法強制轉換成字典。 |
元組 (tuple) | ![]() ExampleModel(value=(1, 2, 3)) |
![]() StrictExampleModel(value=(1, 2, 3)) |
元組可以強制轉換成元組。 |
元組 (tuple) | ![]() ExampleModel(value=(1, 2, 3)) |
![]() ValidationError: 1 validation error for StrictExampleModel |
非元組無法強制轉換成元組。 |
集合 (set) | ![]() ExampleModel(value={1, 2, 3}) |
![]() StrictExampleModel(value={1, 2, 3}) |
集合可以強制轉換成集合。 |
集合 (set) | ![]() ExampleModel(value={1, 2, 3}) |
![]() ValidationError: 1 validation error for StrictExampleModel |
非集合無法強制轉換成集合。 |
None | ![]() ExampleModel(value=None) |
![]() StrictExampleModel(value=None) |
None 可以強制轉換成 None。 |
None | ![]() ExampleModel(value=None) |
![]() ValidationError: 1 validation error for StrictExampleModel |
非 None 值無法強制轉換成 None。 |
自訂資料型別 | 自定義模型的實例 | 自定義模型的實例 | 其他自訂資料型別可以強制轉換成該資料型別。 |
自訂資料型別 | ![]() ValidationError: 1 validation error for ExampleModel |
![]() ValidationError: 1 validation error for StrictExampleModel |
非指定資料型別無法強制轉換成該資料型別。 |
預期外的 BUG
預期外的 BUG 是指,Pydantic model 會在初始化的時候幫你檢查,如果資料型別不對,在其能力所及的情況下會幫你強制轉換。
但他可沒料到你之後閒閒沒事做會亂搞,自己又把資料型別改掉。
為了省下檢查時間,提昇程式執行效率,Pydantic 之後不會再做資料型別檢查。(不過出錯也很容易發現就是)
▌Required vs Optional Fields
本講座介紹參數預設值。
from pydantic import BaseModel
class Circle(BaseModel):
    # Has a default value, so the field is optional (required=False).
    center: tuple[int, int] = (0, 0)
    # No default value, so the field is required.
    radius: int


Circle.model_fields
輸出:
{'center': FieldInfo(annotation=tuple[int, int], required=False, default=(0, 0)),
'radius': FieldInfo(annotation=int, required=True)}
預期外的 BUG
為加速程式執行,系統假設程式設計師知道自己在寫什麼,不做檢查。
請自行對 預設值 負責。
from pydantic import BaseModel
class Circle2(BaseModel):
    # Deliberately wrong: the default is a str although the annotation is a
    # tuple. The default is stored as-is, without being validated.
    center: tuple[int, int] = "junk"
    radius: int


Circle2.model_fields
輸出:
{'center': FieldInfo(annotation=tuple[int, int], required=False, default='junk'),
'radius': FieldInfo(annotation=int, required=True)}
Circle2(radius = 2)
輸出:
Circle2(center='junk', radius=2)
錯誤程式示範
compiled(
) vs. called(
)
用 mutable object 當預設值
dataclass:必須使用 default_factory,每次建立實例時都會呼叫該函式,因此會回傳一個新的預設物件。
Pydantic:每次建立新的 model 實例時,會對 mutable 預設值做 deep copy。
▌Nullable Fields
from pydantic import BaseModel, ValidationError
-class Model(BaseModel):
- field: int
+class Model(BaseModel):
+ field: int | None
class Model(BaseModel):
field: int | None = None
較早期版本的 Python
以下兩個範例。推薦前者;後者名稱關係,容易混淆。
from typing import Union
class Model(BaseModel):
    # Nullable but still required: there is no default value.
    field: Union[int, None]
from typing import Optional
class Model(BaseModel):
    # Optional[int] only makes the field nullable; the `= None` default is what
    # makes it optional.
    field: Optional[int] = None
其實是一樣的
class Model(BaseModel):
    # Three spellings of the same annotation; none has a default, so all three
    # fields are required.
    field_1: int | None
    field_2: Union[int, None]
    field_3: Optional[int]


Model.model_fields
輸出:
{'field_1': FieldInfo(annotation=Union[int, NoneType], required=True),
'field_2': FieldInfo(annotation=Union[int, NoneType], required=True),
'field_3': FieldInfo(annotation=Union[int, NoneType], required=True)}
常見錯誤
class Model(BaseModel):
    # Common mistake: the annotation is int (not nullable) but the default is
    # None. The default is not validated, so the instance ends up holding None.
    field: int = None


m = Model()
m
輸出:
Model(field=None)
▌Combining Nullable and Optional
Required / Optional
Nullable / Not nullable
基本上就這四種情形
Not nullable | Nullable | |
---|---|---|
Required | ![]() |
![]() |
Optional | ![]() |
![]() |
給預設值,就是 Optional
Nullable,就是可設為 None
Required, Not Nullable
from pydantic import BaseModel, ValidationError
class Model(BaseModel):
    # Required (no default) and not nullable (plain int).
    field: int


# Omitting the field raises a "Field required" validation error.
try:
    Model()
except ValidationError as ex:
    print(ex)
1 validation error for Model
field
Field required [type=missing, input_value={}, input_type=dict]
For further information visit https://errors.pydantic.dev/2.6/v/missing
# Passing None explicitly is rejected as well: the field is int, not int | None.
try:
    Model(field=None)
except ValidationError as ex:
    print(ex)
1 validation error for Model
field
Input should be a valid integer [type=int_type, input_value=None, input_type=NoneType]
For further information visit https://errors.pydantic.dev/2.6/v/int_type
Required, Nullable
from pydantic import BaseModel, ValidationError
class Model(BaseModel):
    # Required (no default) but nullable: None must be passed explicitly.
    field: int | None


# Omitting the field still raises a "Field required" validation error.
try:
    Model()
except ValidationError as ex:
    print(ex)
1 validation error for Model
field
Field required [type=missing, input_value={}, input_type=dict]
For further information visit https://errors.pydantic.dev/2.6/v/missing
Model(field=None)
Model(field=None)
Optional, Not Nullable
from pydantic import BaseModel, ValidationError
class Model(BaseModel):
    # Optional (has a default) and not nullable (plain int).
    field: int = 0


Model()
Model(field=0)
# None is rejected: the field has a default but its type is still plain int.
try:
    Model(field=None)
except ValidationError as ex:
    print(ex)
1 validation error for Model
field
Input should be a valid integer [type=int_type, input_value=None, input_type=NoneType]
For further information visit https://errors.pydantic.dev/2.6/v/int_type
Optional, Nullable
from pydantic import BaseModel, ValidationError
class Model(BaseModel):
    # Optional (defaults to None) and nullable (int | None).
    field: int | None = None


Model()
Model(field=None)
Model(field=None)
Model(field=None)
▌Inspecting Fields
觀看 Fields 資料的幾個重點:
-
透過 model_fields
-
透過 instance
-
該資料是填入值還是預設值?
以下分別為示範程式:
透過 model_fields
from pydantic import BaseModel
class Circle(BaseModel):
    # Every field has a default, so none of them is required.
    center_x: int = 0
    center_y: int = 0
    radius: int = 1
    name: str | None = None


Circle.model_fields
{'center_x': FieldInfo(annotation=int, required=False, default=0),
'center_y': FieldInfo(annotation=int, required=False, default=0),
'radius': FieldInfo(annotation=int, required=False, default=1),
'name': FieldInfo(annotation=Union[str, NoneType], required=False, default=None)}
透過 instance
c1 = Circle(radius=2)
c2 = Circle(name="Unit circle")
c1, c2
(Circle(center_x=0, center_y=0, radius=2, name=None),
Circle(center_x=0, center_y=0, radius=1, name='Unit circle'))
c1.model_dump(), c2.model_dump()
({'center_x': 0, 'center_y': 0, 'radius': 2, 'name': None},
{'center_x': 0, 'center_y': 0, 'radius': 1, 'name': 'Unit circle'})
該資料是填入值還是預設值?
1. 資料是填入值
model_fields_set
c1.model_fields_set, c2.model_fields_set
({'radius'}, {'name'})
2. 資料是預設值
ChatGPT 和 Claude 都是用廻圈比對
__fields__
是否預設的解法。老師的解法漂亮多了,一行搞定,簡潔易懂。
c1.model_fields.keys() - c1.model_fields_set
{'center_x', 'center_y', 'name'}
看看 Claude 的進一步說明:
這三種寫法的確都能正確地得到具有預設值且未被明確賦值的欄位名稱集合,但它們在執行效率和可讀性上有一些區別:
c1.model_fields.keys() - c1.model_fields_set
這種方式有一個潛在的問題。c1.model_fields.keys()
返回的是一個dict_keys
物件,不是一個真正的集合。而dict_keys
物件在執行減法運算時,Python 需要先將它轉換為集合,然後才能進行集合運算。這個隱式的類型轉換會稍微降低執行效率。
set(c1.model_fields.keys()) - set(c1.model_fields_set)
這種寫法解決了上述問題。它先將c1.model_fields.keys()
和c1.model_fields_set
都顯式地轉換為集合,然後再執行集合減法運算。執行效率略高於第一種寫法,但多了兩次set()
轉換的開銷。不過,這種寫法最清晰地展現了我們的本意 - 在集合之間做減法運算。
set(c1.model_fields) - c1.model_fields_set
這是最精簡高效的寫法。c1.model_fields
直接返回一個字典,包含該模型所有的欄位定義。我們將這個字典轉換為集合,再減去c1.model_fields_set
集合,就能直接得到結果。只需一次轉換,沒有任何多餘的操作,執行效率最高。
雖然第三種寫法在執行效率上最優,但我個人認為第二種寫法更容易理解和維護。它清楚地展現了我們的本意,讀者不需要深入了解model_fields
的內部實現細節,也能輕易地理解程式碼的作用。
因此,如果是在一個需要追求極致效率的場景,第三種寫法會是最佳選擇。但在大多數一般情況下,我會選擇第二種寫法,權衡執行效率和可讀性後,它或許是最合適的。這只是我的個人觀點,沒有絕對的對錯,需視具體情況而定。
排除 JSON API 的預設值欄位
示範序列化模型時,排除預設值欄位,僅包含從反序列化資料中實際設定的欄位。
mm.model_dump(include = mm.model_fields_set)
model_dump
是 Pydantic 模型實例上的一個方法,它可以將模型實例序列化為一個字典。
include
參數則允許我們指定要包含哪些欄位。
通過傳遞 model_fields_set
給 include
參數,可以輕鬆地僅包含從反序列化資料中實際設定的欄位,排除所有預設值欄位。
from pydantic import BaseModel
class Model(BaseModel):
    field_1: int = 1
    field_2: int | None = None
    field_3: str  # the only required field
    field_4: str | None = "Python"


# m1 sets only the required field; everything else comes from defaults.
m1 = Model(field_3="m1")
# m2 passes every field explicitly, using values equal to the defaults; they
# still count as "set".
m2 = Model(field_1=1, field_2=None, field_3="m2", field_4="Python")
# m3 passes every field explicitly with non-default values.
m3 = Model(field_1=10, field_2=20, field_3="m3", field_4="Pydantic")

print("m1:", m1.model_dump())
# include=model_fields_set keeps only the fields that were explicitly provided.
print("m1(model_fields_set):", m1.model_dump(include=m1.model_fields_set))
print("m2(model_fields_set):", m2.model_dump(include=m2.model_fields_set))
print("m3(model_fields_set):", m3.model_dump(include=m3.model_fields_set))
m1: {'field_1': 1, 'field_2': None, 'field_3': 'm1', 'field_4': 'Python'}
m1(model_fields_set): {'field_3': 'm1'}
m2(model_fields_set): {'field_1': 1, 'field_2': None, 'field_3': 'm2', 'field_4': 'Python'}
m3(model_fields_set): {'field_1': 10, 'field_2': 20, 'field_3': 'm3', 'field_4': 'Pydantic'}
▌JSON Schema Generation
Pydantic 可以從資料模型自動生成相應的 JSON Schema。
JSON Schema 是一種用來描述和驗證 JSON 資料格式的規範。它通過提供一組詳細的結構和語義規則,可以有效地確保 JSON 資料遵守預定的格式。使用 JSON Schema 可以增強資料驗證的強度和靈活性,並提升互通性。
很簡單,直接呼叫 Model 的 model_json_schema() 即可。
呼叫後將返回一個字典(dict),內容為 JSON 結構的定義。
from pydantic import BaseModel
from pprint import pprint # pretty print
class User(BaseModel):
    # No defaults: all three fields are listed under 'required' in the schema.
    name: str
    age: int
    email: str


pprint(User.model_json_schema())
{'properties': {'age': {'title': 'Age', 'type': 'integer'},
'email': {'title': 'Email', 'type': 'string'},
'name': {'title': 'Name', 'type': 'string'}},
'required': ['name', 'age', 'email'],
'title': 'User',
'type': 'object'}
加上預設值,再試一次。是不是超方便?
from pydantic import BaseModel
from pprint import pprint # pretty print
class User(BaseModel):
    # Fields with defaults get a 'default' entry in the schema and are left out
    # of 'required'; only `email` stays required.
    name: str = "James Bond"
    age: int | None = None
    email: str


pprint(User.model_json_schema())
{'properties': {'age': {'anyOf': [{'type': 'integer'}, {'type': 'null'}],
'default': None,
'title': 'Age'},
'email': {'title': 'Email', 'type': 'string'},
'name': {'default': 'James Bond',
'title': 'Name',
'type': 'string'}},
'required': ['email'],
'title': 'User',
'type': 'object'}
參考資料:
model_json_schema(
by_alias: bool = True,
ref_template: str = DEFAULT_REF_TEMPLATE,
schema_generator: type[
GenerateJsonSchema
] = GenerateJsonSchema,
mode: JsonSchemaMode = "validation",
) -> dict[str, Any]
參數
本節範例使用預設值
Name | Type | Description | Default |
---|---|---|---|
by_alias |
bool |
Whether to use attribute aliases or not. | True |
ref_template |
str |
The reference template. | DEFAULT_REF_TEMPLATE |
schema_generator |
type[GenerateJsonSchema] |
To override the logic used to generate the JSON schema, as a subclass of GenerateJsonSchema with your desired modifications |
GenerateJsonSchema |
mode |
JsonSchemaMode |
The mode in which to generate the schema. | 'validation' |
回傳值
Type | Description |
---|---|
dict[str, Any] |
The JSON schema for the given model class. |
@classmethod
def model_json_schema(
    cls,
    by_alias: bool = True,
    ref_template: str = DEFAULT_REF_TEMPLATE,
    schema_generator: type[GenerateJsonSchema] = GenerateJsonSchema,
    mode: JsonSchemaMode = 'validation',
) -> dict[str, Any]:
    """Generates a JSON schema for a model class.

    Args:
        by_alias: Whether to use attribute aliases or not.
        ref_template: The reference template.
        schema_generator: To override the logic used to generate the JSON schema, as a subclass of
            `GenerateJsonSchema` with your desired modifications
        mode: The mode in which to generate the schema.

    Returns:
        The JSON schema for the given model class.
    """
    # A bare name inside a method is not looked up on the class, so this call
    # goes to a same-named function from the surrounding module scope (defined
    # outside this excerpt), passing `cls` as its first argument.
    return model_json_schema(
        cls, by_alias=by_alias, ref_template=ref_template, schema_generator=schema_generator, mode=mode
    )
本網址已更新,和老師講的略有不同,可提供正確錨點。
JSON Schema 可以客制化,老師說這很花時間,本課程不涵蓋。但之後會有進階介紹。
▌Project
本專案的目的:創建模型,並測試該模型的反序列化和序列化
創建模型:熟悉 預設值、可選或必填欄位、可否空值等資料填寫練習。
dict: model_validate(data)
JSON: model_validate_json(data_json)
本程式使用 Pydantic 來定義一個名為 Automobile
的模型,包含了製造商名稱、車系名稱、汽車類型、是否為電動車、製造日期、基本建議售價、車輛識別號碼、車門數量、註冊國家和車牌號碼等欄位。
程式中以型別註記搭配預設值來設定每個欄位是否為必填、是否可為空值(None)
(本範例未使用 Field,也沒有額外的描述說明)。
最後,程式測試了從 Python 字典和 JSON 字串中反序列化成 Automobile
模型實例,並驗證序列化後的結果是否與預期相符。
from datetime import date
from pydantic import BaseModel, ValidationError
# Python dictionary
from datetime import date
data = {
"manufacturer": "BMW",
"series_name": "M4",
"type_": "Convertible",
"is_electric": False,
"manufactured_date": "2023-01-01",
"base_msrp_usd": 93_300,
"vin": "1234567890",
"number_of_doors": 2,
"registration_country": "France",
"license_plate": "AAA-BBB",
}
data_expected_serialization = {
'manufacturer': 'BMW',
'series_name': 'M4',
'type_': 'Convertible',
'is_electric': False,
'manufactured_date': date(2023,1,1),
'base_msrp_usd': 93_300,
'vin': '1234567890',
'number_of_doors': 2,
'registration_country': 'France',
'license_plate': 'AAA-BBB',
}
# JSON
data_json = '''
{
"manufacturer": "BMW",
"series_name": "M4",
"type_": "Convertible",
"manufactured_date": "2023-01-01",
"base_msrp_usd": 93300,
"vin": "1234567890"
}
'''
data_json_expected_serialization = {
'manufacturer': 'BMW',
'series_name': 'M4',
'type_': 'Convertible',
'is_electric': False,
'manufactured_date': date(2023, 1, 1),
'base_msrp_usd': 93_300,
'vin': '1234567890',
'number_of_doors': 4,
'registration_country': None,
'license_plate': None,
}
class Automobile(BaseModel):
    # Required fields (no default value).
    manufacturer: str
    series_name: str
    type_: str
    # Optional, not nullable.
    is_electric: bool = False
    # Required; the sample inputs pass it as a "YYYY-MM-DD" string.
    manufactured_date: date
    base_msrp_usd: float
    vin: str
    # Optional, not nullable.
    number_of_doors: int = 4
    # Optional and nullable.
    registration_country: str | None = None
    license_plate: str | None = None
# Round trip 1: build the model from the Python dict, dump it back to a dict
# and compare with the expected serialization.
car1 = Automobile.model_validate(data)
car1_dumped = car1.model_dump()
assert car1_dumped == data_expected_serialization

# Round trip 2: same check starting from the JSON string; fields missing from
# the JSON are expected to come back with their default values.
car2 = Automobile.model_validate_json(data_json)
car2_dumped = car2.model_dump()
assert car2_dumped == data_json_expected_serialization
BaseModel 中的屬性及函式
Pydantic 的 BaseModel
中的屬性及函式。
標黃色部分為本次使用到的函式。
函數名 | 功能說明 |
---|---|
__init__ |
初始化 BaseModel 實例。接受模型定義中的欄位值作為參數。 |
model_config |
返回模型的配置設置,如允許的數據類型、驗證規則等。 |
model_computed_fields |
返回模型中定義的 computed 欄位。 |
model_extra |
返回模型中定義的額外欄位。 |
model_fields |
返回模型中定義的所有欄位。 |
model_fields_set |
返回建立實例時被明確設定(非使用預設值)的欄位名稱集合。 |
model_construct |
繞過驗證創建模型實例,僅適用於已驗證或可信任的資料。 |
model_copy |
返回模型實例的拷貝(預設為淺拷貝,deep=True 時為深拷貝)。 |
model_dump |
將模型實例序列化為字典(dict);mode="json" 時只輸出 JSON 相容的型別。 |
model_dump_json |
將模型實例序列化為 JSON 字符串。 |
model_json_schema |
返回模型的 JSON 模式定義。 |
model_parametrized_name |
返回模型的參數化名稱。 |
model_post_init |
在模型實例初始化後調用的方法。 |
model_rebuild |
重建模型類別的 schema(例如前向參照先前無法解析時),不是重建實例。 |
model_validate |
驗證輸入資料(如 dict)並返回對應的模型實例。 |
model_validate_json |
驗證 JSON 數據是否符合模型定義。 |