Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions src/marshmallow/fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -878,6 +878,44 @@ def _deserialize(self, value, attr, data, **kwargs) -> str:
raise self.make_error("invalid_utf8") from error


class Bytes(Field[bytes]):
Comment thread
rrad5409 marked this conversation as resolved.
"""
Marshmallow field type for any bytes array.
"""
Comment thread
rrad5409 marked this conversation as resolved.

def _deserialize(
self,
value: typing.Any,
attr: str | None,
data: typing.Mapping[str, typing.Any] | None,
**kwargs: typing.Any,
) -> bytes:
try:
match value:
case bytes() as b:
return b
case bytearray() as ba:
return bytes(ba)
case str() as s:
return bytes(
s,
encoding="utf-8",
errors="ignore",
Comment thread
rrad5409 marked this conversation as resolved.
Outdated
)
case int() as i:
return i.to_bytes(
length=max(1, (7 + i.bit_length()) // 8),
byteorder="big",
signed=i < 0,
)
Comment on lines +929 to +934
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Marshmallow often leans on the builtin constructor for type coercion to keep the implementation simple, then excludes types that might be confusing.

if isinstance(value, (bool, int)):
    raise ...
try:
    return bytes(value)
except TypeError:
    ...

Big int to bytes is probably out of scope.

Copy link
Copy Markdown
Author

@rrad5409 rrad5409 Mar 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem is that YAML doesn't natively support writing a bytes value using hex/octal/binary notation, only encoded base64 strings. So I'm not able to write `val: 0xDEADBEEF`, get that deserialised to an `int`, and then convert it to `bytes` in the field. (Sorry, I made an error in my sample earlier: the header should have been an `int`, not `bytes`.)

Especially when writing binary-related values (like registers, addresses, bitmasks etc), writing it as 0xABAA or 0b11001010 is far more convenient to write and more understandable.

The same reasoning applies to str - being forced to encode a string using base64 is pure inconvenience, when I could just write something: "Hello World".

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I also lean towards omitting this. I plan to finish up and release field-level `pre_load` soon, which should simplify your use case here.

  def int_to_bytes(value):
      """Convert an ``int`` to its shortest big-endian byte string.

      Non-negative integers are encoded unsigned; negative integers are
      encoded as two's complement. Any value that is not an ``int`` is
      returned unchanged so the function can sit in front of other
      conversions.

      :param value: The value to convert.
      :return: ``bytes`` for ``int`` input, otherwise ``value`` itself.
      """
      if not isinstance(value, int):
          return value
      negative = value < 0
      # A signed encoding needs one sign bit on top of the magnitude bits.
      # ``(~value).bit_length()`` is the magnitude width of a negative number
      # in two's complement, so e.g. -128 still fits one byte while -129
      # correctly takes two (sizing from ``value.bit_length()`` alone
      # overflows for -129..-255 and similar ranges).
      bits = (~value).bit_length() + 1 if negative else value.bit_length()
      return value.to_bytes(
          length=max(1, (bits + 7) // 8),  # zero still occupies one byte
          byteorder="big",
          signed=negative,
      )

# Usage sketch: hand int_to_bytes to the Bytes field through its pre_load argument.
# NOTE(review): relies on the field-level pre_load hook that the comment above
# describes as planned but not yet released -- confirm the final API before copying.
class MySchema(Schema):
    foo = fields.Bytes(pre_load=int_to_bytes)

So, to build off of @deckar01's suggestion, I think the final implementation should look something like:

  def _deserialize(self, value, attr, data, **kwargs) -> bytes:
      if isinstance(value, (bool, int)):
          raise self.make_error("invalid")
      if isinstance(value, str):
          try:
              return value.encode("utf-8")
          except UnicodeEncodeError as error:
              raise self.make_error("invalid") from error
      try:
          return bytes(value)
      except TypeError as error:
          raise self.make_error("invalid") from error

case obj:
if isinstance(obj, (typing.SupportsBytes, typing.Iterable)):
return bytes(obj)
raise ValidationError("not a bytes-like object")
Comment thread
rrad5409 marked this conversation as resolved.
Outdated
except TypeError as e:
raise ValidationError("not a bytes-like object") from e


class UUID(Field[uuid.UUID]):
"""A UUID field."""

Expand Down
17 changes: 17 additions & 0 deletions tests/test_deserialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,23 @@ def test_string_field_deserialization(self):
with pytest.raises(ValidationError):
field.deserialize({})

def test_bytes_field_deserialization(self):
    """Exercise ``fields.Bytes`` with convertible inputs, then with inputs it must reject."""
    bytes_field = fields.Bytes()

    # (raw input, expected bytes) pairs, checked in order.
    accepted = (
        (b"foo", b"foo"),
        (bytearray(b"foo"), b"foo"),
        ("foo", b"foo"),
        (0xDEAD, b"\xde\xad"),
        ([0xBE, 0xEF], b"\xbe\xef"),
        ((0xB, 0xA, 0xB, 0xE), b"\x0b\x0a\x0b\x0e"),
    )
    for raw, expected in accepted:
        assert bytes_field.deserialize(raw) == expected

    # Inputs that must raise ValidationError with the field's error message.
    rejected = ({"hi": 222}, ["12345"])
    for raw in rejected:
        with pytest.raises(ValidationError) as excinfo:
            bytes_field.deserialize(raw)
        assert excinfo.value.args[0] == "not a bytes-like object"

def test_boolean_field_deserialization(self):
field = fields.Boolean()
assert field.deserialize(True) is True
Expand Down