Fix combinatorial explosion in DSDL compiler (#164)
* Adopt https://github.com/UAVCAN/pydsdl/pull/66
* Add complex DSDL type to catch regressions
* Do not unroll (de)serialization of fixed-length arrays
* Update Nunavut
diff --git a/pyuavcan/VERSION b/pyuavcan/VERSION
index 0495c4a..e8ea05d 100644
--- a/pyuavcan/VERSION
+++ b/pyuavcan/VERSION
@@ -1 +1 @@
-1.2.3
+1.2.4
diff --git a/pyuavcan/application/heartbeat_publisher.py b/pyuavcan/application/heartbeat_publisher.py
index 348b205..0035496 100644
--- a/pyuavcan/application/heartbeat_publisher.py
+++ b/pyuavcan/application/heartbeat_publisher.py
@@ -44,7 +44,7 @@
VENDOR_SPECIFIC_STATUS_CODE_MASK = (
- 2 ** list(pyuavcan.dsdl.get_model(Heartbeat)["vendor_specific_status_code"].data_type.bit_length_set)[0] - 1
+ 2 ** pyuavcan.dsdl.get_model(Heartbeat)["vendor_specific_status_code"].data_type.bit_length_set.max - 1
)
diff --git a/pyuavcan/application/plug_and_play.py b/pyuavcan/application/plug_and_play.py
index e880cb6..a565722 100644
--- a/pyuavcan/application/plug_and_play.py
+++ b/pyuavcan/application/plug_and_play.py
@@ -27,10 +27,10 @@
_PSEUDO_UNIQUE_ID_MASK = (
- 2 ** list(pyuavcan.dsdl.get_model(NodeIDAllocationData_1)["unique_id_hash"].data_type.bit_length_set)[0] - 1
+ 2 ** pyuavcan.dsdl.get_model(NodeIDAllocationData_1)["unique_id_hash"].data_type.bit_length_set.max - 1
)
-_NODE_ID_MASK = 2 ** max(pyuavcan.dsdl.get_model(ID)["value"].data_type.bit_length_set) - 1
+_NODE_ID_MASK = 2 ** pyuavcan.dsdl.get_model(ID)["value"].data_type.bit_length_set.max - 1
_UNIQUE_ID_SIZE_BYTES = pyuavcan.application.NodeInfo().unique_id.size
@@ -63,7 +63,7 @@
DEFAULT_PRIORITY = pyuavcan.transport.Priority.SLOW
- _MTU_THRESHOLD = max(pyuavcan.dsdl.get_model(NodeIDAllocationData_2).bit_length_set) // 8
+ _MTU_THRESHOLD = pyuavcan.dsdl.get_model(NodeIDAllocationData_2).bit_length_set.max // 8
def __init__(
self,
diff --git a/pyuavcan/dsdl/_templates/deserialization.j2 b/pyuavcan/dsdl/_templates/deserialization.j2
index eea0663..691a64e 100644
--- a/pyuavcan/dsdl/_templates/deserialization.j2
+++ b/pyuavcan/dsdl/_templates/deserialization.j2
@@ -10,7 +10,7 @@
{% set t = self.inner_type %}
{% if t is StructureType %}
{% set field_ref_map = {} %}
- {% for f, offset in t.iterate_fields_with_offsets(0|bit_length_set) %}
+ {% for f, offset in t.iterate_fields_with_offsets() %}
{% if f is not padding %}
{% set field_ref = 'f'|to_template_unique_name %}
{% do field_ref_map.update({f: field_ref}) %}
@@ -33,7 +33,7 @@
{% elif t is UnionType %}
{% set tag_ref = 'tag'|to_template_unique_name %}
{{ _deserialize_integer(t.tag_field_type, tag_ref, 0|bit_length_set) }}
- {% for f, offset in t.iterate_fields_with_offsets(0|bit_length_set) %}
+ {% for f, offset in t.iterate_fields_with_offsets() %}
{# We generate new temporary for each variant to prevent MyPy from complaining. #}
{% set field_ref = 'uni'|to_template_unique_name %}
{{ 'if' if loop.first else 'elif' }} {{ tag_ref }} == {{ loop.index0 }}:
@@ -45,7 +45,7 @@
{% else %}{% assert False %}{# Delimited type is not expected in this context. #}
{% endif %}
_des_.pad_to_alignment({{ self.alignment_requirement }})
- assert {{ t.bit_length_set|min }} <= (_des_.consumed_bit_length - _base_offset_) <= {{ t.bit_length_set|max }}, \
+ assert {{ t.bit_length_set.min }} <= (_des_.consumed_bit_length - _base_offset_) <= {{ t.bit_length_set.max }}, \
'Bad deserialization of {{ self }}'
{%- endmacro %}
@@ -67,13 +67,17 @@
{{ ref }} = _des_.fetch_{{ offset|alignment_prefix -}}
_array_of_standard_bit_length_primitives({{ t.element_type|numpy_scalar_type }}, {{ t.capacity }})
{% else %}
+ {# Element offset is the superposition of the individual element offsets plus the array's own offset.
+ # For example, an array like uint8[3] offset by 16 bits would have its element_offset = {16, 24, 32}.
+ # We could also unroll element deserialization for small arrays (e.g., below ~10 elements) to take advantage of
+ # spurious alignment of elements, but the benefit of such an optimization is believed to be negligible. #}
+ {% set element_offset = offset + t.element_type.bit_length_set.repeat_range(t.capacity - 1) %}
{% set element_ref = 'e'|to_template_unique_name %}
- # Unrolled fixed-length array: {{ t }}; the temporary {{ element_ref }} is used for element storage.
+ {% set index_ref = 'i'|to_template_unique_name %}
{{ ref }} = _np_.empty({{ t.capacity }}, {{ t.element_type|numpy_scalar_type }})
- {% for index, element_offset in t.enumerate_elements_with_offsets(offset) %}
- {{ _deserialize_any(t.element_type, element_ref, element_offset) }}
- {{ ref }}[{{ index }}] = {{ element_ref }}
- {% endfor %}
+ for {{ index_ref }} in range({{ t.capacity }}):
+ {{ _deserialize_any(t.element_type, element_ref, element_offset)|indent }}
+ {{ ref }}[{{ index_ref }}] = {{ element_ref }}
{% endif %}
assert len({{ ref }}) == {{ t.capacity }}, '{{ t }}'
{% endmacro %}
diff --git a/pyuavcan/dsdl/_templates/serialization.j2 b/pyuavcan/dsdl/_templates/serialization.j2
index aadf8d2..5ed48e8 100644
--- a/pyuavcan/dsdl/_templates/serialization.j2
+++ b/pyuavcan/dsdl/_templates/serialization.j2
@@ -9,11 +9,11 @@
_base_offset_ = _ser_.current_bit_length
{% set t = self.inner_type %}
{% if t is StructureType %}
- {% for f, offset in t.iterate_fields_with_offsets(0|bit_length_set) %}
+ {% for f, offset in t.iterate_fields_with_offsets() %}
{{ _serialize_any(f.data_type, 'self.' + (f|id), offset) }}
{% endfor %}
{% elif t is UnionType %}
- {% for f, offset in t.iterate_fields_with_offsets(0|bit_length_set) %}
+ {% for f, offset in t.iterate_fields_with_offsets() %}
{% set field_ref = 'self.' + (f|id) %}
{{ 'if' if loop.first else 'elif' }} {{ field_ref }} is not None: # Union tag {{ loop.index0 }}
{{ _serialize_integer(t.tag_field_type, loop.index0|string, 0|bit_length_set)|indent }}
@@ -24,7 +24,7 @@
{% else %}{% assert False %}{# Delimited type is not expected in this context. #}
{% endif %}
_ser_.pad_to_alignment({{ self.alignment_requirement }})
- assert {{ t.bit_length_set|min }} <= (_ser_.current_bit_length - _base_offset_) <= {{ t.bit_length_set|max }}, \
+ assert {{ t.bit_length_set.min }} <= (_ser_.current_bit_length - _base_offset_) <= {{ t.bit_length_set.max }}, \
'Bad serialization of {{ self }}'
{%- endmacro %}
@@ -79,10 +79,15 @@
{% elif t.element_type is PrimitiveType and t.element_type.standard_bit_length %}
_ser_.add_{{ offset|alignment_prefix -}}_array_of_standard_bit_length_primitives({{ ref }})
{% else %}
- # Unrolled fixed-length array: {{ t }}
- {% for index, element_offset in t.enumerate_elements_with_offsets(offset) %}
- {{ _serialize_any(t.element_type, '%s[%d]'|format(ref, index), element_offset) }}
- {% endfor %}
+ {# Element offset is the superposition of the individual element offsets plus the array's own offset.
+ # For example, an array like uint8[3] offset by 16 bits would have its element_offset = {16, 24, 32}.
+ # We could also unroll element serialization for small arrays (e.g., below ~10 elements) to take advantage of
+ # spurious alignment of elements, but the benefit of such an optimization is believed to be negligible. #}
+ {% set element_offset = offset + t.element_type.bit_length_set.repeat_range(t.capacity - 1) %}
+ {% set element_ref = 'elem'|to_template_unique_name %}
+ # Element offset: {{ element_offset }}
+ for {{ element_ref }} in {{ ref }}:
+ {{ _serialize_any(t.element_type, element_ref, element_offset)|indent }}
{% endif %}
{% endmacro %}
@@ -120,7 +125,7 @@
{%- elif t is VariableLengthArrayType -%} {{ _serialize_variable_length_array(t, ref, offset) }}
{%- elif t is CompositeType -%}
{% if t is DelimitedType %}
- {% if (t.inner_type.bit_length_set | length) > 1 %}
+ {% if not t.inner_type.bit_length_set.fixed_length %}
{# Instead of the outer extent, we use the inner extent, which equals the max bit length and is a
# tighter bound than the user-defined extent.
# This is safe because when serializing we always know the concrete type.
@@ -136,14 +141,14 @@
{{ ref }}._serialize_(_nested_)
_nested_length_ = _nested_.current_bit_length - {{ t.delimiter_header_type.bit_length }}
del _nested_
- assert {{ t.inner_type.bit_length_set|min }} <= _nested_length_ <= {{ t.inner_type.bit_length_set|max }}
+ assert {{ t.inner_type.bit_length_set.min }} <= _nested_length_ <= {{ t.inner_type.bit_length_set.max }}
assert _nested_length_ % 8 == 0
_ser_.add_aligned_u32(_nested_length_ // 8) # Jump back and serialize the delimiter header.
_ser_.skip_bits(_nested_length_) # Return to the current offset.
{% else %}
{# Optional optimization: if the nested object is fixed-length, no need to fork the serializer. #}
- {% set length_bits = t.inner_type.bit_length_set | first %}
- {% assert [length_bits] == t.inner_type.bit_length_set | list %}
+ {% set length_bits = t.inner_type.bit_length_set.max %}
+ {% assert length_bits == t.inner_type.bit_length_set.min %}
{% assert length_bits % 8 == 0 %}
{% set length_bytes = length_bits // 8 %}
# Delimited serialization of {{ t }}, fixed bit length {{ length_bits }} ({{ length_bytes }} bytes)
diff --git a/setup.cfg b/setup.cfg
index af60085..d43d297 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -56,7 +56,7 @@
# Think thrice before adding anything here, please.
# The preferred long-term plan is to avoid adding any new required dependencies whatsoever for the project's lifetime.
install_requires =
- nunavut ~= 1.0
+ nunavut ~= 1.1
numpy ~= 1.17, < 1.20
# TODO: allow NumPy v1.20 -- requires fixing type annotations and many meaningless false-positives. No runtime effects.
diff --git a/tests/dsdl/_builtin_form.py b/tests/dsdl/_builtin_form.py
index 3891f23..38d8552 100644
--- a/tests/dsdl/_builtin_form.py
+++ b/tests/dsdl/_builtin_form.py
@@ -102,7 +102,7 @@
def _unittest_slow_builtin_form_automatic(compiled: typing.List[pyuavcan.dsdl.GeneratedPackageInfo]) -> None:
for info in compiled:
for model in _util.expand_service_types(info.models):
- if max(model.bit_length_set) / 8 > 1024 * 1024:
+ if model.bit_length_set.max / 8 > 1024 * 1024:
_logger.info("Automatic test of %s skipped because the type is too large", model)
continue # Skip large objects because they take forever to convert and test
diff --git a/tests/dsdl/_rand.py b/tests/dsdl/_rand.py
index 54cb2fa..07554fc 100644
--- a/tests/dsdl/_rand.py
+++ b/tests/dsdl/_rand.py
@@ -162,8 +162,12 @@
def _make_random_fragmented_serialized_representation(bls: pydsdl.BitLengthSet) -> typing.Sequence[memoryview]:
- bit_length = random.choice(list(bls))
- byte_length = (bit_length + 7) // 8
+ if bls.max < 8 * 1024: # If the BLS appears small, perform numerical expansion and pick a random value.
+ bit_length = random.choice(list(bls))
+ byte_length = (bit_length + 7) // 8
+ else: # Otherwise, just use the smallest value because expansion is slow.
+ bit_length = bls.min
+ byte_length = (bit_length + 7) // 8
return _fragment_randomly(numpy.random.randint(0, 256, size=byte_length, dtype=numpy.uint8).data)
diff --git a/tests/dsdl/test_dsdl_namespace/numpy/CombinatorialExplosion.0.1.uavcan b/tests/dsdl/test_dsdl_namespace/numpy/CombinatorialExplosion.0.1.uavcan
new file mode 100644
index 0000000..6576dff
--- /dev/null
+++ b/tests/dsdl/test_dsdl_namespace/numpy/CombinatorialExplosion.0.1.uavcan
@@ -0,0 +1,8 @@
+# This data type is crafted to trigger the combinatorial explosion problem: https://github.com/UAVCAN/pydsdl/issues/23
+# The problem is now fixed so we introduce this type to shield us against regressions.
+# If DSDL compilation takes over a few minutes, you have a combinatorial problem somewhere in the compiler.
+
+uavcan.primitive.String.1.0[<=1024] foo
+uavcan.primitive.String.1.0[256] bar
+
+@extent 100 * (1024 ** 2) * 8 # One hundred megabytes should be about right.
diff --git a/tests/dsdl/test_dsdl_namespace/numpy/RGB888_3840x2748.0.1.uavcan b/tests/dsdl/test_dsdl_namespace/numpy/RGB888_3840x2748.0.1.uavcan
index 4c7baff..9f7d1af 100644
--- a/tests/dsdl/test_dsdl_namespace/numpy/RGB888_3840x2748.0.1.uavcan
+++ b/tests/dsdl/test_dsdl_namespace/numpy/RGB888_3840x2748.0.1.uavcan
@@ -11,5 +11,3 @@
uint8[PIXELS_PER_IMAGE * 3] pixels # Row major, top-left pixel first, color ordering RGB
@sealed
-@assert _offset_.count == 1 # Fixed size message
-@print _offset_.max / 8 / 1024 / 1024 # Print size in mebibytes