Skip to content

Commit

Permalink
move tagger into variant
Browse files Browse the repository at this point in the history
fix documentation
implement feedback: better documentation & move well-formedness checks to the right places
  • Loading branch information
essickmango committed Nov 11, 2023
1 parent 7be7a78 commit 06f70f2
Show file tree
Hide file tree
Showing 5 changed files with 95 additions and 68 deletions.
64 changes: 30 additions & 34 deletions spec/lang/representation.md
Original file line number Diff line number Diff line change
Expand Up @@ -230,55 +230,51 @@ impl Type {
### Enums

Enum encoding and decoding.
Should we let the type decoding throw UB?
Note that the discriminant setting and reading does not modify any bytes.
This is does not allow types like `Result<bool, bool>` to be stored in one byte.
However this is fine as Rust currently does not do any optimizations like that
and doing it here will introduce more questions.

```rust
fn compute_discriminant<M: Memory>(bytes: List<AbstractByte<M::Provenance>>, discriminator: Discriminator) -> Option<Int> {
let mut disc = discriminator;
loop {
match disc {
Discriminator::Known(val) => break Some(val),
Discriminator::Unknown { offset, children } => {
if offset < 0 || offset >= bytes.len() { throw!(); }
let AbstractByte::Init(val, _) = bytes.get(offset).unwrap()
else { break None };
// else { throw_ub!("Encountered uninitialized byte while computing enum discriminant.") };
let Some(new_disc) = children.get(val)
else { break None };
// else { throw_ub!("Encountered invalid discriminant value.") };
disc = new_disc;
}
// FIXME: we have multiple quite different fail sources,
// it would be nice to return more error information.
match discriminator {
Discriminator::Known(val) => Some(val),
Discriminator::Invalid => None,
Discriminator::Unknown { offset, children, fallback } => {
let AbstractByte::Init(val, _) = bytes[offset]
else { return None };
let next_discriminator = children.get(val).unwrap_or(fallback);
compute_discriminant::<M>(bytes, next_discriminator)
}
}
}

impl Type {
fn decode<M: Memory>(Type::Enum { variants, tag_encoding, size, .. }: Self, bytes: List<AbstractByte<M::Provenance>>) -> Option<Value<M>> {
fn decode<M: Memory>(Type::Enum { variants, discriminator, size, .. }: Self, bytes: List<AbstractByte<M::Provenance>>) -> Option<Value<M>> {
if bytes.len() != size.bytes() { throw!(); }
let disc = compute_discriminant::<M>(bytes, tag_encoding.discriminator)?;
let disc = compute_discriminant::<M>(bytes, discriminator)?;

// Decode into the variant.
// Because the variant is the same size as the enum we don't need to pass a subslice.
let Some(value) = variants[disc].ty.decode(bytes)
else { return None };

// The discriminator does the discriminant check and as such should not be able to return an invalid value.
if disc < Int::ZERO || disc >= variants.len() { throw!(); }
variants.get(disc).unwrap().decode(bytes)
Some(Value::Variant { idx: disc, data: value })
}

fn encode<M: Memory>(Type::Enum { variants, tag_encoding, size, .. }: Self, val: Value<M>) -> List<AbstractByte<M::Provenance>> {
fn encode<M: Memory>(Type::Enum { variants, .. }: Self, val: Value<M>) -> List<AbstractByte<M::Provenance>> {
let Value::Variant { idx, data } = val else { panic!() };
assert_eq!(variants.len(), tag_encoding.tagger.len());

let mut bytes = list![AbstractByte::Uninit; size.bytes()];

// idx is to be guaranteed in bounds by the well-formed check in the typed store.
let variant = variants.get(idx).unwrap();
let encoded_data = variant.encode(data.extract());
assert!(encoded_data.len() <= size.bytes());
bytes.write_subslice_at_index(Int::ZERO, encoded_data);

let tagger = tag_encoding.tagger.get(idx).unwrap();
// idx is to be guaranteed in bounds by the well-formed check in the type.
let Variant { ty: variant, tagger } = variants[idx];
let mut bytes = variant.encode(data.extract());

// TODO: is there a nicer way instead of all these temporary lists?
// write tag afterwards into the encoded bytes. This is fine as we don't allow
// encoded data and the tag to overlap at the moment.
for (offset, value) in tagger.iter() {
bytes.set(offset, value);
bytes.set(offset, AbstractByte::Init(value, None));
}
bytes
}
Expand Down Expand Up @@ -466,7 +462,7 @@ impl<M: Memory> AtomicMemory<M> {
(Value::Tuple(vals), Type::Array { elem: ty, .. }) =>
Value::Tuple(vals.try_map(|val| self.retag_val(val, ty, fn_entry))?),
(Value::Variant { idx, data }, Type::Enum { variants, .. }) =>
Value::Variant { idx, data: self.retag_val(data, variants[idx], fn_entry)? },
Value::Variant { idx, data: self.retag_val(data, variants[idx].ty, fn_entry)? },
_ =>
panic!("this value does not have that type"),
})
Expand Down
11 changes: 6 additions & 5 deletions spec/lang/step/terminators.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,13 +105,14 @@ fn check_abi_compatibility(
caller_chunks == callee_chunks &&
caller_size == callee_size &&
caller_align == callee_align,
(Type::Enum { variants: caller_variants, tag_encoding: caller_encoding, size: caller_size, align: caller_align },
Type::Enum { variants: callee_variants, tag_encoding: callee_encoding, size: callee_size, align: callee_align }) =>
(Type::Enum { variants: caller_variants, discriminator: caller_discriminator, size: caller_size, align: caller_align },
Type::Enum { variants: callee_variants, discriminator: callee_discriminator, size: callee_size, align: callee_align }) =>
caller_variants.len() == callee_variants.len() &&
caller_variants.zip(callee_variants).all(|(caller_field, callee_field)|
check_abi_compatibility(caller_field, callee_field)
caller_variants.zip(callee_variants).all(|(caller_variant, callee_variant)|
check_abi_compatibility(caller_variant.ty, callee_variant.ty) &&
caller_variant.tagger == callee_variant.tagger
) &&
caller_encoding == callee_encoding &&
caller_discriminator == callee_discriminator &&
caller_size == callee_size &&
caller_align == callee_align,
// Different kind of type, definitely incompatible.
Expand Down
47 changes: 30 additions & 17 deletions spec/lang/types.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,16 +55,15 @@ pub enum Type {
align: Align,
},
Enum {
/// Each variant is given by a type. All types are thought to "start at offset 0";
/// if the discriminant is encoded as an explicit tag, then that will be put
/// into the padding of the active variant. (This means it is *not* safe to hand
/// out mutable references to a variant at that type, as then the tag might be
/// overwritten!)
/// Each variant is given by a type and its tag description. All variants are thought
/// to "start at offset 0"; if the discriminant is encoded as an explicit tag,
/// then that will be put into the padding of the active variant. (This means it
/// is *not* safe to hand out mutable references to a variant at that type, as
/// then the tag might be overwritten!)
/// The Rust type `!` is encoded as an `Enum` with an empty list of variants.
variants: List<Type>,
/// This contains all the tricky details of how to encode the active variant
/// at runtime.
tag_encoding: TagEncoding,
variants: List<Variant>,
/// This contains the decision tree to decode the variant at runtime.
discriminator: Discriminator,
/// The total size of the enum can indicate trailing padding.
/// Must be large enough to contain all variants.
size: Size,
Expand All @@ -81,18 +80,32 @@ pub struct IntType {

pub type Fields = List<(Offset, Type)>;

/// We leave the details of enum tags to the future.
/// (We might want to extend the "variants" field of `Enum` to also have a
/// discriminant for each variant. We will see.)
pub struct TagEncoding {
discriminator: Discriminator,
tagger: List<Map<Int, u8>>, // TODO: move to Variant struct
pub struct Variant {
/// The actual type of the variant.
pub ty: Type,
/// The information on where to store which bytes to write the tag.
/// MUST NOT touch any bytes written by the actual type of the variant and vice
/// versa. This is because we allow references/pointers to (enum) fields which
/// should be able to dereference without having to deal with the tag.
/// FIXME(essickmango): Int should be `Offset`
pub tagger: Map<Int, u8>,
}

/// The decision tree that computes the discriminant out of the tag for a specific
/// enum type.
pub enum Discriminator {
/// We know the discriminant.
Known(Int),
Unknown { // Branch
/// Tag decoding failed, there is no valid discriminant.
Invalid,
/// We don't know the discriminant, so we branch on the value of a specific byte.
/// The fallback is for readability, as we often are only interested in a couple
/// of values.
/// FIXME(essickmango): change offset to `Offset`
Unknown {
offset: Int,
#[specr::indirection]
fallback: Discriminator,
children: Map<u8, Discriminator>,
},
}
Expand Down Expand Up @@ -143,7 +156,7 @@ impl Type {
Tuple { fields, .. } => fields.all(|(_offset, ty)| ty.inhabited()),
Array { elem, count } => count == 0 || elem.inhabited(),
Union { .. } => true,
Enum { variants, .. } => variants.any(|ty| ty.inhabited()),
Enum { variants, .. } => variants.any(|variant| variant.ty.inhabited()),
}
}

Expand Down
39 changes: 28 additions & 11 deletions spec/lang/well-formed.md
Original file line number Diff line number Diff line change
Expand Up @@ -108,22 +108,40 @@ impl Type {
// And they must all fit into the size.
ensure(size >= last_end)?;
}
Enum { variants, size, tag_encoding, align: _ } => {
// All the variants need to be well-formed and fit in the enum.
Enum { variants, size, discriminator, align: _ } => {
// All the variants need to be well-formed and be the size of the enum so
// we don't have to handle different sizes in the memory representation.
for variant in variants {
variant.check_wf::<T>()?;
ensure(size >= variant.size::<T>())?;
variant.ty.check_wf::<T>()?;
ensure(size == variant.ty.size::<T>())?;
}
// And we need a tagger for each variant (even if they are empty).
ensure(variants.len() == tag_encoding.tagger.len())?;
// TODO: should we ensure that the discriminator can reach a) all idx and b) only valid idx?
// not all idx need to be reachable, but tagger & discriminator need to be in bounds

// check that all variants reached by the discriminator are valid and
// that it never accesses out-of-bounds area.
discriminator.check_wf::<T>(size, variants.len())?;
}
}

ret(())
}
}

impl Discriminator {
fn check_wf<T: Target>(self, size: Size, n_variants: Int) -> Option<()> {
match self {
Discriminator::Known(variant) => ensure(variant >= Int::ZERO && variant < n_variants),
Discriminator::Invalid => ret(()),
Discriminator::Unknown { offset, fallback, children } => {
ensure(offset < size.bytes())?;
fallback.check_wf::<T>(size, n_variants)?;
for discriminator in children.values() {
discriminator.check_wf::<T>(size, n_variants)?;
}
ret(())
}
}
}
}
```

## Well-formed expressions
Expand Down Expand Up @@ -205,7 +223,7 @@ impl ValueExpr {
Variant { idx, data, enum_ty } => {
let Type::Enum { variants, .. } = enum_ty else { throw!() };
enum_ty.check_wf::<T>()?;
let ty = variants.get(idx)?;
let ty = variants.get(idx)?.ty;

let checked = data.check_wf::<T>(locals, prog)?;
ensure(checked == ty);
Expand Down Expand Up @@ -569,8 +587,7 @@ impl<M: Memory> Value<M> {
}
}
(Value::Variant { idx, data }, Type::Enum { variants, .. }) => {
// TODO: check if enum is uninhabited
let variant = variants.get(idx)?;
let variant = variants.get(idx)?.ty;
data.check_wf(variant)?;
}
_ => throw!()
Expand Down
2 changes: 1 addition & 1 deletion tooling/miniutil/src/fmt/ty.rs
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ fn fmt_comptype(i: CompTypeIndex, t: CompType, comptypes: &mut Vec<CompType>) ->
},
Type::Enum { variants, .. } => {
variants.iter().enumerate().for_each(|(idx, v)| {
let typ = fmt_type(v, comptypes).to_string();
let typ = fmt_type(v.ty, comptypes).to_string();
s += &format!(" Variant {idx}: {typ}");
});
},
Expand Down

0 comments on commit 06f70f2

Please sign in to comment.