diff --git a/README.md b/README.md
index e016166..91ca7bd 100644
--- a/README.md
+++ b/README.md
@@ -218,12 +218,9 @@ A `LazyBufferCache` is a `Dict`-like type for the caches which automatically def
 new cache arrays on demand when they are required. The function `f` maps
 `size_of_cache = f(size(u))`, which by default creates cache arrays of the same size.
 
-Note that `LazyBufferCache` does cause a dynamic dispatch, though it is type-stable.
-This gives it a ~100ns overhead, and thus on very small problems it can reduce
-performance, but for any sufficiently sized calculation (e.g. >20 ODEs) this
-may not be even measurable. The upside of `LazyBufferCache` is that the user does
-not have to worry about potential issues with chunk sizes and such: `LazyBufferCache`
-is much easier!
+Note that `LazyBufferCache` is type-stable and contains no dynamic dispatch. This gives
+it a ~15ns overhead. The upside of `LazyBufferCache` is that the user does not have to
+worry about potential issues with chunk sizes and such: `LazyBufferCache` is much easier!
 
 ### Example
 
@@ -250,7 +247,7 @@ new caches on demand when they are required. The function `f` generates the cach
 for the type of `u`, and subsequent indexing reuses that cache if that type of `u` has
 already ben seen.
 
-Note that `LazyBufferCache` does cause a dynamic dispatch and its return is not type-inferred.
+Note that `GeneralLazyBufferCache`'s return is not type-inferred.
 This means it's the slowest of the preallocation methods, but it's the most general.
 
 ### Example
 
diff --git a/docs/src/index.md b/docs/src/index.md
index 7ff3b1d..d6b2ea5 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -208,12 +208,9 @@ A `LazyBufferCache` is a `Dict`-like type for the caches, which automatically de
 new cache arrays on demand when they are required. The function `f` maps
 `size_of_cache = f(size(u))`, which by default creates cache arrays of the same size.
 
-Note that `LazyBufferCache` does cause a dynamic dispatch, though it is type-stable.
-This gives it a ~100ns overhead, and thus on very small problems it can reduce
-performance, but for any sufficiently sized calculation (e.g. >20 ODEs) this
-may not be even measurable. The upside of `LazyBufferCache` is that the user does
-not have to worry about potential issues with chunk sizes and such: `LazyBufferCache`
-is much easier!
+Note that `LazyBufferCache` is type-stable and contains no dynamic dispatch. This gives
+it a ~15ns overhead. The upside of `LazyBufferCache` is that the user does not have to
+worry about potential issues with chunk sizes and such: `LazyBufferCache` is much easier!
 
 ### Example
 
@@ -245,7 +242,7 @@ new caches on demand when they are required. The function `f` generates the cach
 for the type of `u`, and subsequent indexing reuses that cache if that type of `u` has
 already been seen.
 
-Note that `LazyBufferCache` does cause a dynamic dispatch and its return is not type-inferred.
+Note that `GeneralLazyBufferCache`'s return is not type-inferred.
 This means it's the slowest of the preallocation methods, but it's the most general.
 
 ### Example
 
@@ -319,7 +316,7 @@ tries to do this with a bump allocator.
 
   - See the [SciML Style Guide](https://github.com/SciML/SciMLStyle) for common coding practices and other style decisions.
   - There are a few community forums:
-    
+
     + The #diffeq-bridged and #sciml-bridged channels in the
       [Julia Slack](https://julialang.org/slack/)
     + The #diffeq-bridged and #sciml-bridged channels in the
diff --git a/src/PreallocationTools.jl b/src/PreallocationTools.jl
index d0f4425..989d554 100644
--- a/src/PreallocationTools.jl
+++ b/src/PreallocationTools.jl
@@ -204,7 +204,7 @@ same type and size `f(size(u))` (defaulting to the same size), which is allocate
 needed and then cached within `b` for subsequent usage.
 """
 struct LazyBufferCache{F <: Function}
-    bufs::Dict # a dictionary mapping types to buffers
+    bufs::Dict{Any, Any} # a dictionary mapping (type, size) pairs to buffers
     sizemap::F
     LazyBufferCache(f::F = identity) where {F <: Function} = new{F}(Dict(), f) # start with empty dict
 end
@@ -212,10 +212,9 @@ end
 # override the [] method
 function Base.getindex(b::LazyBufferCache, u::T) where {T <: AbstractArray}
     s = b.sizemap(size(u)) # required buffer size
-    buf = get!(b.bufs, (T, s)) do
+    get!(b.bufs, (T, s)) do
         similar(u, s) # buffer to allocate if it was not found in b.bufs
     end::T # declare type since b.bufs dictionary is untyped
-    return buf
 end
 
 # GeneralLazyBufferCache
@@ -235,7 +234,7 @@ correct using things like function barriers, then this is a general technique th
 is sufficiently fast.
 """
 struct GeneralLazyBufferCache{F <: Function}
-    bufs::Dict # a dictionary mapping types to buffers
+    bufs::Dict{Any, Any} # a dictionary mapping types to buffers
     f::F
     GeneralLazyBufferCache(f::F = identity) where {F <: Function} = new{F}(Dict(), f) # start with empty dict
 end
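For review context, a minimal usage sketch of the two cache types this patch touches, assuming PreallocationTools.jl and ForwardDiff.jl are installed; the `loss` function, the builder passed to `GeneralLazyBufferCache`, and the buffer shapes are illustrative only, not part of the patch:

```julia
using PreallocationTools, ForwardDiff

# LazyBufferCache: indexing with an array `u` returns a cached buffer of the
# same type and size, allocating it on first use. The `end::T` assertion in
# the patched `getindex` is what keeps this type-stable even though the
# backing Dict{Any, Any} is untyped.
lbc = LazyBufferCache()

function loss(u)
    tmp = lbc[u]        # Float64 buffer for Float64 `u`, Dual buffer under AD
    tmp .= u .^ 2
    return sum(tmp)
end

loss(rand(5))                        # plain call caches a Float64 buffer
ForwardDiff.gradient(loss, rand(5))  # AD call caches a separate Dual buffer

# GeneralLazyBufferCache: caches the result of an arbitrary builder function,
# keyed by the input's type. Its return is not type-inferred, so hot loops
# should pass the fetched cache through a function barrier.
glbc = GeneralLazyBufferCache(u -> zeros(eltype(u), length(u)))
buf = glbc[rand(3)]   # builds (then reuses) a Vector{Float64} for this type
```

The key-by-`(T, s)` change in the patch means one `LazyBufferCache` can serve differently sized inputs at once, since each `(type, size)` pair gets its own buffer entry.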