experimental_experiment.torch_interpreter.patches.patch_transformers

class experimental_experiment.torch_interpreter.patches.patch_transformers.patched_AttentionMaskConverter

Patches transformers.modeling_attn_mask_utils.AttentionMaskConverter._make_causal_mask.
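The original method builds the additive causal mask used by decoder self-attention. As a point of reference, here is a minimal sketch of the mask pattern that _make_causal_mask produces (an illustration of the idea, not the patched implementation itself; the helper name is made up):

    import torch

    def causal_mask_sketch(seq_len: int, dtype: torch.dtype = torch.float32) -> torch.Tensor:
        """Illustration of the pattern _make_causal_mask produces,
        before broadcasting to batch and past-key-value lengths."""
        # Future positions get the most negative representable value so
        # they vanish after softmax; allowed positions stay at 0.
        mask = torch.full((seq_len, seq_len), torch.finfo(dtype).min, dtype=dtype)
        return torch.triu(mask, diagonal=1)

    print(causal_mask_sketch(4))
    # Row i has 0 in columns 0..i and a large negative value afterwards,
    # so token i can only attend to itself and earlier tokens.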

class experimental_experiment.torch_interpreter.patches.patch_transformers.patched_DynamicCache(num_hidden_layers: int | None = None)

Removes the dependency on torch.nn.Module from transformers.cache_utils.DynamicCache.
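Conceptually, the cache is just two per-layer lists of tensors. The following hypothetical, stripped-down re-implementation illustrates that layout and the property the patch is after, a class with no torch.nn.Module in its hierarchy (class name and shapes are assumptions for illustration, not this module's code):

    import torch
    from typing import Any, Dict, List, Optional, Tuple

    class MinimalDynamicCache:
        """Simplified sketch: two plain per-layer lists of tensors,
        with no torch.nn.Module base class."""

        def __init__(self) -> None:
            self.key_cache: List[torch.Tensor] = []
            self.value_cache: List[torch.Tensor] = []

        def update(
            self,
            key_states: torch.Tensor,
            value_states: torch.Tensor,
            layer_idx: int,
            cache_kwargs: Optional[Dict[str, Any]] = None,
        ) -> Tuple[torch.Tensor, torch.Tensor]:
            if len(self.key_cache) <= layer_idx:
                # First call for this layer: store the tensors as-is.
                self.key_cache.append(key_states)
                self.value_cache.append(value_states)
            else:
                # Later calls: grow along the sequence axis (dim=-2) of
                # (batch, num_heads, seq_len, head_dim) tensors.
                self.key_cache[layer_idx] = torch.cat(
                    [self.key_cache[layer_idx], key_states], dim=-2
                )
                self.value_cache[layer_idx] = torch.cat(
                    [self.value_cache[layer_idx], value_states], dim=-2
                )
            return self.key_cache[layer_idx], self.value_cache[layer_idx]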

batch_repeat_interleave(repeats: int)
batch_select_indices(indices: Tensor)
batch_split(full_batch_size: int, split_size: int, num_hidden_layers: int | None = None) → List[transformers.cache_utils.DynamicCache]
crop(max_length: int)
classmethod from_batch_splits(splits: List[transformers.cache_utils.DynamicCache], num_hidden_layers: int | None = None) → transformers.cache_utils.DynamicCache
classmethod from_legacy_cache(past_key_values: Tuple[Tuple[torch.Tensor]] | None = None, num_hidden_layers: int | None = None) → transformers.cache_utils.DynamicCache
get_max_cache_shape() → int | None
get_seq_length(layer_idx: int | None = 0) → int
get_usable_length(new_seq_length: int, layer_idx: int | None = 0) → int
to_legacy_cache() → Tuple[Tuple[Tensor], Tuple[Tensor]]
update(key_states: Tensor, value_states: Tensor, layer_idx: int, cache_kwargs: Dict[str, Any] | None = None) → Tuple[Tensor, Tensor]
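A short usage sketch of the cache API listed above, using the upstream transformers.cache_utils.DynamicCache that this class mirrors (the tensor shapes are made up for illustration, and the exact signatures may vary across transformers versions):

    import torch
    from transformers.cache_utils import DynamicCache

    # Made-up shapes: (batch=2, num_heads=4, seq_len=3, head_dim=8).
    k = torch.randn(2, 4, 3, 8)
    v = torch.randn(2, 4, 3, 8)

    cache = DynamicCache()
    cache.update(k, v, layer_idx=0)   # returns the accumulated (key, value)
    print(cache.get_seq_length(0))    # 3

    # A second update concatenates along the sequence axis.
    cache.update(torch.randn(2, 4, 1, 8), torch.randn(2, 4, 1, 8), layer_idx=0)
    print(cache.get_seq_length(0))    # 4

    # Round-trip through the legacy tuple-of-tuples representation.
    legacy = cache.to_legacy_cache()
    restored = DynamicCache.from_legacy_cache(legacy)
    assert torch.equal(restored.to_legacy_cache()[0][0], legacy[0][0])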