Source code for experimental_experiment.torch_models.llm_model_helper

from typing import Any, Dict, Tuple, Optional, Union
import numpy as np
from . import assert_found
from .llm_model_setup import LLMInputKind, finalize_llm_setup, finalize_llm_vision_setup


######
# Bert
######


def get_all_mini_ml_l6_v1(
    inputs_as_tuple: bool = False,
    input_cache: bool = True,
    batch_size: int = 1,
    common_dynamic_shapes: bool = False,
    **kwargs,
) -> Dict[str, Any]:
    """
    Gets a non-initialized model.

    :param inputs_as_tuple: returns dummy inputs as a tuple instead of a dictionary
    :param input_cache: generate data for this iteration with or without cache
    :param batch_size: batch size
    :param common_dynamic_shapes: if True returns dynamic shapes as well
    :param kwargs: to overwrite the configuration, example ``num_hidden_layers=1``
    :return: dictionary

    See `all-MiniLM-L6-v1
    <https://huggingface.co/sentence-transformers/all-MiniLM-L6-v1/blob/main/config.json>`_.

    Model forward signature:

    ::

        def forward(
            self,
            input_ids: Optional[torch.Tensor] = None,
            attention_mask: Optional[torch.Tensor] = None,
            token_type_ids: Optional[torch.Tensor] = None,
            position_ids: Optional[torch.Tensor] = None,
            head_mask: Optional[torch.Tensor] = None,
            inputs_embeds: Optional[torch.Tensor] = None,
            encoder_hidden_states: Optional[torch.Tensor] = None,
            encoder_attention_mask: Optional[torch.Tensor] = None,
            past_key_values: Optional[List[torch.FloatTensor]] = None,
            use_cache: Optional[bool] = None,
            output_attentions: Optional[bool] = None,
            output_hidden_states: Optional[bool] = None,
            return_dict: Optional[bool] = None,
        ) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPoolingAndCrossAttentions]:
    """
    import transformers

    config = {
        "_name_or_path": "nreimers/MiniLM-L6-H384-uncased",
        "architectures": ["BertModel"],
        "attention_probs_dropout_prob": 0.1,
        "gradient_checkpointing": False,
        "hidden_act": "gelu",
        "hidden_dropout_prob": 0.1,
        "hidden_size": 384,
        "initializer_range": 0.02,
        "intermediate_size": 1536,
        "layer_norm_eps": 1e-12,
        "max_position_embeddings": 512,
        "model_type": "bert",
        "num_attention_heads": 12,
        "num_hidden_layers": 6,
        "pad_token_id": 0,
        "position_embedding_type": "absolute",
        "transformers_version": "4.8.2",
        "type_vocab_size": 2,
        "use_cache": True,
        "vocab_size": 30522,
    }
    assert_found(kwargs, config)
    config.update(**kwargs)
    conf = transformers.BertConfig(**config)
    model = transformers.BertModel(conf)
    model.eval()

    res = finalize_llm_setup(
        model,
        batch_size,
        max_token_id=30522,
        cache_last_dim=32,
        num_attention_heads=12,
        common_dynamic_shapes=common_dynamic_shapes,
        inputs_as_tuple=inputs_as_tuple,
        num_hidden_layers=config["num_hidden_layers"],
        input_cache=input_cache,
    )
    if not input_cache:
        return res

    # We flatten the cache: one 4-tuple of tensors per hidden layer.
    for k in ["inputs", "inputs2"]:
        if k not in res:
            continue
        kv = res[k]["past_key_values"]
        kv = [
            (kv.key_cache[i], kv.value_cache[i], kv.key_cache[i], kv.value_cache[i])
            for i in range(len(kv.key_cache))
        ]
        res[k]["past_key_values"] = kv
    sh = res["dynamic_shapes"]["past_key_values"]
    if sh:
        sh1 = sh[0][0]
        res["dynamic_shapes"]["past_key_values"] = [
            tuple(sh1 for _ in range(4)) for _ in range(config["num_hidden_layers"])
        ]
    return res
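
# Illustrative usage (a hypothetical helper, not part of the original module). It relies
# only on keys the function above manipulates itself: ``"inputs"``, ``"dynamic_shapes"``
# and the flattened ``"past_key_values"`` entry.
def _example_all_mini_lm_l6_v1():
    "Builds a reduced 1-layer variant and inspects the flattened cache layout."
    res = get_all_mini_ml_l6_v1(num_hidden_layers=1, common_dynamic_shapes=True)
    cache = res["inputs"]["past_key_values"]
    # one 4-tuple (key, value, key, value) per hidden layer
    assert len(cache) == 1 and len(cache[0]) == 4
    return res["dynamic_shapes"]
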

############
# Phi Series
############

def get_phi2(
    inputs_as_tuple: bool = False,
    input_cache: bool = True,
    batch_size: int = 1,
    common_dynamic_shapes: bool = False,
    **kwargs,
) -> Dict[str, Any]:
    """
    Gets a non-initialized model.

    :param inputs_as_tuple: returns dummy inputs as a tuple instead of a dictionary
    :param input_cache: generate data for this iteration with or without cache
    :param batch_size: batch size
    :param common_dynamic_shapes: if True returns dynamic shapes as well
    :param kwargs: to overwrite the configuration, example ``num_hidden_layers=1``
    :return: dictionary

    See `Phi-2/config.json
    <https://huggingface.co/microsoft/phi-2/blob/main/config.json>`_.
    """
    import transformers

    config = {
        "_name_or_path": "microsoft/phi-2",
        "architectures": ["PhiForCausalLM"],
        "attention_dropout": 0.0,
        "bos_token_id": 50256,
        "embd_pdrop": 0.0,
        "eos_token_id": 50256,
        "hidden_act": "gelu_new",
        "hidden_size": 2560,
        "initializer_range": 0.02,
        "intermediate_size": 10240,
        "layer_norm_eps": 1e-05,
        "max_position_embeddings": 2048,
        "model_type": "phi",
        "num_attention_heads": 32,
        "num_hidden_layers": 32,
        "num_key_value_heads": 32,
        "partial_rotary_factor": 0.4,
        "qk_layernorm": False,
        "resid_pdrop": 0.1,
        "rope_scaling": None,
        "rope_theta": 10000.0,
        "tie_word_embeddings": False,
        "torch_dtype": "float16",
        "transformers_version": "4.37.0",
        "use_cache": True,
        "vocab_size": 51200,
    }
    assert_found(kwargs, config)
    config.update(**kwargs)
    conf = transformers.PhiConfig(**config)
    model = transformers.PhiForCausalLM(conf)
    model.eval()
    return finalize_llm_setup(
        model,
        batch_size,
        max_token_id=50285,
        cache_last_dim=80,
        common_dynamic_shapes=common_dynamic_shapes,
        inputs_as_tuple=inputs_as_tuple,
        num_hidden_layers=config["num_hidden_layers"],
        input_cache=input_cache,
    )
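
# Illustrative usage (a hypothetical helper): any configuration key listed above can be
# overridden through ``kwargs`` to shrink the model, and ``inputs_as_tuple`` turns the
# dummy inputs into a tuple instead of a dictionary.
def _example_phi2_small():
    "Returns the dummy inputs generated for a 2-layer Phi-2 configuration."
    res = get_phi2(num_hidden_layers=2, batch_size=2, inputs_as_tuple=True)
    return res["inputs"]
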
def get_phi35_mini_instruct(
    inputs_as_tuple: bool = False,
    input_cache: bool = True,
    batch_size: int = 1,
    common_dynamic_shapes: bool = False,
    **kwargs,
) -> Dict[str, Any]:
    """
    Gets a non-initialized model.

    :param inputs_as_tuple: returns dummy inputs as a tuple instead of a dictionary
    :param batch_size: batch size
    :param input_cache: generate data for this iteration with or without cache
    :param common_dynamic_shapes: if True returns dynamic shapes as well
    :param kwargs: to overwrite the configuration, example ``num_hidden_layers=1``
    :return: dictionary

    See `Phi-3.5-mini-instruct/config.json
    <https://huggingface.co/microsoft/Phi-3.5-mini-instruct/blob/main/config.json>`_.
    """
    import transformers

    config = {
        "_name_or_path": "Phi-3.5-mini-instruct",
        "architectures": ["Phi3ForCausalLM"],
        "attention_dropout": 0.0,
        "auto_map": {
            "AutoConfig": "configuration_phi3.Phi3Config",
            "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM",
        },
        "bos_token_id": 1,
        "embd_pdrop": 0.0,
        "eos_token_id": 32000,
        "hidden_act": "silu",
        "hidden_size": 3072,
        "initializer_range": 0.02,
        "intermediate_size": 8192,
        "max_position_embeddings": 131072,
        "model_type": "phi3",
        "num_attention_heads": 32,
        "num_hidden_layers": 32,
        "num_key_value_heads": 32,
        "original_max_position_embeddings": 4096,
        "pad_token_id": 32000,
        "resid_pdrop": 0.0,
        "rms_norm_eps": 1e-05,
        "rope_scaling": {
            "long_factor": [
                1.0800000429153442, 1.1100000143051147, 1.1399999856948853, 1.340000033378601,
                1.5899999141693115, 1.600000023841858, 1.6200000047683716, 2.620000123977661,
                3.2300000190734863, 3.2300000190734863, 4.789999961853027, 7.400000095367432,
                7.700000286102295, 9.09000015258789, 12.199999809265137, 17.670000076293945,
                24.46000099182129, 28.57000160217285, 30.420001983642578, 30.840002059936523,
                32.590003967285156, 32.93000411987305, 42.320003509521484, 44.96000289916992,
                50.340003967285156, 50.45000457763672, 57.55000305175781, 57.93000411987305,
                58.21000289916992, 60.1400032043457, 62.61000442504883, 62.62000274658203,
                62.71000289916992, 63.1400032043457, 63.1400032043457, 63.77000427246094,
                63.93000411987305, 63.96000289916992, 63.970001220703125, 64.02999877929688,
                64.06999969482422, 64.08000183105469, 64.12000274658203, 64.41000366210938,
                64.4800033569336, 64.51000213623047, 64.52999877929688, 64.83999633789062,
            ],
            "short_factor": [
                1.0, 1.0199999809265137, 1.0299999713897705, 1.0299999713897705,
                1.0499999523162842, 1.0499999523162842, 1.0499999523162842, 1.0499999523162842,
                1.0499999523162842, 1.0699999332427979, 1.0999999046325684, 1.1099998950958252,
                1.1599998474121094, 1.1599998474121094, 1.1699998378753662, 1.2899998426437378,
                1.339999794960022, 1.679999828338623, 1.7899998426437378, 1.8199998140335083,
                1.8499997854232788, 1.8799997568130493, 1.9099997282028198, 1.9399996995925903,
                1.9899996519088745, 2.0199997425079346, 2.0199997425079346, 2.0199997425079346,
                2.0199997425079346, 2.0199997425079346, 2.0199997425079346, 2.0299997329711914,
                2.0299997329711914, 2.0299997329711914, 2.0299997329711914, 2.0299997329711914,
                2.0299997329711914, 2.0299997329711914, 2.0299997329711914, 2.0299997329711914,
                2.0799996852874756, 2.0899996757507324, 2.189999580383301, 2.2199995517730713,
                2.5899994373321533, 2.729999542236328, 2.749999523162842, 2.8399994373321533,
            ],
            "type": "longrope",
        },
        "rope_theta": 10000.0,
        "sliding_window": 262144,
        "tie_word_embeddings": False,
        "torch_dtype": "bfloat16",
        "use_cache": True,
        "attention_bias": False,
        "vocab_size": 32064,
    }
    assert_found(kwargs, config)
    config.update(**kwargs)
    conf = transformers.Phi3Config(**config)
    model = transformers.Phi3ForCausalLM(conf)
    model.eval()
    return finalize_llm_setup(
        model,
        batch_size,
        max_token_id=32064,
        cache_last_dim=96,
        common_dynamic_shapes=common_dynamic_shapes,
        inputs_as_tuple=inputs_as_tuple,
        num_hidden_layers=config["num_hidden_layers"],
        input_cache=input_cache,
    )
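
# A sketch of how such a reduced model could be fed to :func:`torch.export.export`.
# It assumes the returned dictionary also exposes the instantiated model under a
# ``"model"`` key; that key is not visible in this file, so treat it as hypothetical.
def _example_export_phi35_mini():
    "Exports a 1-layer Phi-3.5-mini variant with the generated dynamic shapes."
    import torch

    res = get_phi35_mini_instruct(num_hidden_layers=1, common_dynamic_shapes=True)
    return torch.export.export(
        res["model"],  # assumed key, see the comment above
        (),
        kwargs=res["inputs"],
        dynamic_shapes=res["dynamic_shapes"],
    )
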
def get_phi35_vision_instruct(
    inputs_as_tuple: bool = False,
    batch_size: int = 2,
    input_kind: LLMInputKind = LLMInputKind.input_ids,
    device: str = "cpu",
    common_dynamic_shapes: bool = False,
    **kwargs,
) -> Tuple[Any, Union[Tuple[Any, ...], Dict[str, Any]], Optional[Any]]:
    """
    Gets a non-initialized model.

    :param inputs_as_tuple: returns dummy inputs as a tuple instead of a dictionary
    :param batch_size: batch size to use
    :param input_kind: kind of dummy inputs to generate (a combination of ``LLMInputKind`` flags)
    :param device: device the model is moved to
    :param common_dynamic_shapes: if True returns dynamic shapes as well
    :param kwargs: to overwrite the configuration, example ``num_hidden_layers=1``
    :return: model, inputs, dynamic shapes
    """
    from .fromhub.configuration_phi3_v import Phi3VConfig
    from .fromhub.modeling_phi3_v import Phi3VForCausalLM

    config = {
        "_name_or_path": "Phi-3.5-vision-instruct",
        "architectures": ["Phi3VForCausalLM"],
        "attention_dropout": 0.0,
        "auto_map": {
            "AutoConfig": "configuration_phi3_v.Phi3VConfig",
            "AutoModelForCausalLM": "modeling_phi3_v.Phi3VForCausalLM",
        },
        "bos_token_id": 1,
        "embd_layer": {
            "embedding_cls": "image",
            "hd_transform_order": "sub_glb",
            "projection_cls": "mlp",
            "use_hd_transform": True,
            "with_learnable_separator": True,
        },
        "embd_pdrop": 0.0,
        "eos_token_id": 2,
        "hidden_act": "silu",
        "hidden_size": 3072,
        "img_processor": {
            "image_dim_out": 1024,
            "model_name": "openai/clip-vit-large-patch14-336",
            "name": "clip_vision_model",
            "num_img_tokens": 144,
        },
        "initializer_range": 0.02,
        "intermediate_size": 8192,
        "max_position_embeddings": 131072,
        "model_type": "phi3_v",
        "num_attention_heads": 32,
        "num_hidden_layers": 32,
        "num_key_value_heads": 32,
        "original_max_position_embeddings": 4096,
        "pad_token_id": 32000,
        "resid_pdrop": 0.0,
        "rms_norm_eps": 1e-05,
        "rope_scaling": {
            "long_factor": [
                1.0800000429153442, 1.1100000143051147, 1.1399999856948853, 1.340000033378601,
                1.5899999141693115, 1.600000023841858, 1.6200000047683716, 2.620000123977661,
                3.2300000190734863, 3.2300000190734863, 4.789999961853027, 7.400000095367432,
                7.700000286102295, 9.09000015258789, 12.199999809265137, 17.670000076293945,
                24.46000099182129, 28.57000160217285, 30.420001983642578, 30.840002059936523,
                32.590003967285156, 32.93000411987305, 42.320003509521484, 44.96000289916992,
                50.340003967285156, 50.45000457763672, 57.55000305175781, 57.93000411987305,
                58.21000289916992, 60.1400032043457, 62.61000442504883, 62.62000274658203,
                62.71000289916992, 63.1400032043457, 63.1400032043457, 63.77000427246094,
                63.93000411987305, 63.96000289916992, 63.970001220703125, 64.02999877929688,
                64.06999969482422, 64.08000183105469, 64.12000274658203, 64.41000366210938,
                64.4800033569336, 64.51000213623047, 64.52999877929688, 64.83999633789062,
            ],
            "short_factor": [
                1.08, 1.1, 1.1300000000000001, 1.2800000000000002,
                1.3100000000000003, 1.4500000000000004, 1.4500000000000004, 1.9500000000000008,
                2.030000000000001, 2.4299999999999926, 2.5699999999999896, 2.9499999999999815,
                3.729999999999965, 3.869999999999962, 4.189999999999955, 4.43999999999995,
                4.6399999999999455, 4.979999999999938, 5.159999999999934, 5.279999999999932,
                5.759999999999922, 5.889999999999919, 5.889999999999919, 5.969999999999917,
                6.089999999999915, 6.2799999999999105, 6.7699999999999, 6.8899999999998975,
                7.109999999999893, 7.129999999999892, 7.179999999999891, 7.289999999999889,
                7.339999999999888, 7.559999999999883, 7.619999999999882, 7.69999999999988,
                7.879999999999876, 7.879999999999876, 7.879999999999876, 7.939999999999875,
                7.949999999999875, 7.979999999999874, 8.19999999999987, 8.439999999999864,
                8.469999999999864, 8.589999999999861, 8.809999999999857, 8.999999999999853,
            ],
            "type": "su",
        },
        "rope_theta": 10000.0,
        "sliding_window": 262144,
        "tie_word_embeddings": False,
        "torch_dtype": "bfloat16",
        "transformers_version": "4.38.1",
        "use_cache": True,
        "vocab_size": 32064,
        # "_attn_implementation": "flash_attention_2",
        "_attn_implementation": "eager",
    }
    assert_found(kwargs, config)
    config.update(**kwargs)
    conf = Phi3VConfig(**config)
    model = Phi3VForCausalLM(conf)
    model.eval().to(device)

    if (
        input_kind
        == LLMInputKind.input_ids | LLMInputKind.attention_mask | LLMInputKind.past_key_values
    ):
        return finalize_llm_setup(
            model,
            batch_size=batch_size,
            max_token_id=32064,
            cache_last_dim=96,
            common_dynamic_shapes=common_dynamic_shapes,
            inputs_as_tuple=inputs_as_tuple,
            num_hidden_layers=config["num_hidden_layers"],
            input_cache=True,
            device=device,
        )

    return finalize_llm_vision_setup(
        model,
        input_kind,
        batch_size=batch_size,
        max_token_id=32064,
        cache_last_dim=96,
        common_dynamic_shapes=common_dynamic_shapes,
        inputs_as_tuple=inputs_as_tuple,
        num_hidden_layers=config["num_hidden_layers"],
        device=device,
    )
def get_phi3_vision_128k_instruct(
    inputs_as_tuple: bool = False,
    batch_size: int = 2,
    input_kind: LLMInputKind = LLMInputKind.input_ids,
    device: str = "cpu",
    common_dynamic_shapes: bool = False,
    **kwargs,
) -> Tuple[Any, Union[Tuple[Any, ...], Dict[str, Any]], Optional[Any]]:
    """
    Gets a non-initialized model.

    :param inputs_as_tuple: returns dummy inputs as a tuple instead of a dictionary
    :param batch_size: batch size to use
    :param input_kind: kind of dummy inputs to generate (a combination of ``LLMInputKind`` flags)
    :param device: device the inputs are generated for
    :param common_dynamic_shapes: if True returns dynamic shapes as well
    :param kwargs: to overwrite the configuration, example ``num_hidden_layers=1``
    :return: model, inputs, dynamic shapes

    See `Phi-3-vision-128k-instruct/config.json
    <https://huggingface.co/microsoft/Phi-3-vision-128k-instruct/blob/main/config.json>`_.
    """
    from .configuration_phi3_v import Phi3VConfig
    from .modeling_phi3_v import Phi3VForCausalLM

    config = {
        "_name_or_path": "Phi-3-vision-128k-instruct",
        "architectures": ["Phi3VForCausalLM"],
        "attention_dropout": 0.0,
        "auto_map": {
            "AutoConfig": "configuration_phi3_v.Phi3VConfig",
            "AutoModelForCausalLM": "modeling_phi3_v.Phi3VForCausalLM",
        },
        "bos_token_id": 1,
        "embd_layer": {
            "embedding_cls": "image",
            "hd_transform_order": "sub_glb",
            "projection_cls": "mlp",
            "use_hd_transform": True,
            "with_learnable_separator": True,
        },
        "eos_token_id": 2,
        "hidden_act": "silu",
        "hidden_size": 3072,
        "img_processor": {
            "image_dim_out": 1024,
            "model_name": "openai/clip-vit-large-patch14-336",
            "name": "clip_vision_model",
            "num_img_tokens": 144,
        },
        "initializer_range": 0.02,
        "intermediate_size": 8192,
        "max_position_embeddings": 131072,
        "model_type": "phi3_v",
        "num_attention_heads": 32,
        "num_hidden_layers": 32,
        "num_key_value_heads": 32,
        "original_max_position_embeddings": 4096,
        "rms_norm_eps": 1e-05,
        "rope_scaling": {
            "long_factor": [
                1.0299999713897705, 1.0499999523162842, 1.0499999523162842, 1.0799999237060547,
                1.2299998998641968, 1.2299998998641968, 1.2999999523162842, 1.4499999284744263,
                1.5999999046325684, 1.6499998569488525, 1.8999998569488525, 2.859999895095825,
                3.68999981880188, 5.419999599456787, 5.489999771118164, 5.489999771118164,
                9.09000015258789, 11.579999923706055, 15.65999984741211, 15.769999504089355,
                15.789999961853027, 18.360000610351562, 21.989999771118164, 23.079999923706055,
                30.009998321533203, 32.35000228881836, 32.590003967285156, 35.56000518798828,
                39.95000457763672, 53.840003967285156, 56.20000457763672, 57.95000457763672,
                59.29000473022461, 59.77000427246094, 59.920005798339844, 61.190006256103516,
                61.96000671386719, 62.50000762939453, 63.3700065612793, 63.48000717163086,
                63.48000717163086, 63.66000747680664, 63.850006103515625, 64.08000946044922,
                64.760009765625, 64.80001068115234, 64.81001281738281, 64.81001281738281,
            ],
            "short_factor": [
                1.05, 1.05, 1.05, 1.1,
                1.1, 1.1, 1.2500000000000002, 1.2500000000000002,
                1.4000000000000004, 1.4500000000000004, 1.5500000000000005, 1.8500000000000008,
                1.9000000000000008, 2.000000000000001, 2.000000000000001, 2.000000000000001,
                2.000000000000001, 2.000000000000001, 2.000000000000001, 2.000000000000001,
                2.000000000000001, 2.000000000000001, 2.000000000000001, 2.000000000000001,
                2.000000000000001, 2.000000000000001, 2.000000000000001, 2.000000000000001,
                2.000000000000001, 2.000000000000001, 2.000000000000001, 2.000000000000001,
                2.1000000000000005, 2.1000000000000005, 2.2, 2.3499999999999996,
                2.3499999999999996, 2.3499999999999996, 2.3499999999999996, 2.3999999999999995,
                2.3999999999999995, 2.6499999999999986, 2.6999999999999984, 2.8999999999999977,
                2.9499999999999975, 3.049999999999997, 3.049999999999997, 3.049999999999997,
            ],
            "type": "su",
        },
        "rope_theta": 10000.0,
        "sliding_window": 131072,
        "tie_word_embeddings": False,
        "torch_dtype": "bfloat16",
        "transformers_version": "4.38.1",
        "use_cache": True,
        "vocab_size": 32064,
        "_attn_implementation": "flash_attention_2",
    }
    assert_found(kwargs, config)
    config.update(**kwargs)
    conf = Phi3VConfig(**config)
    model = Phi3VForCausalLM(conf)
    model.eval()

    if (
        input_kind
        == LLMInputKind.input_ids | LLMInputKind.attention_mask | LLMInputKind.past_key_values
    ):
        return finalize_llm_setup(
            model,
            batch_size=batch_size,
            max_token_id=32064,
            cache_last_dim=96,
            common_dynamic_shapes=common_dynamic_shapes,
            inputs_as_tuple=inputs_as_tuple,
            num_hidden_layers=config["num_hidden_layers"],
            input_cache=True,
            device=device,
        )

    return finalize_llm_vision_setup(
        model,
        input_kind,
        batch_size=batch_size,
        max_token_id=32064,
        cache_last_dim=96,
        common_dynamic_shapes=common_dynamic_shapes,
        inputs_as_tuple=inputs_as_tuple,
        num_hidden_layers=config["num_hidden_layers"],
        device=device,
    )
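
# Illustrative usage (a hypothetical helper): the configuration above defaults to
# ``flash_attention_2``; since ``_attn_implementation`` is a key of ``config``, it can
# be overridden through ``kwargs``, for instance to fall back to eager attention on CPU.
def _example_phi3_vision_eager():
    "Builds a 1-layer variant running eager attention instead of flash attention."
    return get_phi3_vision_128k_instruct(
        num_hidden_layers=1, _attn_implementation="eager", device="cpu"
    )
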

##############
# Jamba, Mamba
##############

def get_ai21_jamba_15_mini(
    inputs_as_tuple: bool = False,
    input_cache: bool = True,
    batch_size: int = 1,
    common_dynamic_shapes: bool = False,
    **kwargs,
) -> Dict[str, Any]:
    """
    Gets a non-initialized model.

    :param inputs_as_tuple: returns dummy inputs as a tuple instead of a dictionary
    :param input_cache: generate data for this iteration with or without cache
    :param batch_size: batch size
    :param common_dynamic_shapes: if True returns dynamic shapes as well
    :param kwargs: to overwrite the configuration, example ``num_hidden_layers=1``
    :return: dictionary

    See `ai21labs/AI21-Jamba-1.5-Mini/config.json
    <https://huggingface.co/ai21labs/AI21-Jamba-1.5-Mini/blob/main/config.json>`_.
    """
    import transformers

    config = {
        "architectures": ["JambaForCausalLM"],
        "attention_dropout": 0.0,
        "attn_layer_offset": 4,
        "attn_layer_period": 8,
        "bos_token_id": 1,
        "eos_token_id": [2, 518],
        "expert_layer_offset": 1,
        "expert_layer_period": 2,
        "hidden_act": "silu",
        "hidden_size": 4096,
        "initializer_range": 0.02,
        "intermediate_size": 14336,
        "mamba_conv_bias": True,
        "mamba_d_conv": 4,
        "mamba_d_state": 16,
        "mamba_dt_rank": 256,
        "mamba_expand": 2,
        "mamba_proj_bias": False,
        "max_position_embeddings": 262144,
        "model_type": "jamba",
        "num_attention_heads": 32,
        "num_experts": 16,
        "num_experts_per_tok": 2,
        "num_hidden_layers": 32,
        "num_key_value_heads": 8,
        "num_logits_to_keep": 1,
        "output_router_logits": False,
        "pad_token_id": 0,
        "rms_norm_eps": 1e-06,
        "router_aux_loss_coef": 0.001,
        "sliding_window": None,
        "tie_word_embeddings": False,
        "torch_dtype": "bfloat16",
        "transformers_version": "4.40.2",
        "use_cache": True,
        "use_mamba_kernels": False,  # maybe another test to add
        "vocab_size": 65536,
    }
    config.update(
        {
            "_from_model_config": True,
            "bos_token_id": 1,
            "eos_token_id": [2, 518],
            "pad_token_id": 0,
            "transformers_version": "4.40.2",
        }
    )
    assert_found(kwargs, config)
    config.update(**kwargs)
    conf = transformers.JambaConfig(**config)
    model = transformers.JambaForCausalLM(conf)
    model.eval()
    return finalize_llm_setup(
        model,
        batch_size,
        max_token_id=63028,
        cache_last_dim=80,
        common_dynamic_shapes=common_dynamic_shapes,
        inputs_as_tuple=inputs_as_tuple,
        num_hidden_layers=config["num_hidden_layers"],
        input_cache=input_cache,
    )
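
# Illustrative usage (a hypothetical helper): besides ``num_hidden_layers``, any other
# configuration key such as ``num_experts`` can be reduced through ``kwargs``.
def _example_jamba_small():
    "Builds a reduced Jamba configuration with fewer layers and experts."
    return get_ai21_jamba_15_mini(num_hidden_layers=2, num_experts=2, batch_size=2)
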
def get_falcon_mamba_7b(
    batch_size: int = 2,
    input_cache: bool = True,
    inputs_as_tuple: bool = False,
    common_dynamic_shapes: bool = False,
    **kwargs,
) -> Tuple[Any, Union[Tuple[Any, ...], Dict[str, Any]]]:
    """
    Gets a non-initialized model.

    :param inputs_as_tuple: returns dummy inputs as a tuple instead of a dictionary
    :param batch_size: batch size
    :param input_cache: generate data for this iteration with or without cache
    :param common_dynamic_shapes: if True returns dynamic shapes as well
    :param kwargs: to overwrite the configuration, example ``num_hidden_layers=1``
    :return: dictionary

    See `falcon-mamba-7b/config.json
    <https://huggingface.co/tiiuae/falcon-mamba-7b/blob/main/config.json>`_.
    """
    import transformers

    config = {
        "_name_or_path": "./",
        "architectures": ["FalconMambaForCausalLM"],
        "bos_token_id": 0,
        "conv_kernel": 4,
        "eos_token_id": 11,
        "expand": 16,
        "hidden_act": "silu",
        "hidden_size": 4096,
        "initializer_range": 0.1,
        "intermediate_size": 8192,
        "layer_norm_epsilon": 1e-05,
        "model_type": "falcon_mamba",
        "num_hidden_layers": 64,
        "pad_token_id": 11,
        "rescale_prenorm_residual": False,
        "residual_in_fp32": True,
        "state_size": 16,
        "tie_word_embeddings": False,
        "time_step_floor": 0.0001,
        "time_step_init_scheme": "random",
        "time_step_max": 0.1,
        "time_step_min": 0.001,
        "time_step_rank": 256,
        "time_step_scale": 1.0,
        "torch_dtype": "bfloat16",
        "transformers_version": "4.43.0.dev0",
        "use_bias": False,
        "use_cache": True,
        "use_conv_bias": True,
        "vocab_size": 65024,
    }
    config.update(
        {
            "_from_model_config": True,
            "bos_token_id": 0,
            "eos_token_id": 11,
            "pad_token_id": 11,
            "transformers_version": "4.43.0.dev0",
        }
    )
    assert_found(kwargs, config)
    config.update(**kwargs)
    conf = transformers.FalconMambaConfig(**config)
    model = transformers.FalconMambaForCausalLM(conf)
    model.eval()
    return finalize_llm_setup(
        model,
        batch_size,
        max_token_id=65024,
        cache_last_dim=64,
        common_dynamic_shapes=common_dynamic_shapes,
        inputs_as_tuple=inputs_as_tuple,
        num_hidden_layers=config["num_hidden_layers"],
        input_cache=input_cache,
        seq_length_multiple=8,
        input_cache_class=transformers.cache_utils.MambaCache,
    )
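
# Illustrative usage (a hypothetical helper): this getter pads sequence lengths to a
# multiple of 8 and builds its cache with ``transformers.cache_utils.MambaCache``; the
# ``"inputs"`` key mirrors the one accessed in ``get_all_mini_ml_l6_v1`` above.
def _example_falcon_mamba_small():
    "Returns the dummy inputs (including a MambaCache) for a 2-layer variant."
    res = get_falcon_mamba_7b(num_hidden_layers=2, input_cache=True)
    return res["inputs"]
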

#######
# Llama
#######

def get_smollm_1_7b(
    batch_size: int = 2,
    input_cache: bool = True,
    inputs_as_tuple: bool = False,
    common_dynamic_shapes: bool = False,
    **kwargs,
) -> Tuple[Any, Union[Tuple[Any, ...], Dict[str, Any]]]:
    """
    Gets a non-initialized model.

    :param inputs_as_tuple: returns dummy inputs as a tuple instead of a dictionary
    :param batch_size: batch size
    :param input_cache: generate data for this iteration with or without cache
    :param common_dynamic_shapes: if True returns dynamic shapes as well
    :param kwargs: to overwrite the configuration, example ``num_hidden_layers=1``
    :return: dictionary

    See `SmolLM-1.7B
    <https://huggingface.co/HuggingFaceTB/SmolLM-1.7B/blob/main/config.json>`_.
    """
    import transformers

    config = {
        "_name_or_path": "/fsx/loubna/checkpoints/cosmo2_1T/500000",
        "architectures": ["LlamaForCausalLM"],
        "attention_bias": False,
        "attention_dropout": 0.0,
        "bos_token_id": 0,
        "eos_token_id": 0,
        "hidden_act": "silu",
        "hidden_size": 2048,
        "initializer_range": 0.02,
        "intermediate_size": 8192,
        "max_position_embeddings": 2048,
        "model_type": "llama",
        "num_attention_heads": 32,
        "num_hidden_layers": 24,
        "num_key_value_heads": 32,
        "pretraining_tp": 1,
        "rms_norm_eps": 1e-05,
        "rope_scaling": None,
        "rope_theta": 10000.0,
        "tie_word_embeddings": True,
        "torch_dtype": "float32",
        "transformers_version": "4.39.3",
        "use_cache": True,
        "vocab_size": 49152,
        "_attn_implementation": "eager",
    }
    config.update(
        {
            "_from_model_config": True,
            "bos_token_id": 0,
            "eos_token_id": 0,
            "transformers_version": "4.39.3",
        }
    )
    assert_found(kwargs, config)
    config.update(**kwargs)
    conf = transformers.LlamaConfig(**config)
    model = transformers.LlamaForCausalLM(conf)
    model.eval()
    return finalize_llm_setup(
        model,
        batch_size,
        max_token_id=49152,
        cache_last_dim=64,
        common_dynamic_shapes=common_dynamic_shapes,
        inputs_as_tuple=inputs_as_tuple,
        num_hidden_layers=config["num_hidden_layers"],
        input_cache=input_cache,
        seq_length_multiple=8,
    )
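
# Illustrative usage (a hypothetical helper): with ``common_dynamic_shapes=True`` the
# returned dictionary is expected to carry a ``"dynamic_shapes"`` entry, as accessed in
# ``get_all_mini_ml_l6_v1`` above.
def _example_smollm_shapes():
    "Returns the dynamic shapes generated for a 1-layer SmolLM variant."
    res = get_smollm_1_7b(num_hidden_layers=1, common_dynamic_shapes=True)
    return res["dynamic_shapes"]
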
def get_llama32_9b_vision(
    inputs_as_tuple: bool = False, common_dynamic_shapes: bool = False, **kwargs
) -> Tuple[Any, Union[Tuple[Any, ...], Dict[str, Any]]]:
    """
    Gets a non-initialized model.

    :param inputs_as_tuple: returns dummy inputs as a tuple instead of a dictionary
    :param common_dynamic_shapes: if True returns dynamic shapes as well
    :param kwargs: to overwrite the configuration, example ``num_hidden_layers=1``
    :return: model, inputs

    See `MLlama <https://huggingface.co/docs/transformers/main/en/model_doc/mllama>`_.
    """
    import torch
    import transformers

    assert not common_dynamic_shapes, "dynamic shapes are not implemented"

    config = {}
    config.update(**kwargs)
    vision_config = transformers.MllamaVisionConfig(**config)
    text_config = transformers.MllamaTextConfig(**config)
    configuration = transformers.MllamaConfig(vision_config, text_config)
    model = transformers.MllamaForConditionalGeneration(configuration)
    model.eval()

    dim = (1, 30)
    inputs = dict(
        input_ids=torch.randint(0, 49152, dim).to(torch.int64),
        pixel_values=torch.rand((1, 1, 1, 3, 512, 1080)).to(torch.float16),
        aspect_ratio_mask=None,
        aspect_ratio_ids=torch.from_numpy(np.array([[2]], dtype=np.int32)),
        attention_mask=torch.ones(*dim, dtype=torch.int64),
    )
    if inputs_as_tuple:
        inputs = tuple(inputs.values())
    return model, inputs
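
# Illustrative usage (a hypothetical helper): unlike the other getters, this function
# returns the model and its dummy inputs directly rather than a dictionary.
def _example_llama32_vision():
    "Instantiates the default MLlama configuration and returns its dummy inputs as a tuple."
    model, inputs = get_llama32_9b_vision(inputs_as_tuple=True)
    return model, inputs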