Source code for onnx_diagnostic.torch_models.hghub.hub_data

import io
import functools
import textwrap
from typing import Dict, List

__date__ = "2025-03-26"

__data_arch__ = textwrap.dedent(
    """
    architecture,task
    ASTModel,feature-extraction
    AlbertModel,feature-extraction
    BeitForImageClassification,image-classification
    BigBirdModel,feature-extraction
    BlenderbotModel,feature-extraction
    BloomModel,feature-extraction
    CLIPModel,zero-shot-image-classification
    CLIPVisionModel,feature-extraction
    CamembertModel,feature-extraction
    CodeGenModel,feature-extraction
    ConvBertModel,feature-extraction
    ConvNextForImageClassification,image-classification
    ConvNextV2Model,image-feature-extraction
    CvtModel,feature-extraction
    DPTModel,image-feature-extraction
    Data2VecAudioModel,feature-extraction
    Data2VecTextModel,feature-extraction
    Data2VecVisionModel,image-feature-extraction
    DebertaModel,feature-extraction
    DebertaV2Model,feature-extraction
    DecisionTransformerModel,reinforcement-learning
    DeiTModel,image-feature-extraction
    DetrModel,image-feature-extraction
    Dinov2Model,image-feature-extraction
    DistilBertModel,feature-extraction
    DonutSwinModel,feature-extraction
    ElectraModel,feature-extraction
    EsmModel,feature-extraction
    GLPNModel,image-feature-extraction
    GPTBigCodeModel,feature-extraction
    GPTJModel,feature-extraction
    GPTNeoModel,feature-extraction
    GPTNeoXForCausalLM,text-generation
    GemmaForCausalLM,text-generation
    GraniteForCausalLM,text-generation
    GroupViTModel,feature-extraction
    HieraForImageClassification,image-classification
    HubertModel,feature-extraction
    IBertModel,feature-extraction
    IdeficsForVisionText2Text,image-text-to-text
    ImageGPTModel,image-feature-extraction
    LayoutLMModel,feature-extraction
    LayoutLMv3Model,feature-extraction
    LevitModel,image-feature-extraction
    LiltModel,feature-extraction
    LlamaForCausalLM,text-generation
    LongT5Model,feature-extraction
    LongformerModel,feature-extraction
    MCTCTModel,feature-extraction
    MPNetModel,feature-extraction
    MT5Model,feature-extraction
    MarianMTModel,text2text-generation
    MarkupLMModel,feature-extraction
    MaskFormerForInstanceSegmentation,image-segmentation
    MegatronBertModel,feature-extraction
    MgpstrForSceneTextRecognition,feature-extraction
    MistralForCausalLM,text-generation
    MobileBertModel,feature-extraction
    MobileNetV1Model,image-feature-extraction
    MobileNetV2Model,image-feature-extraction
    MobileViTForImageClassification,image-classification
    ModernBertForMaskedLM,fill-mask
    MoonshineForConditionalGeneration,automatic-speech-recognition
    MptForCausalLM,text-generation
    MusicgenForConditionalGeneration,text-to-audio
    NystromformerModel,feature-extraction
    OPTModel,feature-extraction
    Olmo2ForCausalLM,text-generation
    OlmoForCausalLM,text-generation
    OwlViTModel,feature-extraction
    Owlv2Model,feature-extraction
    PatchTSMixerForPrediction,no-pipeline-tag
    PatchTSTForPrediction,no-pipeline-tag
    PegasusModel,feature-extraction
    Phi3ForCausalLM,text-generation
    PhiForCausalLM,text-generation
    Pix2StructForConditionalGeneration,image-to-text
    PoolFormerModel,image-feature-extraction
    PvtForImageClassification,image-classification
    Qwen2ForCausalLM,text-generation
    RTDetrForObjectDetection,object-detection
    RegNetModel,image-feature-extraction
    RemBertModel,feature-extraction
    ResNetForImageClassification,image-classification
    RoFormerModel,feature-extraction
    RobertaModel,feature-extraction
    RtDetrV2ForObjectDetection,object-detection
    SEWDModel,feature-extraction
    SEWModel,feature-extraction
    SamModel,mask-generation
    SegformerModel,image-feature-extraction
    SiglipModel,zero-shot-image-classification
    SiglipVisionModel,image-feature-extraction
    Speech2TextModel,feature-extraction
    SpeechT5ForTextToSpeech,text-to-audio
    SplinterModel,feature-extraction
    SqueezeBertModel,feature-extraction
    Swin2SRModel,image-feature-extraction
    SwinModel,image-feature-extraction
    Swinv2Model,image-feature-extraction
    T5ForConditionalGeneration,text2text-generation
    TableTransformerModel,image-feature-extraction
    UniSpeechForSequenceClassification,audio-classification
    ViTForImageClassification,image-classification
    ViTMAEModel,image-feature-extraction
    ViTMSNForImageClassification,image-classification
    VisionEncoderDecoderModel,document-question-answering
    VitPoseForPoseEstimation,keypoint-detection
    VitsModel,text-to-audio
    Wav2Vec2ConformerForCTC,automatic-speech-recognition
    Wav2Vec2Model,feature-extraction
    WhisperForConditionalGeneration,no-pipeline-tag
    XLMModel,feature-extraction
    XLMRobertaForCausalLM,text-generation
    YolosForObjectDetection,object-detection
    YolosModel,image-feature-extraction"""
)

__data_tasks__ = [
    "automatic-speech-recognition",
    "image-text-to-text",
    "image-to-text",
    "text-generation",
    "object-detection",
    "document-question-answering",
    "feature-extraction",
    "text-to-audio",
    "zero-shot-image-classification",
    "image-segmentation",
    "reinforcement-learning",
    "no-pipeline-tag",
    "image-classification",
    "text2text-generation",
    "mask-generation",
    "keypoint-detection",
    "audio-classification",
    "image-feature-extraction",
    "fill-mask",
]

__models_testing__ = """
hf-internal-testing/tiny-random-BeitForImageClassification
hf-internal-testing/tiny-random-convnext
fxmarty/tiny-random-GemmaForCausalLM
hf-internal-testing/tiny-random-GPTNeoXForCausalLM
hf-internal-testing/tiny-random-GraniteForCausalLM
hf-internal-testing/tiny-random-HieraForImageClassification
fxmarty/tiny-llama-fast-tokenizer
sshleifer/tiny-marian-en-de
hf-internal-testing/tiny-random-MaskFormerForInstanceSegmentation
echarlaix/tiny-random-mistral
hf-internal-testing/tiny-random-mobilevit
hf-internal-testing/tiny-random-MoonshineForConditionalGeneration
hf-internal-testing/tiny-random-OlmoForCausalLM
hf-internal-testing/tiny-random-Olmo2ForCausalLM
echarlaix/tiny-random-PhiForCausalLM
Xenova/tiny-random-Phi3ForCausalLM
fxmarty/pix2struct-tiny-random
fxmarty/tiny-dummy-qwen2
hf-internal-testing/tiny-random-ViTMSNForImageClassification
hf-internal-testing/tiny-random-YolosModel
hf-internal-testing/tiny-xlm-roberta
HuggingFaceM4/tiny-random-idefics
"""


[docs] @functools.cache def load_models_testing() -> List[str]: """Returns model ids for testing.""" return [_.strip() for _ in __models_testing__.split("\n") if _.strip()]
[docs] @functools.cache def load_architecture_task() -> Dict[str, str]: """ Returns a dictionary mapping architecture to task. import pprint from onnx_diagnostic.torch_models.hghub.hub_data import load_architecture_task pprint.pprint(load_architecture_task()) """ import pandas df = pandas.read_csv(io.StringIO(__data_arch__)) return dict(zip(list(df["architecture"]), list(df["task"])))