You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1029 lines
68 KiB

# Copyright 2018 The HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations
import json
import os
import warnings
from pathlib import Path
from typing import TYPE_CHECKING, Any, Optional, Union
from huggingface_hub import is_offline_mode, model_info
from ..configuration_utils import PreTrainedConfig
from ..dynamic_module_utils import get_class_from_dynamic_module
from ..feature_extraction_utils import FeatureExtractionMixin, PreTrainedFeatureExtractor
from ..image_processing_utils import BaseImageProcessor
from ..models.auto.configuration_auto import AutoConfig
from ..models.auto.feature_extraction_auto import FEATURE_EXTRACTOR_MAPPING, AutoFeatureExtractor
from ..models.auto.image_processing_auto import IMAGE_PROCESSOR_MAPPING, AutoImageProcessor
from ..models.auto.modeling_auto import AutoModelForDepthEstimation, AutoModelForImageToImage
from ..models.auto.processing_auto import PROCESSOR_MAPPING, AutoProcessor
from ..models.auto.tokenization_auto import TOKENIZER_MAPPING, AutoTokenizer
from ..processing_utils import ProcessorMixin
from ..tokenization_python import PreTrainedTokenizer
from ..utils import (
CONFIG_NAME,
cached_file,
extract_commit_hash,
find_adapter_config_file,
is_kenlm_available,
is_peft_available,
is_pyctcdecode_available,
is_torch_available,
logging,
)
from .any_to_any import AnyToAnyPipeline
from .audio_classification import AudioClassificationPipeline
from .automatic_speech_recognition import AutomaticSpeechRecognitionPipeline
from .base import (
ArgumentHandler,
CsvPipelineDataFormat,
JsonPipelineDataFormat,
PipedPipelineDataFormat,
Pipeline,
PipelineDataFormat,
PipelineException,
PipelineRegistry,
get_default_model_and_revision,
load_model,
)
from .depth_estimation import DepthEstimationPipeline
from .document_question_answering import DocumentQuestionAnsweringPipeline
from .feature_extraction import FeatureExtractionPipeline
from .fill_mask import FillMaskPipeline
from .image_classification import ImageClassificationPipeline
from .image_feature_extraction import ImageFeatureExtractionPipeline
from .image_segmentation import ImageSegmentationPipeline
from .image_text_to_text import ImageTextToTextPipeline
from .image_to_image import ImageToImagePipeline
from .keypoint_matching import KeypointMatchingPipeline
from .mask_generation import MaskGenerationPipeline
from .object_detection import ObjectDetectionPipeline
from .question_answering import QuestionAnsweringArgumentHandler, QuestionAnsweringPipeline
from .table_question_answering import TableQuestionAnsweringArgumentHandler, TableQuestionAnsweringPipeline
from .text_classification import TextClassificationPipeline
from .text_generation import TextGenerationPipeline
from .text_to_audio import TextToAudioPipeline
from .token_classification import (
AggregationStrategy,
NerPipeline,
TokenClassificationArgumentHandler,
TokenClassificationPipeline,
)
from .video_classification import VideoClassificationPipeline
from .visual_question_answering import VisualQuestionAnsweringPipeline
from .zero_shot_audio_classification import ZeroShotAudioClassificationPipeline
from .zero_shot_classification import ZeroShotClassificationArgumentHandler, ZeroShotClassificationPipeline
from .zero_shot_image_classification import ZeroShotImageClassificationPipeline
from .zero_shot_object_detection import ZeroShotObjectDetectionPipeline
if is_torch_available():
import torch
from ..models.auto.modeling_auto import (
AutoModel,
AutoModelForAudioClassification,
AutoModelForCausalLM,
AutoModelForCTC,
AutoModelForDocumentQuestionAnswering,
AutoModelForImageClassification,
AutoModelForImageSegmentation,
AutoModelForImageTextToText,
AutoModelForKeypointMatching,
AutoModelForMaskedLM,
AutoModelForMaskGeneration,
AutoModelForMultimodalLM,
AutoModelForObjectDetection,
AutoModelForQuestionAnswering,
AutoModelForSemanticSegmentation,
AutoModelForSeq2SeqLM,
AutoModelForSequenceClassification,
AutoModelForSpeechSeq2Seq,
AutoModelForTableQuestionAnswering,
AutoModelForTextToSpectrogram,
AutoModelForTextToWaveform,
AutoModelForTokenClassification,
AutoModelForVideoClassification,
AutoModelForVisualQuestionAnswering,
AutoModelForZeroShotImageClassification,
AutoModelForZeroShotObjectDetection,
)
if TYPE_CHECKING:
from ..modeling_utils import PreTrainedModel
from ..tokenization_utils_tokenizers import PreTrainedTokenizerFast
logger = logging.get_logger(__name__)
# Register all the supported tasks here
TASK_ALIASES = {
"sentiment-analysis": "text-classification",
"ner": "token-classification",
"vqa": "visual-question-answering",
"text-to-speech": "text-to-audio",
}
SUPPORTED_TASKS = {
"audio-classification": {
"impl": AudioClassificationPipeline,
"pt": (AutoModelForAudioClassification,) if is_torch_available() else (),
"default": {"model": ("superb/wav2vec2-base-superb-ks", "372e048")},
"type": "audio",
},
"automatic-speech-recognition": {
"impl": AutomaticSpeechRecognitionPipeline,
"pt": (AutoModelForCTC, AutoModelForSpeechSeq2Seq) if is_torch_available() else (),
"default": {"model": ("facebook/wav2vec2-base-960h", "22aad52")},
"type": "multimodal",
},
"text-to-audio": {
"impl": TextToAudioPipeline,
"pt": (AutoModelForTextToWaveform, AutoModelForTextToSpectrogram) if is_torch_available() else (),
"default": {"model": ("suno/bark-small", "1dbd7a1")},
"type": "text",
},
"feature-extraction": {
"impl": FeatureExtractionPipeline,
"pt": (AutoModel,) if is_torch_available() else (),
"default": {"model": ("distilbert/distilbert-base-cased", "6ea8117")},
"type": "multimodal",
},
"text-classification": {
"impl": TextClassificationPipeline,
"pt": (AutoModelForSequenceClassification,) if is_torch_available() else (),
"default": {"model": ("distilbert/distilbert-base-uncased-finetuned-sst-2-english", "714eb0f")},
"type": "text",
},
"token-classification": {
"impl": TokenClassificationPipeline,
"pt": (AutoModelForTokenClassification,) if is_torch_available() else (),
"default": {"model": ("dbmdz/bert-large-cased-finetuned-conll03-english", "4c53496")},
"type": "text",
},
"question-answering": {
"impl": QuestionAnsweringPipeline,
"pt": (AutoModelForQuestionAnswering,) if is_torch_available() else (),
"default": {"model": ("distilbert/distilbert-base-cased-distilled-squad", "564e9b5")},
"type": "text",
},
"table-question-answering": {
"impl": TableQuestionAnsweringPipeline,
"pt": (AutoModelForTableQuestionAnswering,) if is_torch_available() else (),
"default": {"model": ("google/tapas-base-finetuned-wtq", "e3dde19")},
"type": "text",
},
"visual-question-answering": {
"impl": VisualQuestionAnsweringPipeline,
"pt": (AutoModelForVisualQuestionAnswering,) if is_torch_available() else (),
"default": {"model": ("dandelin/vilt-b32-finetuned-vqa", "d0a1f6a")},
"type": "multimodal",
},
"document-question-answering": {
"impl": DocumentQuestionAnsweringPipeline,
"pt": (AutoModelForDocumentQuestionAnswering,) if is_torch_available() else (),
"default": {"model": ("impira/layoutlm-document-qa", "beed3c4")},
"type": "multimodal",
},
"fill-mask": {
"impl": FillMaskPipeline,
"pt": (AutoModelForMaskedLM,) if is_torch_available() else (),
"default": {"model": ("distilbert/distilroberta-base", "fb53ab8")},
"type": "text",
},
"text-generation": {
"impl": TextGenerationPipeline,
"pt": (AutoModelForCausalLM,) if is_torch_available() else (),
"default": {"model": ("openai-community/gpt2", "607a30d")},
"type": "text",
},
"zero-shot-classification": {
"impl": ZeroShotClassificationPipeline,
"pt": (AutoModelForSequenceClassification,) if is_torch_available() else (),
"default": {
"model": ("facebook/bart-large-mnli", "d7645e1"),
"config": ("facebook/bart-large-mnli", "d7645e1"),
},
"type": "text",
},
"zero-shot-image-classification": {
"impl": ZeroShotImageClassificationPipeline,
"pt": (AutoModelForZeroShotImageClassification,) if is_torch_available() else (),
"default": {"model": ("openai/clip-vit-base-patch32", "3d74acf")},
"type": "multimodal",
},
"zero-shot-audio-classification": {
"impl": ZeroShotAudioClassificationPipeline,
"pt": (AutoModel,) if is_torch_available() else (),
"default": {"model": ("laion/clap-htsat-fused", "cca9e28")},
"type": "multimodal",
},
"image-classification": {
"impl": ImageClassificationPipeline,
"pt": (AutoModelForImageClassification,) if is_torch_available() else (),
"default": {"model": ("google/vit-base-patch16-224", "3f49326")},
"type": "image",
},
"image-feature-extraction": {
"impl": ImageFeatureExtractionPipeline,
"pt": (AutoModel,) if is_torch_available() else (),
"default": {"model": ("google/vit-base-patch16-224", "3f49326")},
"type": "image",
},
"image-segmentation": {
"impl": ImageSegmentationPipeline,
"pt": (AutoModelForImageSegmentation, AutoModelForSemanticSegmentation) if is_torch_available() else (),
"default": {"model": ("facebook/detr-resnet-50-panoptic", "d53b52a")},
"type": "multimodal",
},
"image-text-to-text": {
"impl": ImageTextToTextPipeline,
"pt": (AutoModelForImageTextToText,) if is_torch_available() else (),
"default": {"model": ("llava-hf/llava-onevision-qwen2-0.5b-ov-hf", "2c9ba3b")},
"type": "multimodal",
},
"object-detection": {
"impl": ObjectDetectionPipeline,
"pt": (AutoModelForObjectDetection,) if is_torch_available() else (),
"default": {"model": ("facebook/detr-resnet-50", "1d5f47b")},
"type": "multimodal",
},
"zero-shot-object-detection": {
"impl": ZeroShotObjectDetectionPipeline,
"pt": (AutoModelForZeroShotObjectDetection,) if is_torch_available() else (),
"default": {"model": ("google/owlvit-base-patch32", "cbc355f")},
"type": "multimodal",
},
"depth-estimation": {
"impl": DepthEstimationPipeline,
"pt": (AutoModelForDepthEstimation,) if is_torch_available() else (),
"default": {"model": ("Intel/dpt-large", "bc15f29")},
"type": "image",
},
"video-classification": {
"impl": VideoClassificationPipeline,
"pt": (AutoModelForVideoClassification,) if is_torch_available() else (),
"default": {"model": ("MCG-NJU/videomae-base-finetuned-kinetics", "488eb9a")},
"type": "video",
},
"mask-generation": {
"impl": MaskGenerationPipeline,
"pt": (AutoModelForMaskGeneration,) if is_torch_available() else (),
"default": {"model": ("facebook/sam-vit-huge", "87aecf0")},
"type": "multimodal",
},
"image-to-image": {
"impl": ImageToImagePipeline,
"pt": (AutoModelForImageToImage,) if is_torch_available() else (),
"default": {"model": ("caidas/swin2SR-classical-sr-x2-64", "cee1c92")},
"type": "image",
},
"keypoint-matching": {
"impl": KeypointMatchingPipeline,
"pt": (AutoModelForKeypointMatching,) if is_torch_available() else (),
"default": {"model": ("magic-leap-community/superglue_outdoor", "f4041f8")},
"type": "image",
},
"any-to-any": {
"impl": AnyToAnyPipeline,
"tf": (),
"pt": (AutoModelForMultimodalLM,) if is_torch_available() else (),
"default": {
"model": {
"pt": ("google/gemma-3n-E4B-it", "c1221e9"),
}
},
"type": "multimodal",
},
}
PIPELINE_REGISTRY = PipelineRegistry(supported_tasks=SUPPORTED_TASKS, task_aliases=TASK_ALIASES)
def get_supported_tasks() -> list[str]:
"""
Returns a list of supported task strings.
"""
return PIPELINE_REGISTRY.get_supported_tasks()
def get_task(model: str, token: str | None = None, **deprecated_kwargs) -> str:
if is_offline_mode():
raise RuntimeError("You cannot infer task automatically within `pipeline` when using offline mode")
try:
info = model_info(model, token=token)
except Exception as e:
raise RuntimeError(f"Instantiating a pipeline without a task set raised an error: {e}")
if not info.pipeline_tag:
raise RuntimeError(
f"The model {model} does not seem to have a correct `pipeline_tag` set to infer the task automatically"
)
if getattr(info, "library_name", "transformers") not in {"transformers", "timm"}:
raise RuntimeError(f"This model is meant to be used with {info.library_name} not with transformers")
task = info.pipeline_tag
return task
def check_task(task: str) -> tuple[str, dict, Any]:
"""
Checks an incoming task string, to validate it's correct and return the default Pipeline and Model classes, and
default models if they exist.
Args:
task (`str`):
The task defining which pipeline will be returned. Currently accepted tasks are:
- `"audio-classification"`
- `"automatic-speech-recognition"`
- `"conversational"`
- `"depth-estimation"`
- `"document-question-answering"`
- `"feature-extraction"`
- `"fill-mask"`
- `"image-classification"`
- `"image-feature-extraction"`
- `"image-segmentation"`
- `"image-to-image"`
- `"keypoint-matching"`
- `"object-detection"`
- `"question-answering"`
- `"table-question-answering"`
- `"text-classification"` (alias `"sentiment-analysis"` available)
- `"text-generation"`
- `"text-to-audio"` (alias `"text-to-speech"` available)
- `"token-classification"` (alias `"ner"` available)
- `"video-classification"`
- `"visual-question-answering"` (alias `"vqa"` available)
- `"zero-shot-classification"`
- `"zero-shot-image-classification"`
- `"zero-shot-object-detection"`
Returns:
(normalized_task: `str`, task_defaults: `dict`, task_options: (`tuple`, None)) The normalized task name
(removed alias and options).
"""
return PIPELINE_REGISTRY.check_task(task)
def clean_custom_task(task_info):
import transformers
if "impl" not in task_info:
raise RuntimeError("This model introduces a custom pipeline without specifying its implementation.")
pt_class_names = task_info.get("pt", ())
if isinstance(pt_class_names, str):
pt_class_names = [pt_class_names]
task_info["pt"] = tuple(getattr(transformers, c) for c in pt_class_names)
return task_info, None
# <generated-code>
# fmt: off
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
# The part of the file below was automatically generated from the code.
# Do NOT edit this part of the file manually as any edits will be overwritten by the generation
# of the file. If any change should be done, please apply the changes to the `pipeline` function
# below and run `python utils/check_pipeline_typing.py --fix_and_overwrite` to update the file.
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
from typing import Literal, overload
@overload
def pipeline(task: Literal[None], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> Pipeline: ...
@overload
def pipeline(task: Literal["any-to-any"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> AnyToAnyPipeline: ...
@overload
def pipeline(task: Literal["audio-classification"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> AudioClassificationPipeline: ...
@overload
def pipeline(task: Literal["automatic-speech-recognition"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> AutomaticSpeechRecognitionPipeline: ...
@overload
def pipeline(task: Literal["depth-estimation"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> DepthEstimationPipeline: ...
@overload
def pipeline(task: Literal["document-question-answering"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> DocumentQuestionAnsweringPipeline: ...
@overload
def pipeline(task: Literal["feature-extraction"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> FeatureExtractionPipeline: ...
@overload
def pipeline(task: Literal["fill-mask"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> FillMaskPipeline: ...
@overload
def pipeline(task: Literal["image-classification"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> ImageClassificationPipeline: ...
@overload
def pipeline(task: Literal["image-feature-extraction"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> ImageFeatureExtractionPipeline: ...
@overload
def pipeline(task: Literal["image-segmentation"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> ImageSegmentationPipeline: ...
@overload
def pipeline(task: Literal["image-text-to-text"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> ImageTextToTextPipeline: ...
@overload
def pipeline(task: Literal["image-to-image"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> ImageToImagePipeline: ...
@overload
def pipeline(task: Literal["keypoint-matching"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> KeypointMatchingPipeline: ...
@overload
def pipeline(task: Literal["mask-generation"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> MaskGenerationPipeline: ...
@overload
def pipeline(task: Literal["object-detection"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> ObjectDetectionPipeline: ...
@overload
def pipeline(task: Literal["question-answering"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> QuestionAnsweringPipeline: ...
@overload
def pipeline(task: Literal["table-question-answering"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> TableQuestionAnsweringPipeline: ...
@overload
def pipeline(task: Literal["text-classification"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> TextClassificationPipeline: ...
@overload
def pipeline(task: Literal["text-generation"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> TextGenerationPipeline: ...
@overload
def pipeline(task: Literal["text-to-audio"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> TextToAudioPipeline: ...
@overload
def pipeline(task: Literal["token-classification"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> TokenClassificationPipeline: ...
@overload
def pipeline(task: Literal["video-classification"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> VideoClassificationPipeline: ...
@overload
def pipeline(task: Literal["visual-question-answering"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> VisualQuestionAnsweringPipeline: ...
@overload
def pipeline(task: Literal["zero-shot-audio-classification"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> ZeroShotAudioClassificationPipeline: ...
@overload
def pipeline(task: Literal["zero-shot-classification"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> ZeroShotClassificationPipeline: ...
@overload
def pipeline(task: Literal["zero-shot-image-classification"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> ZeroShotImageClassificationPipeline: ...
@overload
def pipeline(task: Literal["zero-shot-object-detection"], model: str | PreTrainedModel | None = None, config: str | PreTrainedConfig | None = None, tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None, feature_extractor: str | PreTrainedFeatureExtractor | None = None, image_processor: str | BaseImageProcessor | None = None, processor: str | ProcessorMixin | None = None, revision: str | None = None, use_fast: bool = True, token: str | bool | None = None, device: int | str | torch.device | None = None, device_map: str | dict[str, int | str] | None = None, dtype: str | torch.dtype | None = "auto", trust_remote_code: bool | None = None, model_kwargs: dict[str, Any] | None = None, pipeline_class: Any | None = None, **kwargs: Any) -> ZeroShotObjectDetectionPipeline: ...
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
# The part of the file above was automatically generated from the code.
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
# fmt: on
# </generated-code>
def pipeline(
task: str | None = None,
model: str | PreTrainedModel | None = None,
config: str | PreTrainedConfig | None = None,
tokenizer: str | PreTrainedTokenizer | PreTrainedTokenizerFast | None = None,
feature_extractor: str | PreTrainedFeatureExtractor | None = None,
image_processor: str | BaseImageProcessor | None = None,
processor: str | ProcessorMixin | None = None,
revision: str | None = None,
use_fast: bool = True,
token: str | bool | None = None,
device: int | str | torch.device | None = None,
device_map: str | dict[str, int | str] | None = None,
dtype: str | torch.dtype | None = "auto",
trust_remote_code: bool | None = None,
model_kwargs: dict[str, Any] | None = None,
pipeline_class: Any | None = None,
**kwargs: Any,
) -> Pipeline:
"""
Utility factory method to build a [`Pipeline`].
A pipeline consists of:
- One or more components for pre-processing model inputs, such as a [tokenizer](tokenizer),
[image_processor](image_processor), [feature_extractor](feature_extractor), or [processor](processors).
- A [model](model) that generates predictions from the inputs.
- Optional post-processing steps to refine the model's output, which can also be handled by processors.
<Tip>
While there are such optional arguments as `tokenizer`, `feature_extractor`, `image_processor`, and `processor`,
they shouldn't be specified all at once. If these components are not provided, `pipeline` will try to load
required ones automatically. In case you want to provide these components explicitly, please refer to a
specific pipeline in order to get more details regarding what components are required.
</Tip>
Args:
task (`str`):
The task defining which pipeline will be returned. Currently accepted tasks are:
- `"audio-classification"`: will return a [`AudioClassificationPipeline`].
- `"automatic-speech-recognition"`: will return a [`AutomaticSpeechRecognitionPipeline`].
- `"depth-estimation"`: will return a [`DepthEstimationPipeline`].
- `"document-question-answering"`: will return a [`DocumentQuestionAnsweringPipeline`].
- `"feature-extraction"`: will return a [`FeatureExtractionPipeline`].
- `"fill-mask"`: will return a [`FillMaskPipeline`]:.
- `"image-classification"`: will return a [`ImageClassificationPipeline`].
- `"image-feature-extraction"`: will return an [`ImageFeatureExtractionPipeline`].
- `"image-segmentation"`: will return a [`ImageSegmentationPipeline`].
- `"image-text-to-text"`: will return a [`ImageTextToTextPipeline`].
- `"image-to-image"`: will return a [`ImageToImagePipeline`].
- `"keypoint-matching"`: will return a [`KeypointMatchingPipeline`].
- `"mask-generation"`: will return a [`MaskGenerationPipeline`].
- `"object-detection"`: will return a [`ObjectDetectionPipeline`].
- `"question-answering"`: will return a [`QuestionAnsweringPipeline`].
- `"table-question-answering"`: will return a [`TableQuestionAnsweringPipeline`].
- `"text-classification"` (alias `"sentiment-analysis"` available): will return a
[`TextClassificationPipeline`].
- `"text-generation"`: will return a [`TextGenerationPipeline`]:.
- `"text-to-audio"` (alias `"text-to-speech"` available): will return a [`TextToAudioPipeline`]:.
- `"token-classification"` (alias `"ner"` available): will return a [`TokenClassificationPipeline`].
- `"video-classification"`: will return a [`VideoClassificationPipeline`].
- `"visual-question-answering"`: will return a [`VisualQuestionAnsweringPipeline`].
- `"zero-shot-classification"`: will return a [`ZeroShotClassificationPipeline`].
- `"zero-shot-image-classification"`: will return a [`ZeroShotImageClassificationPipeline`].
- `"zero-shot-audio-classification"`: will return a [`ZeroShotAudioClassificationPipeline`].
- `"zero-shot-object-detection"`: will return a [`ZeroShotObjectDetectionPipeline`].
model (`str` or [`PreTrainedModel`], *optional*):
The model that will be used by the pipeline to make predictions. This can be a model identifier or an
actual instance of a pretrained model inheriting from [`PreTrainedModel`].
If not provided, the default for the `task` will be loaded.
config (`str` or [`PreTrainedConfig`], *optional*):
The configuration that will be used by the pipeline to instantiate the model. This can be a model
identifier or an actual pretrained model configuration inheriting from [`PreTrainedConfig`].
If not provided, the default configuration file for the requested model will be used. That means that if
`model` is given, its default configuration will be used. However, if `model` is not supplied, this
`task`'s default model's config is used instead.
tokenizer (`str` or [`PreTrainedTokenizer`], *optional*):
The tokenizer that will be used by the pipeline to encode data for the model. This can be a model
identifier or an actual pretrained tokenizer inheriting from [`PreTrainedTokenizer`].
If not provided, the default tokenizer for the given `model` will be loaded (if it is a string). If `model`
is not specified or not a string, then the default tokenizer for `config` is loaded (if it is a string).
However, if `config` is also not given or not a string, then the default tokenizer for the given `task`
will be loaded.
feature_extractor (`str` or [`PreTrainedFeatureExtractor`], *optional*):
The feature extractor that will be used by the pipeline to encode data for the model. This can be a model
identifier or an actual pretrained feature extractor inheriting from [`PreTrainedFeatureExtractor`].
Feature extractors are used for non-NLP models, such as Speech or Vision models as well as multi-modal
models. Multi-modal models will also require a tokenizer to be passed.
If not provided, the default feature extractor for the given `model` will be loaded (if it is a string). If
`model` is not specified or not a string, then the default feature extractor for `config` is loaded (if it
is a string). However, if `config` is also not given or not a string, then the default feature extractor
for the given `task` will be loaded.
image_processor (`str` or [`BaseImageProcessor`], *optional*):
The image processor that will be used by the pipeline to preprocess images for the model. This can be a
model identifier or an actual image processor inheriting from [`BaseImageProcessor`].
Image processors are used for Vision models and multi-modal models that require image inputs. Multi-modal
models will also require a tokenizer to be passed.
If not provided, the default image processor for the given `model` will be loaded (if it is a string). If
`model` is not specified or not a string, then the default image processor for `config` is loaded (if it is
a string).
processor (`str` or [`ProcessorMixin`], *optional*):
The processor that will be used by the pipeline to preprocess data for the model. This can be a model
identifier or an actual processor inheriting from [`ProcessorMixin`].
Processors are used for multi-modal models that require multi-modal inputs, for example, a model that
requires both text and image inputs.
If not provided, the default processor for the given `model` will be loaded (if it is a string). If `model`
is not specified or not a string, then the default processor for `config` is loaded (if it is a string).
revision (`str`, *optional*, defaults to `"main"`):
When passing a task name or a string model identifier: The specific model version to use. It can be a
branch name, a tag name, or a commit id, since we use a git-based system for storing models and other
artifacts on huggingface.co, so `revision` can be any identifier allowed by git.
use_fast (`bool`, *optional*, defaults to `True`):
Whether or not to use a Fast tokenizer if possible (a [`PreTrainedTokenizerFast`]).
token (`str` or *bool*, *optional*):
The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
when running `hf auth login`.
device (`int` or `str` or `torch.device`):
Defines the device (*e.g.*, `"cpu"`, `"cuda:1"`, `"mps"`, or a GPU ordinal rank like `1`) on which this
pipeline will be allocated.
device_map (`str` or `dict[str, Union[int, str, torch.device]`, *optional*):
Sent directly as `model_kwargs` (just a simpler shortcut). When `accelerate` library is present, set
`device_map="auto"` to compute the most optimized `device_map` automatically (see
[here](https://huggingface.co/docs/accelerate/main/en/package_reference/big_modeling#accelerate.cpu_offload)
for more information).
<Tip warning={true}>
Do not use `device_map` AND `device` at the same time as they will conflict
</Tip>
dtype (`str` or `torch.dtype`, *optional*):
Sent directly as `model_kwargs` (just a simpler shortcut) to use the available precision for this model
(`torch.float16`, `torch.bfloat16`, ... or `"auto"`).
trust_remote_code (`bool`, *optional*, defaults to `False`):
Whether or not to allow for custom code defined on the Hub in their own modeling, configuration,
tokenization or even pipeline files. This option should only be set to `True` for repositories you trust
and in which you have read the code, as it will execute code present on the Hub on your local machine.
model_kwargs (`dict[str, Any]`, *optional*):
Additional dictionary of keyword arguments passed along to the model's `from_pretrained(...,
**model_kwargs)` function.
kwargs (`dict[str, Any]`, *optional*):
Additional keyword arguments passed along to the specific pipeline init (see the documentation for the
corresponding pipeline class for possible values).
Returns:
[`Pipeline`]: A suitable pipeline for the task.
Examples:
```python
>>> from transformers import pipeline, AutoModelForTokenClassification, AutoTokenizer
>>> # Sentiment analysis pipeline
>>> analyzer = pipeline("sentiment-analysis")
>>> # Question answering pipeline, specifying the checkpoint identifier
>>> oracle = pipeline(
... "question-answering", model="distilbert/distilbert-base-cased-distilled-squad", tokenizer="google-bert/bert-base-cased"
... )
>>> # Named entity recognition pipeline, passing in a specific model and tokenizer
>>> model = AutoModelForTokenClassification.from_pretrained("dbmdz/bert-large-cased-finetuned-conll03-english")
>>> tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-cased")
>>> recognizer = pipeline("ner", model=model, tokenizer=tokenizer)
```"""
if model_kwargs is None:
model_kwargs = {}
code_revision = kwargs.pop("code_revision", None)
commit_hash = kwargs.pop("_commit_hash", None)
local_files_only = kwargs.get("local_files_only", False)
hub_kwargs = {
"revision": revision,
"token": token,
"trust_remote_code": trust_remote_code,
"_commit_hash": commit_hash,
"local_files_only": local_files_only,
}
if task is None and model is None:
raise RuntimeError(
"Impossible to instantiate a pipeline without either a task or a model "
"being specified. "
"Please provide a task class or a model"
)
if model is None and tokenizer is not None:
raise RuntimeError(
"Impossible to instantiate a pipeline with tokenizer specified but not the model as the provided tokenizer"
" may not be compatible with the default model. Please provide a PreTrainedModel class or a"
" path/identifier to a pretrained model when providing tokenizer."
)
if model is None and feature_extractor is not None:
raise RuntimeError(
"Impossible to instantiate a pipeline with feature_extractor specified but not the model as the provided"
" feature_extractor may not be compatible with the default model. Please provide a PreTrainedModel class"
" or a path/identifier to a pretrained model when providing feature_extractor."
)
if isinstance(model, Path):
model = str(model)
if commit_hash is None:
pretrained_model_name_or_path = None
if isinstance(config, str):
pretrained_model_name_or_path = config
elif config is None and isinstance(model, str):
pretrained_model_name_or_path = model
if not isinstance(config, PreTrainedConfig) and pretrained_model_name_or_path is not None:
# We make a call to the config file first (which may be absent) to get the commit hash as soon as possible
resolved_config_file = cached_file(
pretrained_model_name_or_path,
CONFIG_NAME,
_raise_exceptions_for_gated_repo=False,
_raise_exceptions_for_missing_entries=False,
_raise_exceptions_for_connection_errors=False,
cache_dir=model_kwargs.get("cache_dir"),
**hub_kwargs,
)
hub_kwargs["_commit_hash"] = extract_commit_hash(resolved_config_file, commit_hash)
else:
hub_kwargs["_commit_hash"] = getattr(config, "_commit_hash", None)
# Config is the primordial information item.
# Instantiate config if needed
adapter_path = None
if isinstance(config, str):
config = AutoConfig.from_pretrained(
config, _from_pipeline=task, code_revision=code_revision, **hub_kwargs, **model_kwargs
)
hub_kwargs["_commit_hash"] = config._commit_hash
elif config is None and isinstance(model, str):
# Check for an adapter file in the model path if PEFT is available
if is_peft_available():
# `find_adapter_config_file` doesn't accept `trust_remote_code`
_hub_kwargs = {k: v for k, v in hub_kwargs.items() if k != "trust_remote_code"}
maybe_adapter_path = find_adapter_config_file(
model,
token=hub_kwargs["token"],
revision=hub_kwargs["revision"],
_commit_hash=hub_kwargs["_commit_hash"],
)
if maybe_adapter_path is not None:
with open(maybe_adapter_path, "r", encoding="utf-8") as f:
adapter_config = json.load(f)
adapter_path = model
model = adapter_config["base_model_name_or_path"]
config = AutoConfig.from_pretrained(
model, _from_pipeline=task, code_revision=code_revision, **hub_kwargs, **model_kwargs
)
hub_kwargs["_commit_hash"] = config._commit_hash
custom_tasks = {}
if config is not None and len(getattr(config, "custom_pipelines", {})) > 0:
custom_tasks = config.custom_pipelines
if task is None and trust_remote_code is not False:
if len(custom_tasks) == 1:
task = list(custom_tasks.keys())[0]
else:
raise RuntimeError(
"We can't infer the task automatically for this model as there are multiple tasks available. Pick "
f"one in {', '.join(custom_tasks.keys())}"
)
if task is None and model is not None:
if not isinstance(model, str):
raise RuntimeError(
"Inferring the task automatically requires to check the hub with a model_id defined as a `str`. "
f"{model} is not a valid model_id."
)
task = get_task(model, token)
# Retrieve the task
if task in custom_tasks:
targeted_task, task_options = clean_custom_task(custom_tasks[task])
if pipeline_class is None:
if not trust_remote_code:
raise ValueError(
"Loading this pipeline requires you to execute the code in the pipeline file in that"
" repo on your local machine. Make sure you have read the code there to avoid malicious use, then"
" set the option `trust_remote_code=True` to remove this error."
)
class_ref = targeted_task["impl"]
pipeline_class = get_class_from_dynamic_module(
class_ref,
model,
code_revision=code_revision,
**hub_kwargs,
)
else:
normalized_task, targeted_task, task_options = check_task(task)
if pipeline_class is None:
pipeline_class = targeted_task["impl"]
# Use default model/config/tokenizer for the task if no model is provided
if model is None:
model, default_revision = get_default_model_and_revision(targeted_task, task_options)
revision = revision if revision is not None else default_revision
logger.warning(
f"No model was supplied, defaulted to {model} and revision {revision}.\n"
"Using a pipeline without specifying a model name and revision in production is not recommended."
)
hub_kwargs["revision"] = revision
if config is None and isinstance(model, str):
config = AutoConfig.from_pretrained(model, _from_pipeline=task, **hub_kwargs, **model_kwargs)
hub_kwargs["_commit_hash"] = config._commit_hash
if device_map is not None:
if "device_map" in model_kwargs:
raise ValueError(
'You cannot use both `pipeline(... device_map=..., model_kwargs={"device_map":...})` as those'
" arguments might conflict, use only one.)"
)
if device is not None:
logger.warning(
"Both `device` and `device_map` are specified. `device` will override `device_map`. You"
" will most likely encounter unexpected behavior. Please remove `device` and keep `device_map`."
)
model_kwargs["device_map"] = device_map
# BC for the `torch_dtype` argument
if (torch_dtype := kwargs.get("torch_dtype")) is not None:
logger.warning_once("`torch_dtype` is deprecated! Use `dtype` instead!")
# If both are provided, keep `dtype`
dtype = torch_dtype if dtype == "auto" else dtype
if "torch_dtype" in model_kwargs or "dtype" in model_kwargs:
if "torch_dtype" in model_kwargs:
logger.warning_once("`torch_dtype` is deprecated! Use `dtype` instead!")
# If the user did not explicitly provide `dtype` (i.e. the function default "auto" is still
# present) but a value is supplied inside `model_kwargs`, we silently defer to the latter instead of
# raising. This prevents false positives like providing `dtype` only via `model_kwargs` while the
# top-level argument keeps its default value "auto".
if dtype == "auto":
dtype = None
else:
raise ValueError(
'You cannot use both `pipeline(... dtype=..., model_kwargs={"dtype":...})` as those'
" arguments might conflict, use only one.)"
)
if dtype is not None:
if isinstance(dtype, str) and hasattr(torch, dtype):
dtype = getattr(torch, dtype)
model_kwargs["dtype"] = dtype
model_name = model if isinstance(model, str) else None
# Load the correct model if possible
if isinstance(model, str):
model_classes = targeted_task["pt"]
model = load_model(
adapter_path if adapter_path is not None else model,
model_classes=model_classes,
config=config,
task=task,
**hub_kwargs,
**model_kwargs,
)
hub_kwargs["_commit_hash"] = model.config._commit_hash
# Check which preprocessing classes the pipeline uses
# None values indicate optional classes that the pipeline can run without, we don't raise errors if loading fails
load_tokenizer = pipeline_class._load_tokenizer
load_feature_extractor = pipeline_class._load_feature_extractor
load_image_processor = pipeline_class._load_image_processor
load_processor = pipeline_class._load_processor
if load_tokenizer or load_tokenizer is None:
try:
# Try to infer tokenizer from model or config name (if provided as str)
if tokenizer is None:
if isinstance(model_name, str):
tokenizer = model_name
elif isinstance(config, str):
tokenizer = config
else:
# Impossible to guess what is the right tokenizer here
raise Exception(
"Impossible to guess which tokenizer to use. "
"Please provide a PreTrainedTokenizer class or a path/identifier to a pretrained tokenizer."
)
# Instantiate tokenizer if needed
if isinstance(tokenizer, (str, tuple)):
if isinstance(tokenizer, tuple):
# For tuple we have (tokenizer name, {kwargs})
use_fast = tokenizer[1].pop("use_fast", use_fast)
tokenizer_identifier = tokenizer[0]
tokenizer_kwargs = tokenizer[1]
else:
tokenizer_identifier = tokenizer
tokenizer_kwargs = model_kwargs.copy()
tokenizer_kwargs.pop("torch_dtype", None), tokenizer_kwargs.pop("dtype", None)
tokenizer = AutoTokenizer.from_pretrained(
tokenizer_identifier, use_fast=use_fast, _from_pipeline=task, **hub_kwargs, **tokenizer_kwargs
)
except Exception as e:
if load_tokenizer:
raise e
else:
tokenizer = None
if load_image_processor or load_image_processor is None:
try:
# Try to infer image processor from model or config name (if provided as str)
if image_processor is None:
if isinstance(model_name, str):
image_processor = model_name
elif isinstance(config, str):
image_processor = config
# Backward compatibility, as `feature_extractor` used to be the name
# for `ImageProcessor`.
elif feature_extractor is not None and isinstance(feature_extractor, BaseImageProcessor):
image_processor = feature_extractor
else:
# Impossible to guess what is the right image_processor here
raise Exception(
"Impossible to guess which image processor to use. "
"Please provide a PreTrainedImageProcessor class or a path/identifier "
"to a pretrained image processor."
)
# Instantiate image_processor if needed
if isinstance(image_processor, (str, tuple)):
image_processor = AutoImageProcessor.from_pretrained(
image_processor, _from_pipeline=task, **hub_kwargs, **model_kwargs
)
except Exception as e:
if load_image_processor:
raise e
else:
image_processor = None
if load_feature_extractor or load_feature_extractor is None:
try:
# Try to infer feature extractor from model or config name (if provided as str)
if feature_extractor is None:
if isinstance(model_name, str):
feature_extractor = model_name
elif isinstance(config, str):
feature_extractor = config
else:
# Impossible to guess what is the right feature_extractor here
raise Exception(
"Impossible to guess which feature extractor to use. "
"Please provide a PreTrainedFeatureExtractor class or a path/identifier "
"to a pretrained feature extractor."
)
# Instantiate feature_extractor if needed
if isinstance(feature_extractor, (str, tuple)):
feature_extractor = AutoFeatureExtractor.from_pretrained(
feature_extractor, _from_pipeline=task, **hub_kwargs, **model_kwargs
)
config_dict, _ = FeatureExtractionMixin.get_feature_extractor_dict(
pretrained_model_name_or_path or model_name,
**hub_kwargs,
)
processor_class = config_dict.get("processor_class", None)
if processor_class is not None and processor_class.endswith("WithLM") and isinstance(model_name, str):
try:
import kenlm # to trigger `ImportError` if not installed
from pyctcdecode import BeamSearchDecoderCTC
if os.path.isdir(model_name) or os.path.isfile(model_name):
decoder = BeamSearchDecoderCTC.load_from_dir(model_name)
else:
language_model_glob = os.path.join(
BeamSearchDecoderCTC._LANGUAGE_MODEL_SERIALIZED_DIRECTORY, "*"
)
alphabet_filename = BeamSearchDecoderCTC._ALPHABET_SERIALIZED_FILENAME
allow_patterns = [language_model_glob, alphabet_filename]
decoder = BeamSearchDecoderCTC.load_from_hf_hub(model_name, allow_patterns=allow_patterns)
kwargs["decoder"] = decoder
except ImportError as e:
logger.warning(
f"Could not load the `decoder` for {model_name}. Defaulting to raw CTC. Error: {e}"
)
if not is_kenlm_available():
logger.warning("Try to install `kenlm`: `pip install kenlm")
if not is_pyctcdecode_available():
logger.warning("Try to install `pyctcdecode`: `pip install pyctcdecode")
except Exception as e:
if load_feature_extractor:
raise e
else:
feature_extractor = None
if load_processor or load_processor is None:
try:
# Try to infer processor from model or config name (if provided as str)
if processor is None:
if isinstance(model_name, str):
processor = model_name
elif isinstance(config, str):
processor = config
else:
# Impossible to guess what is the right processor here
raise Exception(
"Impossible to guess which processor to use. "
"Please provide a processor instance or a path/identifier "
"to a processor."
)
# Instantiate processor if needed
if isinstance(processor, (str, tuple)):
processor = AutoProcessor.from_pretrained(processor, _from_pipeline=task, **hub_kwargs, **model_kwargs)
if not isinstance(processor, ProcessorMixin):
raise TypeError(
"Processor was loaded, but it is not an instance of `ProcessorMixin`. "
f"Got type `{type(processor)}` instead. Please check that you specified "
"correct pipeline task for the model and model has processor implemented and saved."
)
except Exception as e:
if load_processor:
raise e
else:
processor = None
if tokenizer is not None:
kwargs["tokenizer"] = tokenizer
if feature_extractor is not None:
kwargs["feature_extractor"] = feature_extractor
if dtype is not None:
kwargs["dtype"] = dtype
if image_processor is not None:
kwargs["image_processor"] = image_processor
if device is not None:
kwargs["device"] = device
if processor is not None:
kwargs["processor"] = processor
return pipeline_class(model=model, task=task, **kwargs)