Source code for composer.models.bert.model

# Copyright 2022 MosaicML Composer authors
# SPDX-License-Identifier: Apache-2.0

"""Implements a BERT wrapper around a :class:`.ComposerTransformer`."""

from __future__ import annotations

from typing import Optional

from torchmetrics import MeanSquaredError
from torchmetrics.classification import MatthewsCorrCoef, MulticlassAccuracy
from torchmetrics.regression import SpearmanCorrCoef

from composer.metrics.nlp import BinaryF1Score, LanguageCrossEntropy, MaskedAccuracy
from composer.models.huggingface import HuggingFaceModel
from composer.utils.import_helpers import MissingConditionalImportError

__all__ = ['create_bert_mlm', 'create_bert_classification']


def create_bert_mlm(use_pretrained: Optional[bool] = False,
                    pretrained_model_name: Optional[str] = None,
                    model_config: Optional[dict] = None,
                    tokenizer_name: Optional[str] = None,
                    gradient_checkpointing: Optional[bool] = False):
    """BERT model based on |:hugging_face:| Transformers.

    For more information, see `Transformers <https://huggingface.co/transformers/>`_.

    Args:
        use_pretrained (bool, optional): Whether to initialize the model with the pretrained weights. Default: ``False``.
        pretrained_model_name (str, optional): Name of the Hugging Face model to instantiate. Default: ``'bert-base-uncased'``.
        model_config (dict, optional): The settings used to create a Hugging Face BertConfig. BertConfig is used to
            specify the architecture of a Hugging Face model.
        tokenizer_name (str, optional): Tokenizer name used to preprocess the dataset and validate the model's inputs.
        gradient_checkpointing (bool, optional): Use gradient checkpointing. Default: ``False``.

        .. code-block::

            {
              "_name_or_path": "bert-base-uncased",
              "architectures": ["BertForMaskedLM"],
              "attention_probs_dropout_prob": 0.1,
              "classifier_dropout": null,
              "gradient_checkpointing": false,
              "hidden_act": "gelu",
              "hidden_dropout_prob": 0.1,
              "hidden_size": 768,
              "initializer_range": 0.02,
              "intermediate_size": 3072,
              "layer_norm_eps": 1e-12,
              "max_position_embeddings": 512,
              "model_type": "bert",
              "num_attention_heads": 12,
              "num_hidden_layers": 12,
              "pad_token_id": 0,
              "position_embedding_type": "absolute",
              "transformers_version": "4.16.0",
              "type_vocab_size": 2,
              "use_cache": true,
              "vocab_size": 30522
            }

    To create a BERT model for Masked Language Model pretraining:

    .. testcode::

        from composer.models import create_bert_mlm

        model = create_bert_mlm()
    """
    try:
        import transformers
    except ImportError as e:
        raise MissingConditionalImportError(extra_deps_group='nlp', conda_package='transformers') from e

    if not model_config:
        model_config = {}

    if not pretrained_model_name:
        pretrained_model_name = 'bert-base-uncased'

    if use_pretrained:
        assert transformers.AutoModelForMaskedLM.from_pretrained is not None, 'AutoModelForMaskedLM has from_pretrained method'
        model = transformers.AutoModelForMaskedLM.from_pretrained(pretrained_model_name_or_path=pretrained_model_name,
                                                                  **model_config)
    else:
        config = transformers.AutoConfig.from_pretrained(pretrained_model_name, **model_config)
        assert transformers.AutoModelForMaskedLM.from_config is not None, 'AutoModelForMaskedLM has from_config method'
        model = transformers.AutoModelForMaskedLM.from_config(config)

    if gradient_checkpointing:
        model.gradient_checkpointing_enable()  # type: ignore

    # setup the tokenizer
    if tokenizer_name:
        tokenizer = transformers.AutoTokenizer.from_pretrained(tokenizer_name)
    else:
        tokenizer = None

    metrics = [LanguageCrossEntropy(ignore_index=-100), MaskedAccuracy(ignore_index=-100)]

    return HuggingFaceModel(model=model, tokenizer=tokenizer, use_logits=True, metrics=metrics)
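
# Illustrative usage sketch (not part of the upstream module): a hedged example of
# how ``create_bert_mlm`` might be exercised with a small custom config so that no
# pretrained weights are downloaded. The tiny config values and the helper name
# ``_demo_bert_mlm_forward`` are assumptions for illustration only; the sketch also
# assumes ``HuggingFaceModel`` exposes the tokenizer it was constructed with as
# ``.tokenizer``.
def _demo_bert_mlm_forward():
    tiny_config = {
        'hidden_size': 64,
        'num_hidden_layers': 2,
        'num_attention_heads': 2,
        'intermediate_size': 128,
    }
    composer_model = create_bert_mlm(
        use_pretrained=False,
        model_config=tiny_config,
        tokenizer_name='bert-base-uncased',
    )
    tokenizer = composer_model.tokenizer
    batch = tokenizer(['composer makes training fast'], return_tensors='pt')
    # For masked language modeling, labels share the shape of ``input_ids``;
    # positions set to -100 are ignored by the loss (matching the metrics above).
    batch['labels'] = batch['input_ids'].clone()
    outputs = composer_model(batch)
    return outputs.logits.shape  # (batch_size, sequence_length, vocab_size)
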
def create_bert_classification(num_labels: int = 2,
                               use_pretrained: bool = False,
                               pretrained_model_name: Optional[str] = None,
                               model_config: Optional[dict] = None,
                               tokenizer_name: Optional[str] = None,
                               gradient_checkpointing: bool = False):
    """BERT classification model based on |:hugging_face:| Transformers.

    For more information, see `Transformers <https://huggingface.co/transformers/>`_.

    Args:
        num_labels (int, optional): The number of classes in the classification task. Default: ``2``.
        use_pretrained (bool, optional): Whether to initialize the model with the pretrained weights. Default: ``False``.
        pretrained_model_name (str, optional): Name of the Hugging Face model to instantiate. Default: ``'bert-base-uncased'``.
        model_config (dict, optional): The settings used to create a Hugging Face BertConfig. BertConfig is used to
            specify the architecture of a Hugging Face model.
        tokenizer_name (str, optional): Tokenizer name used to preprocess the dataset and validate the model's inputs.
        gradient_checkpointing (bool, optional): Use gradient checkpointing. Default: ``False``.

        .. code-block::

            {
              "_name_or_path": "bert-base-uncased",
              "architectures": ["BertForSequenceClassification"],
              "attention_probs_dropout_prob": 0.1,
              "classifier_dropout": null,
              "gradient_checkpointing": false,
              "hidden_act": "gelu",
              "hidden_dropout_prob": 0.1,
              "hidden_size": 768,
              "id2label": {
                "0": "LABEL_0",
                "1": "LABEL_1",
                "2": "LABEL_2"
              },
              "initializer_range": 0.02,
              "intermediate_size": 3072,
              "label2id": {
                "LABEL_0": 0,
                "LABEL_1": 1,
                "LABEL_2": 2
              },
              "layer_norm_eps": 1e-12,
              "max_position_embeddings": 512,
              "model_type": "bert",
              "num_attention_heads": 12,
              "num_hidden_layers": 12,
              "pad_token_id": 0,
              "position_embedding_type": "absolute",
              "transformers_version": "4.16.0",
              "type_vocab_size": 2,
              "use_cache": true,
              "vocab_size": 30522
            }

    To create a BERT model for classification:

    .. testcode::

        from composer.models import create_bert_classification

        model = create_bert_classification(num_labels=3)  # if the task has three classes

    Note:
        This function can be used to construct a BERT model for regression by setting ``num_labels == 1``.
        This will have two noteworthy effects. First, it will switch the training loss to :class:`~torch.nn.MSELoss`.
        Second, the returned :class:`.ComposerModel`'s train/validation metrics will be
        :class:`~torchmetrics.MeanSquaredError` and :class:`~torchmetrics.SpearmanCorrCoef`.

        For the classification case (when ``num_labels > 1``), the training loss is :class:`~torch.nn.CrossEntropyLoss`,
        and the train/validation metrics are :class:`~torchmetrics.MulticlassAccuracy` and
        :class:`~torchmetrics.MatthewsCorrCoef`, as well as :class:`.BinaryF1Score` if ``num_labels == 2``.
    """
    try:
        import transformers
    except ImportError as e:
        raise MissingConditionalImportError(extra_deps_group='nlp', conda_package='transformers') from e

    if not model_config:
        model_config = {}

    model_config['num_labels'] = num_labels

    if not pretrained_model_name:
        pretrained_model_name = 'bert-base-uncased'

    if use_pretrained:
        assert transformers.AutoModelForSequenceClassification.from_pretrained is not None, 'AutoModelForSequenceClassification has from_pretrained method'
        model = transformers.AutoModelForSequenceClassification.from_pretrained(
            pretrained_model_name_or_path=pretrained_model_name, **model_config)
    else:
        config = transformers.AutoConfig.from_pretrained(pretrained_model_name, **model_config)
        assert transformers.AutoModelForSequenceClassification.from_config is not None, 'AutoModelForSequenceClassification has from_config method'
        model = transformers.AutoModelForSequenceClassification.from_config(config)

    if gradient_checkpointing:
        model.gradient_checkpointing_enable()

    # setup the tokenizer
    if tokenizer_name:
        tokenizer = transformers.AutoTokenizer.from_pretrained(tokenizer_name)
    else:
        tokenizer = None

    if num_labels == 1:
        # Metrics for a regression model
        metrics = [MeanSquaredError(), SpearmanCorrCoef()]
    else:
        # Metrics for a classification model
        metrics = [
            MulticlassAccuracy(num_classes=num_labels, average='micro'),
            MatthewsCorrCoef(task='multiclass', num_classes=num_labels)
        ]
        if num_labels == 2:
            metrics.append(BinaryF1Score())

    return HuggingFaceModel(model=model, tokenizer=tokenizer, use_logits=True, metrics=metrics)