core.model.transformer.model
View Source
```python
import os
from abc import abstractmethod
from functools import reduce

import transformers
from transformers import Trainer, TrainingArguments, PreTrainedModel

from core.model import Model
from core.utils import Random, check_and_create_folder, clear_filename, CacheName
import core.utils.const as const


class TransformerModel(Model):
    '''
        Basic Transformer model to work with Transformer-like models.

        This class is abstract, use subclasses `core.model.transformer.models.IsNextSCDBert`,
        `core.model.transformer.models.IsSCDBert`, `core.model.transformer.models.SelectSCDBert`,
        `core.model.transformer.models.GivenTextFindSCDBert`, or
        `core.model.transformer.models.GivenSCDFindTextBert`.

        **This model uses a GPU if found via CUDA, else it will use multiple CPU cores.**
        *However, a GPU will be much faster.*
    '''

    def __init__(self,
            annotated_corpus_train, annotated_corpus_eval,
            pretrained_model=None, dataset_memory_only=False,
            learning_rate=5e-5, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-8,
            weight_decay=0.01, warmup_steps=500, num_train_epochs=3,
            **kwargs
        ):
        '''
            Args:
                pretrained_model (string): The model to fine-tune; if ``None``, uses `core.utils.const.BERT_MODEL_DEFAULT`
                dataset_memory_only (bool): Passed to `core.model.transformer.dataset.Dataset`
                learning_rate (float): Passed to `transformers.TrainingArguments`
                adam_beta1 (float): Passed to `transformers.TrainingArguments`
                adam_beta2 (float): Passed to `transformers.TrainingArguments`
                adam_epsilon (float): Passed to `transformers.TrainingArguments`
                weight_decay (float): Passed to `transformers.TrainingArguments`
                warmup_steps (int): Passed to `transformers.TrainingArguments`
                num_train_epochs (int): Passed to `transformers.TrainingArguments`

            See documentation of [``transformers.TrainingArguments``](http://huggingface.co/transformers/main_classes/trainer.html#transformers.TrainingArguments).
        '''
        if pretrained_model is None:
            self.pretrained_model = self._default_pretrained_model()
        else:
            self.pretrained_model = pretrained_model  # fix: an explicitly requested model was previously dropped

        self.dataset_memory_only = dataset_memory_only

        self.training_params = {
            'learning_rate': learning_rate,
            'adam_beta1': adam_beta1,
            'adam_beta2': adam_beta2,
            'adam_epsilon': adam_epsilon,
            'weight_decay': weight_decay,
            'warmup_steps': warmup_steps,
            'num_train_epochs': num_train_epochs
        }
        # encode the hyperparameters as "beta1=0.9-beta2=0.999-..." for use in cache names
        self.training_params_str = reduce(
            lambda r, t: r + '-' + t[0][t[0].rindex('_')+1:] + '=' + str(t[1]),
            sorted(self.training_params.items()),
            ""
        )[1:]

        super().__init__(annotated_corpus_train, annotated_corpus_eval, **kwargs)

    @staticmethod
    def is_gpu_optimized():
        # fix: originally declared without `self`; a staticmethod makes the
        # missing argument explicit and keeps instance calls working
        return True

    @abstractmethod
    def _default_pretrained_model(self):
        pass

    @abstractmethod
    def _tokenizer_class(self):
        pass

    @abstractmethod
    def _model_class(self):
        pass

    @abstractmethod
    def _dataset_class(self):
        pass

    def _prepare_training(self):
        self.tokenizer = self._tokenizer_class().from_pretrained(
            self.pretrained_model, local_files_only=const.TRANSFORMERS_NO_NETWORK)
        self.data_train = self._dataset_class()(
            self.annotated_corpus_train, self.tokenizer, memory_only=self.dataset_memory_only)
        self.data_eval = self._dataset_class()(
            self.annotated_corpus_eval, self.tokenizer, memory_only=self.dataset_memory_only)
        self.model_save_dir = CacheName.dirname(os.path.join(
            const.FINETUNED_MODELDIR,
            clear_filename(self.pretrained_model + '-' + self.data_train.get_cachename()
                           + '_' + self.training_params_str + '_')
        ))

    def _batch_size_factorize(self, dataset, batch_size_single):
        # each dataset item already contains BATCH_SIZE_INNER samples,
        # so scale the configured batch size down accordingly (at least 1)
        b_s = batch_size_single // dataset.BATCH_SIZE_INNER
        return 1 if b_s < 1 else b_s

    def _create_trainer(self, model_dir):
        trainer_args = TrainingArguments(
            output_dir=const.FINETUNED_MODELDIR,
            logging_dir=const.TRAINING_LOGFILES,
            num_train_epochs=self.training_params['num_train_epochs'],
            per_device_train_batch_size=self._batch_size_factorize(self.data_train, const.TRAIN_BATCH_SIZE),
            per_device_eval_batch_size=self._batch_size_factorize(self.data_eval, const.EVAL_BATCH_SIZE),
            learning_rate=self.training_params['learning_rate'],  # alpha
            adam_beta1=self.training_params['adam_beta1'],
            adam_beta2=self.training_params['adam_beta2'],
            adam_epsilon=self.training_params['adam_epsilon'],
            weight_decay=self.training_params['weight_decay'],  # lambda
            warmup_steps=self.training_params['warmup_steps'],
            logging_strategy='no',
            seed=Random.get_seed(),
            save_strategy='no'
        )

        # temporarily silence transformers warnings while loading the checkpoint
        before = transformers.logging.get_verbosity()
        transformers.logging.set_verbosity_error()
        model = self._model_class().from_pretrained(model_dir, local_files_only=const.TRANSFORMERS_NO_NETWORK)
        transformers.logging.set_verbosity(before)

        self.trainer = Trainer(
            model=model,
            args=trainer_args,
            train_dataset=self.data_train,
            eval_dataset=self.data_eval,
            compute_metrics=self.data_eval.compute_metrics
        )

    def _is_cached(self):
        return os.path.isdir(self.model_save_dir)

    def _load_cached(self):
        self._create_trainer(self.model_save_dir)

    def _train(self):
        self._create_trainer(self.pretrained_model)
        trainer_output = self.trainer.train()
        if self.write_model:
            check_and_create_folder(const.FINETUNED_MODELDIR)
            self.trainer.save_model(self.model_save_dir)
        return {
            'runtime': trainer_output.metrics['train_runtime'],
            'samples_per_second': trainer_output.metrics['train_samples_per_second']
        }

    def _evaluate(self):
        return self.trainer.evaluate()

    @abstractmethod
    def _parse_prediction(self, result, input, tokens):
        pass

    def _predict(self, *args):
        tokens = self.tokenizer(*args,
            truncation='longest_first',
            padding='max_length',
            max_length=self._dataset_class().MAX_INPUT_LEN,
            return_tensors='pt'
        )

        model = self.trainer.model
        if not isinstance(model, PreTrainedModel):
            model = transformers.modeling_utils.unwrap_model(model)
        if model.device.type != "cpu":  # fix: compare the device type, not the device object to a string
            model.to("cpu")  # make sure to have model on CPU (not on GPU!)
        if len(tokens['input_ids']) > 1:
            # wrap multiple tokenized sequences as a single (outer) batch
            tokens = {k: v.unsqueeze(0) for k, v in tokens.items()}

        return self._parse_prediction(model(**tokens), args, tokens)
```
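To make the cache-name encoding concrete: for the default hyperparameters, the `reduce` expression in `__init__` produces the string shown below. This is a standalone re-run of the same expression, included here for illustration only:

```python
from functools import reduce

# default hyperparameters, as in TransformerModel.__init__
training_params = {
    'learning_rate': 5e-5, 'adam_beta1': 0.9, 'adam_beta2': 0.999,
    'adam_epsilon': 1e-8, 'weight_decay': 0.01, 'warmup_steps': 500,
    'num_train_epochs': 3,
}

# same expression as in __init__: keep only the part of each key after the
# last underscore, join "key=value" pairs with '-', and strip the leading '-'
params_str = reduce(
    lambda r, t: r + '-' + t[0][t[0].rindex('_')+1:] + '=' + str(t[1]),
    sorted(training_params.items()),
    ""
)[1:]

print(params_str)
# beta1=0.9-beta2=0.999-epsilon=1e-08-rate=5e-05-epochs=3-steps=500-decay=0.01
```

This string is appended to the pretrained-model name and the dataset cache name in `_prepare_training`, so fine-tuned models trained with different hyperparameters are cached in different directories.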
Basic Transformer model to work with Transformer-like models.

This class is abstract; use the subclasses core.model.transformer.models.IsNextSCDBert, core.model.transformer.models.IsSCDBert, core.model.transformer.models.SelectSCDBert, core.model.transformer.models.GivenTextFindSCDBert, or core.model.transformer.models.GivenSCDFindTextBert.

This model uses a GPU if one is found via CUDA; otherwise it uses multiple CPU cores. However, a GPU will be much faster.
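For orientation, a minimal construction sketch for one of the concrete subclasses. The two annotated corpus objects come from elsewhere in the project; `load_corpus` is a hypothetical placeholder, not a real helper:

```python
# Minimal sketch, assuming two annotated corpus objects are already available.
from core.model.transformer.models import IsNextSCDBert

annotated_corpus_train = load_corpus('train')  # hypothetical loader, not part of this module
annotated_corpus_eval = load_corpus('eval')    # hypothetical loader, not part of this module

model = IsNextSCDBert(
    annotated_corpus_train,
    annotated_corpus_eval,
    pretrained_model=None,  # None -> core.utils.const.BERT_MODEL_DEFAULT
    num_train_epochs=3,     # forwarded to transformers.TrainingArguments
)
```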
TransformerModel(
annotated_corpus_train,
annotated_corpus_eval,
pretrained_model=None,
dataset_memory_only=False,
learning_rate=5e-05,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
weight_decay=0.01,
warmup_steps=500,
num_train_epochs=3,
**kwargs
)
Args
- pretrained_model (string): The model to fine-tune; if None, uses core.utils.const.BERT_MODEL_DEFAULT
- dataset_memory_only (bool): Passed to core.model.transformer.dataset.Dataset
- learning_rate (float): Passed to transformers.TrainingArguments
- adam_beta1 (float): Passed to transformers.TrainingArguments
- adam_beta2 (float): Passed to transformers.TrainingArguments
- adam_epsilon (float): Passed to transformers.TrainingArguments
- weight_decay (float): Passed to transformers.TrainingArguments
- warmup_steps (int): Passed to transformers.TrainingArguments
- num_train_epochs (int): Passed to transformers.TrainingArguments

See the documentation of transformers.TrainingArguments: http://huggingface.co/transformers/main_classes/trainer.html#transformers.TrainingArguments
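The constructor does not consume these values itself; `_create_trainer` later forwards them to `transformers.TrainingArguments`. The sketch below mirrors that pass-through with the constructor defaults; 'output/' and 'logs/' stand in for the const.FINETUNED_MODELDIR and const.TRAINING_LOGFILES constants:

```python
from transformers import TrainingArguments

# Sketch of the pass-through done in _create_trainer (placeholder directories).
trainer_args = TrainingArguments(
    output_dir='output/',
    logging_dir='logs/',
    learning_rate=5e-5,     # constructor default, alpha
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_epsilon=1e-8,
    weight_decay=0.01,      # lambda
    warmup_steps=500,
    num_train_epochs=3,
    logging_strategy='no',  # the module disables intermediate logs and checkpoints
    save_strategy='no',
    seed=42,                # the module uses Random.get_seed()
)
```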
is_gpu_optimized()
View Source
```python
@staticmethod
def is_gpu_optimized():  # fix: originally declared without `self`
    return True
```
Check whether a model is optimized for GPU usage.
Returns
bool