core.model.base
View Source
from abc import ABC, abstractmethod


class Model(ABC):
    """
    Basic model to be extended by implemented models.
    This class provides the general API.

    Example:
        <pre>
        bert = IsSCDBert(annotated_corpus_train, annotated_corpus_eval)
        bert.train()
        print(bert.evaluate())
        </pre>

    This class is abstract; take a look at
    `core.model.transformer.model.TransformerModel` or
    `core.model.scdmatrix.model.SCDMatrix`.
    Also see the examples for `predict()`!
    """

    def __init__(self,
                 annotated_corpus_train, annotated_corpus_eval,
                 ignore_cache=False, write_model=True):
        """
        Create a model.

        Args:
            annotated_corpus_train (`core.corpus.annotated_corpus.AnnotatedCorpus`): The corpus to train on
            annotated_corpus_eval (`core.corpus.annotated_corpus.AnnotatedCorpus`): The corpus to evaluate on
            ignore_cache (bool): Ignore an existing cache and overwrite it
            write_model (bool): (Over-)Write the model to a cache
        """
        self.ignore_cache = ignore_cache
        self.write_model = write_model

        self.annotated_corpus_train = annotated_corpus_train
        self.annotated_corpus_eval = annotated_corpus_eval

        # Flipped to True by train(); evaluate() and predict() check it.
        self.was_trained = False

    # The method takes no arguments, so it has to be a static method;
    # otherwise a call through an instance or super() would pass `self`
    # and fail with a TypeError. `abstractmethod` must stay innermost.
    @staticmethod
    @abstractmethod
    def is_gpu_optimized():
        """
        Check if a model is optimized for GPU usage!

        Returns:
            bool
        """

    def train(self):
        """
        Train the model using the training data given in the constructor.

        Returns:
            metrics from training as dictionary, or None if the model was
            already trained or was loaded from cache
        """
        if self.was_trained:
            return None

        self._prepare_training()

        if self._is_cached() and not self.ignore_cache:
            self._load_cached()
            metrics = None
        else:
            metrics = self._train()

        # Both the cache path and the training path count as "trained".
        self.was_trained = True
        return metrics

    @abstractmethod
    def _prepare_training(self):
        """Hook called by `train()` before the cache check."""

    @abstractmethod
    def _is_cached(self):
        """Hook for `train()`: a truthy result (with `ignore_cache` unset)
        makes `train()` call `_load_cached()` instead of `_train()`."""

    @abstractmethod
    def _load_cached(self):
        """Hook called by `train()` when the cache is used."""

    @abstractmethod
    def _train(self):
        """Hook called by `train()` when the cache is not used; its return
        value is handed back to the caller of `train()`."""

    def evaluate(self):
        """
        Run the evaluation on the model using the evaluation data given in
        the constructor.

        => Will run `train()` if model is untrained!

        Returns:
            the evaluated statistics
        """
        if not self.was_trained:
            self.train()

        return self._evaluate()

    @abstractmethod
    def _evaluate(self):
        """Hook called by `evaluate()`; its result is returned unchanged."""

    def predict(self, *args):
        """
        Runs a single prediction on the model, given the input as arguments.

        => Will run `train()` if model is untrained!

        Returns:
            the predicted value (format depends on model)

        Examples:
            > The results of the examples may be inaccurate; they are *only* examples!

            - `core.model.transformer.models.IsNextSCDBert`

                    >>> bert = IsNextSCDBert(annotated_corpus_train, annotated_corpus_eval)
                    >>> print(bert.predict("The bison is cool!", "The bison is an animal."))
                    (True, 'Next sentence seems to be matching scd!')

            - `core.model.transformer.models.IsSCDBert`

                    >>> bert = IsSCDBert(annotated_corpus_train, annotated_corpus_eval)
                    >>> print(bert.predict("The bison is cool!"))
                    (False, 'Seems to be no scd!')

            - `core.model.transformer.models.SelectSCDBert`

                    >>> bert = SelectSCDBert(annotated_corpus_train, annotated_corpus_eval)
                    >>> print(bert.predict([ "The bison is cool!" ] * 4, [
                            "A car has an engine.",
                            "The computer calculates results.",
                            "The bison is an animal.",
                            "Somewhere is someone."
                        ]))
                    (2, "Text: 'The bison is cool!'; SCD: 'The bison is an animal.'")

            - `core.model.transformer.models.GivenTextFindSCDBert`
                The model `core.model.transformer.models.GivenSCDFindTextBert` works the same way,
                but the other way round, i.e. it gets a SCD and selects the matching sentence
                from a text.
                (``SCD: 'The bison is an animal.'; Text: '. we use the computer to do research. the bison'``)

                    >>> b = GivenTextFindSCDBert(annotated_corpus_train, annotated_corpus_eval)
                    >>> print(b.predict(
                            "The bison is cool!",
                            "A car has an engine. The computer calculates results. The bison is an animal. Somewhere is someone."
                        ))
                    Text: 'The bison is cool!'; SCD: '. the computer calculates results. the bison is an animal. somewhere'

            - `core.model.scdmatrix.models.iSCDMatrix`

                    >>> matrix = iSCDMatrix(annotated_corpus_train, annotated_corpus_eval)
                    >>> print(matrix.predict("The bison is cool!"))
                    (False, 'Seems to be no scd!', "Would predict as SCD: 'becom cooler reduc temperatur'")

            - `core.model.scdmatrix.models.MPSCDMatrix`

                    >>> matrix = MPSCDMatrix(annotated_corpus_train, annotated_corpus_eval)
                    >>> print(matrix.predict("The bison is cool!", [
                            "A car has an engine.",
                            "The computer calculates results.",
                            "The bison is an animal.",
                            "Somewhere is someone."
                        ]))
                    (2, "Text: 'The bison is cool!'; SCD: 'The bison is an animal.'", "Would predict as SCD: 'becom cooler reduc temperatur'")
        """
        if not self.was_trained:
            self.train()

        return self._predict(*args)

    @abstractmethod
    def _predict(self, *args):
        """Hook called by `predict()` with the caller's positional
        arguments; its result is returned unchanged."""
View Source
class Model(ABC):
    """
    Basic model to be extended by implemented models.
    This class provides the general API.

    Example:
        <pre>
        bert = IsSCDBert(annotated_corpus_train, annotated_corpus_eval)
        bert.train()
        print(bert.evaluate())
        </pre>

    This class is abstract; take a look at
    `core.model.transformer.model.TransformerModel` or
    `core.model.scdmatrix.model.SCDMatrix`.
    Also see the examples for `predict()`!
    """

    def __init__(self,
                 annotated_corpus_train, annotated_corpus_eval,
                 ignore_cache=False, write_model=True):
        """
        Create a model.

        Args:
            annotated_corpus_train (`core.corpus.annotated_corpus.AnnotatedCorpus`): The corpus to train on
            annotated_corpus_eval (`core.corpus.annotated_corpus.AnnotatedCorpus`): The corpus to evaluate on
            ignore_cache (bool): Ignore an existing cache and overwrite it
            write_model (bool): (Over-)Write the model to a cache
        """
        self.ignore_cache = ignore_cache
        self.write_model = write_model

        self.annotated_corpus_train = annotated_corpus_train
        self.annotated_corpus_eval = annotated_corpus_eval

        # Flipped to True by train(); evaluate() and predict() check it.
        self.was_trained = False

    # The method takes no arguments, so it has to be a static method;
    # otherwise a call through an instance or super() would pass `self`
    # and fail with a TypeError. `abstractmethod` must stay innermost.
    @staticmethod
    @abstractmethod
    def is_gpu_optimized():
        """
        Check if a model is optimized for GPU usage!

        Returns:
            bool
        """

    def train(self):
        """
        Train the model using the training data given in the constructor.

        Returns:
            metrics from training as dictionary, or None if the model was
            already trained or was loaded from cache
        """
        if self.was_trained:
            return None

        self._prepare_training()

        if self._is_cached() and not self.ignore_cache:
            self._load_cached()
            metrics = None
        else:
            metrics = self._train()

        # Both the cache path and the training path count as "trained".
        self.was_trained = True
        return metrics

    @abstractmethod
    def _prepare_training(self):
        """Hook called by `train()` before the cache check."""

    @abstractmethod
    def _is_cached(self):
        """Hook for `train()`: a truthy result (with `ignore_cache` unset)
        makes `train()` call `_load_cached()` instead of `_train()`."""

    @abstractmethod
    def _load_cached(self):
        """Hook called by `train()` when the cache is used."""

    @abstractmethod
    def _train(self):
        """Hook called by `train()` when the cache is not used; its return
        value is handed back to the caller of `train()`."""

    def evaluate(self):
        """
        Run the evaluation on the model using the evaluation data given in
        the constructor.

        => Will run `train()` if model is untrained!

        Returns:
            the evaluated statistics
        """
        if not self.was_trained:
            self.train()

        return self._evaluate()

    @abstractmethod
    def _evaluate(self):
        """Hook called by `evaluate()`; its result is returned unchanged."""

    def predict(self, *args):
        """
        Runs a single prediction on the model, given the input as arguments.

        => Will run `train()` if model is untrained!

        Returns:
            the predicted value (format depends on model)

        Examples:
            > The results of the examples may be inaccurate; they are *only* examples!

            - `core.model.transformer.models.IsNextSCDBert`

                    >>> bert = IsNextSCDBert(annotated_corpus_train, annotated_corpus_eval)
                    >>> print(bert.predict("The bison is cool!", "The bison is an animal."))
                    (True, 'Next sentence seems to be matching scd!')

            - `core.model.transformer.models.IsSCDBert`

                    >>> bert = IsSCDBert(annotated_corpus_train, annotated_corpus_eval)
                    >>> print(bert.predict("The bison is cool!"))
                    (False, 'Seems to be no scd!')

            - `core.model.transformer.models.SelectSCDBert`

                    >>> bert = SelectSCDBert(annotated_corpus_train, annotated_corpus_eval)
                    >>> print(bert.predict([ "The bison is cool!" ] * 4, [
                            "A car has an engine.",
                            "The computer calculates results.",
                            "The bison is an animal.",
                            "Somewhere is someone."
                        ]))
                    (2, "Text: 'The bison is cool!'; SCD: 'The bison is an animal.'")

            - `core.model.transformer.models.GivenTextFindSCDBert`
                The model `core.model.transformer.models.GivenSCDFindTextBert` works the same way,
                but the other way round, i.e. it gets a SCD and selects the matching sentence
                from a text.
                (``SCD: 'The bison is an animal.'; Text: '. we use the computer to do research. the bison'``)

                    >>> b = GivenTextFindSCDBert(annotated_corpus_train, annotated_corpus_eval)
                    >>> print(b.predict(
                            "The bison is cool!",
                            "A car has an engine. The computer calculates results. The bison is an animal. Somewhere is someone."
                        ))
                    Text: 'The bison is cool!'; SCD: '. the computer calculates results. the bison is an animal. somewhere'

            - `core.model.scdmatrix.models.iSCDMatrix`

                    >>> matrix = iSCDMatrix(annotated_corpus_train, annotated_corpus_eval)
                    >>> print(matrix.predict("The bison is cool!"))
                    (False, 'Seems to be no scd!', "Would predict as SCD: 'becom cooler reduc temperatur'")

            - `core.model.scdmatrix.models.MPSCDMatrix`

                    >>> matrix = MPSCDMatrix(annotated_corpus_train, annotated_corpus_eval)
                    >>> print(matrix.predict("The bison is cool!", [
                            "A car has an engine.",
                            "The computer calculates results.",
                            "The bison is an animal.",
                            "Somewhere is someone."
                        ]))
                    (2, "Text: 'The bison is cool!'; SCD: 'The bison is an animal.'", "Would predict as SCD: 'becom cooler reduc temperatur'")
        """
        if not self.was_trained:
            self.train()

        return self._predict(*args)

    @abstractmethod
    def _predict(self, *args):
        """Hook called by `predict()` with the caller's positional
        arguments; its result is returned unchanged."""
Basic model to be extended by implemented models. This class provides the general API.
Example
bert = IsSCDBert(annotated_corpus_train, annotated_corpus_eval) bert.train() print(bert.evaluate())
This class is abstract; take a look at core.model.transformer.model.TransformerModel
or core.model.scdmatrix.model.SCDMatrix
. Also see the examples for predict()
!
View Source
def __init__(self,
             annotated_corpus_train, annotated_corpus_eval,
             ignore_cache=False, write_model=True):
    """
    Set up a model with its corpora and cache options.

    Args:
        annotated_corpus_train (`core.corpus.annotated_corpus.AnnotatedCorpus`): The corpus to train on
        annotated_corpus_eval (`core.corpus.annotated_corpus.AnnotatedCorpus`): The corpus to evaluate on
        ignore_cache (bool): Ignore an existing cache and overwrite it
        write_model (bool): (Over-)Write the model to a cache
    """
    # Corpora used for training and evaluation.
    self.annotated_corpus_train = annotated_corpus_train
    self.annotated_corpus_eval = annotated_corpus_eval

    # Cache options.
    self.ignore_cache = ignore_cache
    self.write_model = write_model

    # A new model always starts out untrained.
    self.was_trained = False
Create a model
Args
- annotated_corpus_train (
core.corpus.annotated_corpus.AnnotatedCorpus
): The corpus to train on - annotated_corpus_eval (
core.corpus.annotated_corpus.AnnotatedCorpus
): The corpus to evaluate on - ignore_cache (bool): Ignore an existing cache and overwrite it
- write_model (bool): (Over-)Write the model to a cache
View Source
# The method takes no arguments, so it has to be a static method;
# otherwise a call through an instance or super() would pass `self`
# and fail with a TypeError. `abstractmethod` must stay innermost.
@staticmethod
@abstractmethod
def is_gpu_optimized():
    """
    Check if a model is optimized for GPU usage!

    Returns:
        bool
    """
    pass
Check if a model is optimized for GPU usage!
Returns
bool
View Source
def train(self):
    """
    Train the model on the training data passed to the constructor.

    Returns:
        the training metrics as a dictionary, or None if the model was
        already trained or was loaded from cache
    """
    # Nothing to do on repeated calls.
    if self.was_trained:
        return None

    self._prepare_training()

    # A cached model is only used when the caller did not ask to ignore it.
    use_cache = self._is_cached() and not self.ignore_cache
    if not use_cache:
        metrics = self._train()
    else:
        self._load_cached()
        metrics = None

    self.was_trained = True
    return metrics
Train the model using the training data given in the constructor.
Returns: metrics from training as a dictionary, or None if the model was already trained or was loaded from cache
View Source
def evaluate(self):
    """
    Evaluate the model on the evaluation data passed to the constructor.

    => Runs `train()` first if the model is untrained!

    Returns:
        the evaluated statistics
    """
    needs_training = not self.was_trained
    if needs_training:
        self.train()

    statistics = self._evaluate()
    return statistics
Run the evaluation on the model using the evaluation data given in the constructor.
=> Will run train()
if model is untrained!
Returns
the evaluated statistics
View Source
def predict(self, *args):
    """
    Run one prediction on the model for the given input arguments.

    => Runs `train()` first if the model is untrained!

    Returns:
        the predicted value (format depends on model)

    Examples:
        > The results of the examples may be inaccurate; they are *only* examples!

        - `core.model.transformer.models.IsNextSCDBert`

                >>> bert = IsNextSCDBert(annotated_corpus_train, annotated_corpus_eval)
                >>> print(bert.predict("The bison is cool!", "The bison is an animal."))
                (True, 'Next sentence seems to be matching scd!')

        - `core.model.transformer.models.IsSCDBert`

                >>> bert = IsSCDBert(annotated_corpus_train, annotated_corpus_eval)
                >>> print(bert.predict("The bison is cool!"))
                (False, 'Seems to be no scd!')

        - `core.model.transformer.models.SelectSCDBert`

                >>> bert = SelectSCDBert(annotated_corpus_train, annotated_corpus_eval)
                >>> print(bert.predict([ "The bison is cool!" ] * 4, [
                        "A car has an engine.",
                        "The computer calculates results.",
                        "The bison is an animal.",
                        "Somewhere is someone."
                    ]))
                (2, "Text: 'The bison is cool!'; SCD: 'The bison is an animal.'")

        - `core.model.transformer.models.GivenTextFindSCDBert`
            The model `core.model.transformer.models.GivenSCDFindTextBert` works the same way,
            but the other way round, i.e. it gets a SCD and selects the matching sentence
            from a text.
            (``SCD: 'The bison is an animal.'; Text: '. we use the computer to do research. the bison'``)

                >>> b = GivenTextFindSCDBert(annotated_corpus_train, annotated_corpus_eval)
                >>> print(b.predict(
                        "The bison is cool!",
                        "A car has an engine. The computer calculates results. The bison is an animal. Somewhere is someone."
                    ))
                Text: 'The bison is cool!'; SCD: '. the computer calculates results. the bison is an animal. somewhere'

        - `core.model.scdmatrix.models.iSCDMatrix`

                >>> matrix = iSCDMatrix(annotated_corpus_train, annotated_corpus_eval)
                >>> print(matrix.predict("The bison is cool!"))
                (False, 'Seems to be no scd!', "Would predict as SCD: 'becom cooler reduc temperatur'")

        - `core.model.scdmatrix.models.MPSCDMatrix`

                >>> matrix = MPSCDMatrix(annotated_corpus_train, annotated_corpus_eval)
                >>> print(matrix.predict("The bison is cool!", [
                        "A car has an engine.",
                        "The computer calculates results.",
                        "The bison is an animal.",
                        "Somewhere is someone."
                    ]))
                (2, "Text: 'The bison is cool!'; SCD: 'The bison is an animal.'", "Would predict as SCD: 'becom cooler reduc temperatur'")
    """
    needs_training = not self.was_trained
    if needs_training:
        self.train()

    prediction = self._predict(*args)
    return prediction
Runs a single prediction on the model, given the input as arguments.
=> Will run train()
if model is untrained!
Returns
the predicted value (format depends on model)
Examples
The results of the examples may be inaccurate; they are only examples!
>>> bert = IsNextSCDBert(annotated_corpus_train, annotated_corpus_eval) >>> print(bert.predict("The bison is cool!", "The bison is an animal.")) (True, 'Next sentence seems to be matching scd!')
>>> bert = IsSCDBert(annotated_corpus_train, annotated_corpus_eval) >>> print(bert.predict("The bison is cool!")) (False, 'Seems to be no scd!')
>>> bert = SelectSCDBert(annotated_corpus_train, annotated_corpus_eval) >>> print(bert.predict([ "The bison is cool!" ] * 4, [ "A car has an engine.", "The computer calculates results.", "The bison is an animal.", "Somewhere is someone." ])) (2, "Text: 'The bison is cool!'; SCD: 'The bison is an animal.'")
core.model.transformer.models.GivenTextFindSCDBert
The model core.model.transformer.models.GivenSCDFindTextBert
works the same way, but the other way round, i.e. it gets a SCD and selects the matching sentence from a text. (SCD: 'The bison is an animal.'; Text: '. we use the computer to do research. the bison'
)
>>> b = GivenTextFindSCDBert(annotated_corpus_train, annotated_corpus_eval) >>> print(b.predict( "The bison is cool!", "A car has an engine. The computer calculates results. The bison is an animal. Somewhere is someone." )) Text: 'The bison is cool!'; SCD: '. the computer calculates results. the bison is an animal. somewhere'
>>> matrix = iSCDMatrix(annotated_corpus_train, annotated_corpus_eval) >>> print(matrix.predict("The bison is cool!")) (False, 'Seems to be no scd!', "Would predict as SCD: 'becom cooler reduc temperatur'")
>>> matrix = MPSCDMatrix(annotated_corpus_train, annotated_corpus_eval) >>> print(matrix.predict("The bison is cool!", [ "A car has an engine.", "The computer calculates results.", "The bison is an animal.", "Somewhere is someone." ])) (2, "Text: 'The bison is cool!'; SCD: 'The bison is an animal.'", "Would predict as SCD: 'becom cooler reduc temperatur'")