core.model.base

View Source
from abc import ABC, abstractmethod

class Model(ABC):
	"""
		Abstract base class for all models, it defines the common API
		(`train()`, `evaluate()` and `predict()`).

		Concrete models implement the hooks `_prepare_training()`, `_is_cached()`,
		`_load_cached()`, `_train()`, `_evaluate()` and `_predict()` as well as
		`is_gpu_optimized()`.

		Example:
			<pre>
			bert = IsSCDBert(annotated_corpus_train, annotated_corpus_eval)
			&nbsp;
			bert.train()
			print(bert.evaluate())
			</pre>

		This class is abstract, take a look at `core.model.transformer.model.TransformerModel`
		or `core.model.scdmatrix.model.SCDMatrix`. Also see the examples for `predict()`!
	"""

	def __init__(self,
			annotated_corpus_train, annotated_corpus_eval,
			ignore_cache: bool = False, write_model: bool = True
		):
		"""
			Create a model (does not train it, `was_trained` starts as `False`).

			Args:
				annotated_corpus_train (`core.corpus.annotated_corpus.AnnotatedCorpus`): The corpus to train on
				annotated_corpus_eval (`core.corpus.annotated_corpus.AnnotatedCorpus`): The corpus to evaluate on
				ignore_cache (bool): Ignore an existing cache and overwrite it
				write_model (bool): (Over-)Write the model to a cache
		"""
		self.annotated_corpus_train = annotated_corpus_train
		self.annotated_corpus_eval = annotated_corpus_eval

		self.ignore_cache = ignore_cache
		self.write_model = write_model

		# Set to True by `train()` after training or loading from the cache.
		self.was_trained = False

	# The method takes no `self`, so it has to be a static method; otherwise
	# a call on an instance fails with a TypeError (unexpected argument).
	@staticmethod
	@abstractmethod
	def is_gpu_optimized() -> bool:
		"""
			Check if a model is optimized for GPU usage!

			Returns:
				bool
		"""
		pass

	def train(self):
		"""
			Train the model, uses the train data given in constructor.

			A cached model is loaded instead of training if `_is_cached()` reports
			one and `ignore_cache` is not set.

			Returns:
				metrics from training (the return value of `_train()`), or None
				if the model was already trained or was loaded from the cache
		"""
		if self.was_trained:
			return None

		self._prepare_training()

		# `_is_cached()` is always asked first, `ignore_cache` only overrules a hit.
		if self._is_cached() and not self.ignore_cache:
			self._load_cached()
			metrics = None
		else:
			metrics = self._train()

		# Only reached if none of the hooks raised.
		self.was_trained = True
		return metrics

	@abstractmethod
	def _prepare_training(self):
		"""
			Hook, called by `train()` before the cache is checked.
		"""
		pass

	@abstractmethod
	def _is_cached(self):
		"""
			Hook, a truthy result makes `train()` load the cache (unless `ignore_cache` is set).
		"""
		pass

	@abstractmethod
	def _load_cached(self):
		"""
			Hook, called by `train()` instead of `_train()` when the cache is used.
		"""
		pass

	@abstractmethod
	def _train(self):
		"""
			Hook, runs the actual training; the result is returned by `train()`.
		"""
		pass

	def evaluate(self):
		"""
			Run the evaluation on the model, uses the eval data given in constructor.  
				=> Will run `train()` if model is untrained!
			Returns:
				the evaluated statistics (the return value of `_evaluate()`)
		"""
		if not self.was_trained:
			self.train()
		return self._evaluate()

	@abstractmethod
	def _evaluate(self):
		"""
			Hook, runs the actual evaluation; the result is returned by `evaluate()`.
		"""
		pass

	def predict(self, *args):
		"""
			Runs a single prediction on the model, given the input as arguments.  
				=> Will run `train()` if model is untrained!
			Args:
				*args: the model specific input, handed to `_predict()` unchanged
			Returns:
				the predicted value (format depends on model)
			Examples:
				> The results of the examples may be inaccurate, they are *only* examples!

				- `core.model.transformer.models.IsNextSCDBert`

					>>> bert = IsNextSCDBert(annotated_corpus_train, annotated_corpus_eval)
					>>> print(bert.predict("The bison is cool!", "The bison is an animal."))
					(True, 'Next sentence seems to be matching scd!')

				- `core.model.transformer.models.IsSCDBert`

					>>> bert = IsSCDBert(annotated_corpus_train, annotated_corpus_eval)
					>>> print(bert.predict("The bison is cool!"))
					(False, 'Seems to be no scd!')

				- `core.model.transformer.models.SelectSCDBert`

					>>> bert = SelectSCDBert(annotated_corpus_train, annotated_corpus_eval)
					>>> print(bert.predict([
								"The bison is cool!"
							] * 4, [
								"A car has an engine.",
								"The computer calculates results.",
								"The bison is an animal.",
								"Somewhere is someone."
							]))
					(2, "Text: 'The bison is cool!'; SCD: 'The bison is an animal.'")

				- `core.model.transformer.models.GivenTextFindSCDBert`  
					The model `core.model.transformer.models.GivenSCDFindTextBert` works the same way,
					but the other way round, i.e. it gets a SCD and selects the matching sentence from a text.
					(``SCD: 'The bison is an animal.'; Text: '. we use the computer to do research. the bison'``)

					>>> b = GivenTextFindSCDBert(annotated_corpus_train, annotated_corpus_eval)
					>>> print(b.predict(
							"The bison is cool!",
							"A car has an engine. The computer calculates results. The bison is an animal. Somewhere is someone."
						))
					Text: 'The bison is cool!'; SCD: '. the computer calculates results. the bison is an animal. somewhere'

				- `core.model.scdmatrix.models.iSCDMatrix`

					>>> matrix = iSCDMatrix(annotated_corpus_train, annotated_corpus_eval)
					>>> print(matrix.predict("The bison is cool!"))
					(False, 'Seems to be no scd!', "Would predict as SCD: 'becom cooler reduc temperatur'")

				- `core.model.scdmatrix.models.MPSCDMatrix`

					>>> matrix = MPSCDMatrix(annotated_corpus_train, annotated_corpus_eval)
					>>> print(matrix.predict("The bison is cool!", [
							"A car has an engine.",
							"The computer calculates results.",
							"The bison is an animal.",
							"Somewhere is someone."
						]))
					(2, "Text: 'The bison is cool!'; SCD: 'The bison is an animal.'", "Would predict as SCD: 'becom cooler reduc temperatur'")
		"""
		if not self.was_trained:
			self.train()
		return self._predict(*args)

	@abstractmethod
	def _predict(self, *args):
		"""
			Hook, runs the actual prediction; the result is returned by `predict()`.
		"""
		pass
#   class Model(abc.ABC):
View Source
class Model(ABC):
	"""
		Abstract base class for all models, it defines the common API
		(`train()`, `evaluate()` and `predict()`).

		Concrete models implement the hooks `_prepare_training()`, `_is_cached()`,
		`_load_cached()`, `_train()`, `_evaluate()` and `_predict()` as well as
		`is_gpu_optimized()`.

		Example:
			<pre>
			bert = IsSCDBert(annotated_corpus_train, annotated_corpus_eval)
			&nbsp;
			bert.train()
			print(bert.evaluate())
			</pre>

		This class is abstract, take a look at `core.model.transformer.model.TransformerModel`
		or `core.model.scdmatrix.model.SCDMatrix`. Also see the examples for `predict()`!
	"""

	def __init__(self,
			annotated_corpus_train, annotated_corpus_eval,
			ignore_cache: bool = False, write_model: bool = True
		):
		"""
			Create a model (does not train it, `was_trained` starts as `False`).

			Args:
				annotated_corpus_train (`core.corpus.annotated_corpus.AnnotatedCorpus`): The corpus to train on
				annotated_corpus_eval (`core.corpus.annotated_corpus.AnnotatedCorpus`): The corpus to evaluate on
				ignore_cache (bool): Ignore an existing cache and overwrite it
				write_model (bool): (Over-)Write the model to a cache
		"""
		self.annotated_corpus_train = annotated_corpus_train
		self.annotated_corpus_eval = annotated_corpus_eval

		self.ignore_cache = ignore_cache
		self.write_model = write_model

		# Set to True by `train()` after training or loading from the cache.
		self.was_trained = False

	# The method takes no `self`, so it has to be a static method; otherwise
	# a call on an instance fails with a TypeError (unexpected argument).
	@staticmethod
	@abstractmethod
	def is_gpu_optimized() -> bool:
		"""
			Check if a model is optimized for GPU usage!

			Returns:
				bool
		"""
		pass

	def train(self):
		"""
			Train the model, uses the train data given in constructor.

			A cached model is loaded instead of training if `_is_cached()` reports
			one and `ignore_cache` is not set.

			Returns:
				metrics from training (the return value of `_train()`), or None
				if the model was already trained or was loaded from the cache
		"""
		if self.was_trained:
			return None

		self._prepare_training()

		# `_is_cached()` is always asked first, `ignore_cache` only overrules a hit.
		if self._is_cached() and not self.ignore_cache:
			self._load_cached()
			metrics = None
		else:
			metrics = self._train()

		# Only reached if none of the hooks raised.
		self.was_trained = True
		return metrics

	@abstractmethod
	def _prepare_training(self):
		"""
			Hook, called by `train()` before the cache is checked.
		"""
		pass

	@abstractmethod
	def _is_cached(self):
		"""
			Hook, a truthy result makes `train()` load the cache (unless `ignore_cache` is set).
		"""
		pass

	@abstractmethod
	def _load_cached(self):
		"""
			Hook, called by `train()` instead of `_train()` when the cache is used.
		"""
		pass

	@abstractmethod
	def _train(self):
		"""
			Hook, runs the actual training; the result is returned by `train()`.
		"""
		pass

	def evaluate(self):
		"""
			Run the evaluation on the model, uses the eval data given in constructor.  
				=> Will run `train()` if model is untrained!
			Returns:
				the evaluated statistics (the return value of `_evaluate()`)
		"""
		if not self.was_trained:
			self.train()
		return self._evaluate()

	@abstractmethod
	def _evaluate(self):
		"""
			Hook, runs the actual evaluation; the result is returned by `evaluate()`.
		"""
		pass

	def predict(self, *args):
		"""
			Runs a single prediction on the model, given the input as arguments.  
				=> Will run `train()` if model is untrained!
			Args:
				*args: the model specific input, handed to `_predict()` unchanged
			Returns:
				the predicted value (format depends on model)
			Examples:
				> The results of the examples may be inaccurate, they are *only* examples!

				- `core.model.transformer.models.IsNextSCDBert`

					>>> bert = IsNextSCDBert(annotated_corpus_train, annotated_corpus_eval)
					>>> print(bert.predict("The bison is cool!", "The bison is an animal."))
					(True, 'Next sentence seems to be matching scd!')

				- `core.model.transformer.models.IsSCDBert`

					>>> bert = IsSCDBert(annotated_corpus_train, annotated_corpus_eval)
					>>> print(bert.predict("The bison is cool!"))
					(False, 'Seems to be no scd!')

				- `core.model.transformer.models.SelectSCDBert`

					>>> bert = SelectSCDBert(annotated_corpus_train, annotated_corpus_eval)
					>>> print(bert.predict([
								"The bison is cool!"
							] * 4, [
								"A car has an engine.",
								"The computer calculates results.",
								"The bison is an animal.",
								"Somewhere is someone."
							]))
					(2, "Text: 'The bison is cool!'; SCD: 'The bison is an animal.'")

				- `core.model.transformer.models.GivenTextFindSCDBert`  
					The model `core.model.transformer.models.GivenSCDFindTextBert` works the same way,
					but the other way round, i.e. it gets a SCD and selects the matching sentence from a text.
					(``SCD: 'The bison is an animal.'; Text: '. we use the computer to do research. the bison'``)

					>>> b = GivenTextFindSCDBert(annotated_corpus_train, annotated_corpus_eval)
					>>> print(b.predict(
							"The bison is cool!",
							"A car has an engine. The computer calculates results. The bison is an animal. Somewhere is someone."
						))
					Text: 'The bison is cool!'; SCD: '. the computer calculates results. the bison is an animal. somewhere'

				- `core.model.scdmatrix.models.iSCDMatrix`

					>>> matrix = iSCDMatrix(annotated_corpus_train, annotated_corpus_eval)
					>>> print(matrix.predict("The bison is cool!"))
					(False, 'Seems to be no scd!', "Would predict as SCD: 'becom cooler reduc temperatur'")

				- `core.model.scdmatrix.models.MPSCDMatrix`

					>>> matrix = MPSCDMatrix(annotated_corpus_train, annotated_corpus_eval)
					>>> print(matrix.predict("The bison is cool!", [
							"A car has an engine.",
							"The computer calculates results.",
							"The bison is an animal.",
							"Somewhere is someone."
						]))
					(2, "Text: 'The bison is cool!'; SCD: 'The bison is an animal.'", "Would predict as SCD: 'becom cooler reduc temperatur'")
		"""
		if not self.was_trained:
			self.train()
		return self._predict(*args)

	@abstractmethod
	def _predict(self, *args):
		"""
			Hook, runs the actual prediction; the result is returned by `predict()`.
		"""
		pass

Basic model to be extended by implemented models. This class provides the general API.

Example
bert = IsSCDBert(annotated_corpus_train, annotated_corpus_eval)
 
bert.train()
print(bert.evaluate())

This class is abstract, take a look at core.model.transformer.model.TransformerModel or core.model.scdmatrix.model.SCDMatrix. Also see the examples for predict()!

#   Model( annotated_corpus_train, annotated_corpus_eval, ignore_cache=False, write_model=True )
View Source
	def __init__(self,
			annotated_corpus_train, annotated_corpus_eval,
			ignore_cache=False, write_model=True
		):
		'''
			Create a model
			Args:
				annotated_corpus_train (`core.corpus.annotated_corpus.AnnotatedCorpus`): The corpus to train on
				annotated_corpus_eval (`core.corpus.annotated_corpus.AnnotatedCorpus`): The corpus to evaluate on
				ignore_cache (bool): Ignore an existing cache and overwrite it
				write_model (bool): (Over-)Write the model to a cache
		'''
		self.ignore_cache = ignore_cache
		self.write_model = write_model

		self.annotated_corpus_train = annotated_corpus_train
		self.annotated_corpus_eval = annotated_corpus_eval

		self.was_trained = False

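Create a model.

Args

annotated_corpus_train (core.corpus.annotated_corpus.AnnotatedCorpus): The corpus to train on
annotated_corpus_eval (core.corpus.annotated_corpus.AnnotatedCorpus): The corpus to evaluate on
ignore_cache (bool): Ignore an existing cache and overwrite it
write_model (bool): (Over-)Write the model to a cache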
#  
@abstractmethod
def is_gpu_optimized():
View Source
	@abstractmethod
	def is_gpu_optimized():
		"""
			Check if a model is optimized for GPU usage!

			Returns:
				bool
		"""
		pass

Check if a model is optimized for GPU usage!

Returns

bool

#   def train(self):
View Source
	def train(self):
		'''
			Train the model, uses the train data given in constructor.

			Returns: metrics from training as dictionary, or None (if already trained, loaded from cache)
		'''
		if self.was_trained:
			return None

		self._prepare_training()
		if self._is_cached() and not self.ignore_cache:
			self._load_cached()
			metrics = None
		else:
			metrics = self._train()

		self.was_trained = True

		return metrics

Train the model, uses the train data given in constructor.

Returns: metrics from training as a dictionary, or None if the model was already trained or was loaded from the cache

#   def evaluate(self):
View Source
	def evaluate(self):
		'''
			Run the evaluation on the model, uses the eval data given in constructor.  
				=> Will run `train()` if model is untrained!
			Returns:
				the evaluated statistics
		'''
		if not self.was_trained:
			self.train()
		return self._evaluate()

Run the evaluation on the model, uses the eval data given in constructor.
=> Will run train() if model is untrained!

Returns

the evaluated statistics

#   def predict(self, *args):
View Source
	def predict(self, *args):
		'''
			Runs a single prediction on the model, given the input as arguments.  
				=> Will run `train()` if model is untrained!
			Returns:
				the predicted value (format depends on model)
			Examples:
				> The results of the examples may be inaccurate, they are *only* examples!

				- `core.model.transformer.models.IsNextSCDBert`
					
					>>> bert = IsNextSCDBert(annotated_corpus_train, annotated_corpus_eval)
					>>> print(bert.predict("The bison is cool!", "The bison is an animal."))
					(True, 'Next sentence seems to be matching scd!')

				- `core.model.transformer.models.IsSCDBert`

					>>> bert = IsSCDBert(annotated_corpus_train, annotated_corpus_eval)  
					>>> print(bert.predict("The bison is cool!"))  
					(False, 'Seems to be no scd!')

				- `core.model.transformer.models.SelectSCDBert`

					>>> bert = SelectSCDBert(annotated_corpus_train, annotated_corpus_eval)
					>>> print(bert.predict([
								"The bison is cool!"
							] * 4, [
								"A car has an engine.",
								"The computer calculates results.",
								"The bison is an animal.",
								"Somewhere is someone."
							]))
					(2, "Text: 'The bison is cool!'; SCD: 'The bison is an animal.'")

				- `core.model.transformer.models.GivenTextFindSCDBert`  
					The model `core.model.transformer.models.GivenSCDFindTextBert` works the same way,
					but the other way round, i.e. it gets a SCD and selects the matching sentence from a text.
					(``SCD: 'The bison is an animal.'; Text: '. we use the computer to do research. the bison'``)

					>>> b = GivenTextFindSCDBert(annotated_corpus_train, annotated_corpus_eval)
					>>> print(b.predict(
							"The bison is cool!",
							"A car has an engine. The computer calculates results. The bison is an animal. Somewhere is someone."
						))
					Text: 'The bison is cool!'; SCD: '. the computer calculates results. the bison is an animal. somewhere'

				- `core.model.scdmatrix.models.iSCDMatrix`

					>>> matrix = iSCDMatrix(annotated_corpus_train, annotated_corpus_eval)  
					>>> print(matrix.predict("The bison is cool!"))  
					(False, 'Seems to be no scd!', "Would predict as SCD: 'becom cooler reduc temperatur'")

				- `core.model.scdmatrix.models.MPSCDMatrix`

					>>> matrix = MPSCDMatrix(annotated_corpus_train, annotated_corpus_eval)  
					>>> print(matrix.predict("The bison is cool!", [
							"A car has an engine.",
							"The computer calculates results.",
							"The bison is an animal.",
							"Somewhere is someone."
						]))
					(2, "Text: 'The bison is cool!'; SCD: 'The bison is an animal.'", "Would predict as SCD: 'becom cooler reduc temperatur'")	
		'''
		if not self.was_trained:
			self.train()
		return self._predict(*args)

Runs a single prediction on the model, given the input as arguments.
=> Will run train() if model is untrained!

Returns

the predicted value (format depends on model)

Examples

The results of the examples may be inaccurate, they are only examples!

    >>> bert = IsNextSCDBert(annotated_corpus_train, annotated_corpus_eval)
    >>> print(bert.predict("The bison is cool!", "The bison is an animal."))
    (True, 'Next sentence seems to be matching scd!')
    >>> bert = IsSCDBert(annotated_corpus_train, annotated_corpus_eval)  
    >>> print(bert.predict("The bison is cool!"))  
    (False, 'Seems to be no scd!')
    >>> bert = SelectSCDBert(annotated_corpus_train, annotated_corpus_eval)
    >>> print(bert.predict([
                            "The bison is cool!"
                    ] * 4, [
                            "A car has an engine.",
                            "The computer calculates results.",
                            "The bison is an animal.",
                            "Somewhere is someone."
                    ]))
    (2, "Text: 'The bison is cool!'; SCD: 'The bison is an animal.'")
    >>> b = GivenTextFindSCDBert(annotated_corpus_train, annotated_corpus_eval)
    >>> print(b.predict(
                    "The bison is cool!",
                    "A car has an engine. The computer calculates results. The bison is an animal. Somewhere is someone."
            ))
    Text: 'The bison is cool!'; SCD: '. the computer calculates results. the bison is an animal. somewhere'
    >>> matrix = iSCDMatrix(annotated_corpus_train, annotated_corpus_eval)  
    >>> print(matrix.predict("The bison is cool!"))  
    (False, 'Seems to be no scd!', "Would predict as SCD: 'becom cooler reduc temperatur'")
    >>> matrix = MPSCDMatrix(annotated_corpus_train, annotated_corpus_eval)  
    >>> print(matrix.predict("The bison is cool!", [
                    "A car has an engine.",
                    "The computer calculates results.",
                    "The bison is an animal.",
                    "Somewhere is someone."
            ]))
    (2, "Text: 'The bison is cool!'; SCD: 'The bison is an animal.'", "Would predict as SCD: 'becom cooler reduc temperatur'")