core.evaluation.evaluate

Evaluation

This module runs the evaluations; it is a wrapper around core.model.exec.Executor.

"""
# Evaluation

This module runs the evaluations; it is a wrapper around
`core.model.exec.Executor`.
"""

import secrets

from core.model.exec import Executor

from core.utils import Mail, print_info, Random, const

class Evaluation:
	"""
		Run one or multiple evaluations on one or more models.

		Examples:
			e = Evaluation(samples=4)

			e.set_corpus(TwentyNews(subgroups=['misc-forsale']))
			e.set_annotator(Wiktionary)

			e.add_model(iSCDMatrix, num_scds_train=2) # OR
			e.add_models([iSCDMatrix, IsSCDBert]) # OR
			e.all_models()

			e.run()
	"""

	def __init__(self, ignore_plattform=False, samples=2, seeds=None, ignore_cache=False, write_model=True, percentage_train=0.8):
		"""
			Args:
				ignore_plattform (bool): By default the class detects whether it runs on GPU or CPU and only runs GPU models on GPU and CPU models on CPU; set this to ``True`` to run every model regardless of the platform
				**kwargs: See `set_parameters()`
		"""
		self.ignore_plattform = ignore_plattform

		self.parameters = {
			'samples' : samples,
			'seeds' : seeds,
			'ignore_cache' : ignore_cache,
			'write_model' : write_model,
			'percentage_train' : percentage_train
		}
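		# Validate the parameters and normalize the seed list to match 'samples'.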
		self._init_parameters()

		self.models = []
		
	def set_parameters(self, samples=None, seeds=None, ignore_cache=None, write_model=None, percentage_train=None):
		"""
			Set parameters about the evaluations to run.

			Leaving a kwarg at `None` keeps its current value.

			Args:
				samples (int): The number of times to run each evaluation
				seeds (list of int, optional): The seeds to use for each evaluation (length must equal `samples`;
					if the list is shorter, random seeds are generated to fill it, extras are dropped)
				ignore_cache (bool): Will be passed to each model, see `core.model.base.Model`
				write_model (bool): Will be passed to each model, see `core.model.base.Model`
				percentage_train (float): The percentage of the corpus to use for training (the rest is used for evaluation)
		"""
		if samples is not None:
			self.parameters['samples'] = samples
		if seeds is not None:
			self.parameters['seeds'] = seeds
		if ignore_cache is not None:
			self.parameters['ignore_cache'] = ignore_cache
		if write_model is not None:
			self.parameters['write_model'] = write_model
		if percentage_train is not None:
			self.parameters['percentage_train'] = percentage_train

		self._init_parameters()

	def _init_parameters(self):
		if not isinstance(self.parameters['samples'], int) or self.parameters['samples'] < 1:
			raise AttributeError("The samples parameter needs a positive integer!")

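		# Normalize the seed list so that len(seeds) == samples: missing seeds
		# are generated with the secrets module, extra seeds are dropped.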
		if self.parameters['seeds'] is None:
			self.parameters['seeds'] = [ secrets.randbelow(2**32) for _ in range(self.parameters['samples']) ]
			print("Created random seeds!")
		if not isinstance(self.parameters['seeds'], list):
			raise AttributeError("The seeds parameter needs to be a list of seeds!")
		if len(self.parameters['seeds']) < self.parameters['samples']:
			self.parameters['seeds'].extend([ secrets.randbelow(2**32) for _ in range(self.parameters['samples'] - len(self.parameters['seeds'])) ])
			print("Added random seeds to match number of samples!")
		elif len(self.parameters['seeds']) > self.parameters['samples']:
			self.parameters['seeds'] = self.parameters['seeds'][0:self.parameters['samples']]

		if not isinstance(self.parameters['ignore_cache'], bool) or not isinstance(self.parameters['write_model'], bool):
			raise AttributeError("The write_model and ignore_cache parameters need to be boolean!")

		if not isinstance(self.parameters['percentage_train'], float) or self.parameters['percentage_train'] < 0.01 or self.parameters['percentage_train'] > 0.99:
			raise AttributeError("The percentage_train parameter needs to be a float in [0.01, 0.99]!")

	def set_corpus(self, corpus):
		"""
			Set the corpus to use.
			Args:
				corpus (`core.corpus.corpus.Corpus` or list of `core.corpus.corpus.Corpus`): The corpus to evaluate and train on (as an **initiated object**)
		"""
		self.corpus = corpus 

	def set_annotator(self, annotator_class, annotator_preprocessor=None):
		"""
			Set the annotator to use.
			Args:
				annotator_class (`core.corpus.annotator.Annotator` or list of `core.corpus.annotator.Annotator`): The annotator to use (as a class, **not an initiated object!**)
				annotator_preprocessor (`core.corpus.preprocess.Preprocessor`): The preprocessor to use for the annotator; if `None`, uses `core.corpus.preprocess.DefaultPreprocessor` (no list allowed, the same preprocessor is used for all corpora!)
		"""
		self.annotator_class = annotator_class
		self.annotator_preprocessor = annotator_preprocessor

	def add_model(self, model_class, **kwargs):
		"""
			Add a model to the list of models to evaluate.

			Args:
				model_class (`core.model.base.Model`): The model to use
				**kwargs: Parameters to pass to the model
		"""
		self.models.append((model_class, kwargs))

	def add_models(self, model_classes):
		"""
			Add multiple models to the list of models to evaluate.
			Use `add_model()` to pass custom parameters per model.
		"""
		for model_class in model_classes:
			self.add_model(model_class)

	def all_models(self):
		"""
			Evaluate on all models.

			Will call `add_models()` with `core.model.exec.Executor.ALL_MODELS`.
		"""
		self.add_models(Executor.ALL_MODELS)


	def run(self, mail_each=False, mail_sample=False, mail_all=True):
		"""
			Start the evaluation.

			Args:
				mail_each (bool): Send a mail after each evaluated model
				mail_sample (bool): Send a mail after each completed sample (all models run once)
				mail_all (bool): Send a mail after the whole evaluation is done

		"""
		print_info()

		num_samples = self.parameters['samples']

		for i_samples, seed_sample in zip(range(num_samples), self.parameters['seeds']):
			print("\t", "Starting sample", i_samples + 1, "of", num_samples)

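			# Seed the project's RNG so each sample is reproducible from its seed.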
			Random.set_seed(seed_sample)

			for model, kwargs in self.models:
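				# Unless ignore_plattform is set, skip models whose GPU/CPU
				# optimization does not match the platform we are running on.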
				if (model.is_gpu_optimized() and not const.RUNNING_ON_GPU) and not self.ignore_plattform:
					print("\t\t", "Model", model.__name__, "is GPU optimized, but we are not on a GPU! -> Skipped")
				elif (const.RUNNING_ON_GPU and not model.is_gpu_optimized()) and not self.ignore_plattform:
					print("\t\t", "We are on a GPU, but model", model.__name__, "is not GPU optimized! -> Skipped")
				else:
					print("\t\t", "Running model", model.__name__)
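					# Forward the evaluation-wide cache and write settings to the model.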
					kwargs['ignore_cache'] = self.parameters['ignore_cache']
					kwargs['write_model'] = self.parameters['write_model']

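					# One fresh Executor per model and sample; exec() returns the
					# results, which can be mailed below.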
					e = Executor(
						model,
						self.annotator_class,
						self.corpus,
						annotator_preprocessor=self.annotator_preprocessor,
						percentage_train=self.parameters['percentage_train']
					)
					r = e.exec(**kwargs)

					if mail_each:
						Mail.send_variable(r, subject=str(model.__name__) + " has finished")

			if mail_sample:
				Mail.send_variable({
					'models' : [ model.__name__ for model, _ in self.models ],
					'sample' : str(i_samples+1) + " of " + str(num_samples),
					'seed' : seed_sample
				}, subject="A sample has finished")

		if mail_all:
			Mail.send_variable({
					'models' : [ model.__name__ for model, _ in self.models ],
					'samples' : num_samples,
					'seeds' : self.parameters['seeds']
				}, subject="The Evaluation has finished")
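
A minimal usage sketch with fixed seeds for reproducibility. It assumes the classes from the class docstring's example; their import paths below are guesses for illustration, not confirmed module locations.

from core.evaluation.evaluate import Evaluation
# NOTE: the following import paths are assumptions; adjust them to the real packages.
from core.corpus.corpus import TwentyNews
from core.corpus.annotator import Wiktionary
from core.model.matrix import iSCDMatrix

# Two samples with fixed seeds, so both runs are reproducible.
e = Evaluation(samples=2, seeds=[42, 1337])
e.set_corpus(TwentyNews(subgroups=['misc-forsale']))  # an initiated corpus object
e.set_annotator(Wiktionary)                           # the annotator class, not an instance
e.add_model(iSCDMatrix, num_scds_train=2)             # model class plus its kwargs
e.run(mail_each=False, mail_sample=False, mail_all=False)  # no notification mails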