core.evaluation.evaluate
Evaluation
This module runs the evaluations; it is a wrapper around
core.model.exec.Executor
.
View Source
""" # Evaluation This modul runs the evaluations, its a wrapper around `core.model.exec.Executor`. """ import secrets from core.model.exec import Executor from core.utils import Mail, print_info, Random, const class Evaluation(): """ Run one or multiple evaluations, on one or more models. Examples: <pre> e = Evaluation(samples=4) e.set_corpus(TwentyNews(subgroups=['misc-forsale'])) e.set_annotator(Wiktionary) e.add_model(iSCDMatrix, num_scds_train=2) # OR e.add_models([iSCDMatrix, IsSCDBert]) # OR e.all_models() e.run() </pre> """ def __init__(self, ignore_plattform=False, samples=2, seeds=[], ignore_cache=False, write_model=True, percentage_train=0.8): """ Args: ignore_plattform (bool): The class detects if running on GPU or CPU, and will only run GPU models on GPU and CPU models on CPU if this is set to ``True`` **kwargs: See `set_parameter()` """ self.ignore_plattform = ignore_plattform self.parameters = { 'samples' : samples, 'seeds' : seeds, 'ignore_cache' : ignore_cache, 'write_model' : write_model, 'percentage_train' : percentage_train } self._init_parameters() self.models = [] def set_parameters(self, samples=None, seeds=None, ignore_cache=None, write_model=None, percentage_train=None): """ Set parameters about the evaluations to run. Setting a kwarg to `None` will let it unchanged. Args: samples (int): The number of times to run each evaluation seeds (list of int, optional): The seeds to use for each evaluation (len == samples!) 
If list is smaller, generate random seeds ignore_cache (bool): Will be passed to each model, see `core.model.base.Model` write_model (bool): Will be passed to each model, see `core.model.base.Model` percentage_train (float): The percantage of the corpus to use for training (rest will be used for evaluation) """ if samples != None: self.parameters['samples'] = samples if seeds != None: self.parameters['seeds'] = seeds if ignore_cache != None: self.parameters['ignore_cache'] = ignore_cache if write_model != None: self.parameters['write_model'] = write_model if percentage_train != None: self.parameters['percentage_train'] = percentage_train self._init_parameters() def _init_parameters(self): if not isinstance(self.parameters['samples'], int) or self.parameters['samples'] < 1: raise AttributeError("The samples parameter needs a positive integer!") if self.parameters['seeds'] == None: self.parameters['seeds'] = [ secrets.randbelow(2**32) for _ in range(self.parameters['samples']) ] print("Created ranodom seeds!") if not isinstance(self.parameters['seeds'], list): raise AttributeError("The samples parameter needs to be a list of seeds!") if len(self.parameters['seeds']) < self.parameters['samples']: self.parameters['seeds'].extend([ secrets.randbelow(2**32) for _ in range(self.parameters['samples'] - len(self.parameters['seeds'])) ]) print("Added random seeds to match number of samples!") elif len(self.parameters['seeds']) > self.parameters['samples']: self.parameters['seeds'] = self.parameters['seeds'][0:self.parameters['samples']] if not isinstance(self.parameters['ignore_cache'], bool) or not isinstance(self.parameters['write_model'], bool) : raise AttributeError("The write_model and ignore_cache parameters needs a be boolean!") if not isinstance(self.parameters['percentage_train'], float) or self.parameters['percentage_train'] < 0.01 or self.parameters['percentage_train'] > 0.99: raise AttributeError("The percentage_train needs to be a floats in [0.01, 0.99]!") def 
set_corpus(self, corpus): """ Set the corpus to use Args: coprus (`core.corpus.corpus.Corpus` or list of `core.corpus.corpus.Corpus`): The corpus to evaluate and train on (as **initiated object**) """ self.corpus = corpus def set_annotator(self, annotator_class, annotator_preprocessor=None): """ Set the annotator to use Args: annotator_class (`core.corpus.annotator.Annotator` or list of `core.corpus.annotator.Annotator`): The annotator to use (as class, not an **initiated object!**) annotator_preprocessor (`core.corpus.preprocess.Preprocessor`): The preprocessor to use for annotator, if `None` uses `core.corpus.preprocess.DefaultPreprocessor` (no list allowed, will use same for all corpora!) """ self.annotator_class = annotator_class self.annotator_preprocessor = annotator_preprocessor def add_model(self, model_class, **kwargs): """ Add a model to the list of models to evaluate. Args: model_class (`core.model.base.Model`): The model to use **kwargs: Parameters to pass to the model """ self.models.append((model_class, kwargs)) def add_models(self, model_classes): """ Add multiple models to the list of models to evaluate. Use `add_model()` to add custom parameters """ for model_class in model_classes: self.add_model(model_class) def all_models(self): """ Evaluate on all models. Will run `add_models()` for `core.model.exec.Executor.ALL_MODELS`. """ self.add_models(Executor.ALL_MODELS) def run(self, mail_each=False, mail_sample=False, mail_all=True): """ Start the evaluation. 
Args: mail_each (bool): Send a mail after each evaluated model mail_sample (bool): Send a mail after each compeleted sample (each model done) mail_all (bool): Send a mail after all is done """ print_info() num_samples = self.parameters['samples'] for i_samples, seed_sample in zip(range(num_samples), self.parameters['seeds']): print("\t", "Starting sample", i_samples + 1, "of", num_samples) Random.set_seed(seed_sample) for model, kwargs in self.models: if (model.is_gpu_optimized() and not const.RUNNING_ON_GPU) and not self.ignore_plattform: print("\t\t","Model", model.__name__, "is GPU optimized, but we are not on a GPU! -> Skipped") elif (const.RUNNING_ON_GPU and not model.is_gpu_optimized()) and not self.ignore_plattform: print("\t\t", "We are on a GPU, but model", model.__name__, "is not GPU optimized!-> Skipped") else: print("\t\t", "Running model", model.__name__) kwargs['ignore_cache'] = self.parameters['ignore_cache'] kwargs['write_model'] = self.parameters['write_model'] e = Executor( model, self.annotator_class, self.corpus, annotator_preprocessor=self.annotator_preprocessor, percentage_train=self.parameters['percentage_train'] ) r = e.exec(**kwargs) if mail_each: Mail.send_variable(r, subject=str(model.__name__) + " has finished") if mail_sample: Mail.send_variable({ 'models' : [ model.__name__ for model, _ in self.models ], 'sample' : str(i_samples+1) + " of " + str(num_samples), 'seed' : seed_sample }, subject="A sample has finished") if mail_all: Mail.send_variable({ 'models' : [ model.__name__ for model, _ in self.models ], 'samples' : num_samples, 'seeds' : self.parameters['seeds'] }, subject="The Evaluation has finished")
View Source
class Evaluation():
    """
        Run one or multiple evaluations, on one or more models.

        Examples:
        <pre>
        e = Evaluation(samples=4)
        e.set_corpus(TwentyNews(subgroups=['misc-forsale']))
        e.set_annotator(Wiktionary)
        e.add_model(iSCDMatrix, num_scds_train=2)
        # OR
        e.add_models([iSCDMatrix, IsSCDBert])
        # OR
        e.all_models()
        e.run()
        </pre>
    """

    def __init__(self, ignore_plattform=False, samples=2, seeds=None, ignore_cache=False, write_model=True, percentage_train=0.8):
        """
            Args:
                ignore_plattform (bool): The class detects if running on GPU or CPU, and will
                    only run GPU models on GPU and CPU models on CPU unless this is set to ``True``
                **kwargs: See `set_parameters()`
        """
        self.ignore_plattform = ignore_plattform
        # NOTE: seeds defaults to None (not []) so no shared default list is ever
        # mutated by _init_parameters(); a caller-supplied list is copied for the
        # same reason. None is filled with random seeds by _init_parameters().
        self.parameters = {
            'samples': samples,
            'seeds': list(seeds) if seeds is not None else None,
            'ignore_cache': ignore_cache,
            'write_model': write_model,
            'percentage_train': percentage_train
        }
        self._init_parameters()
        self.models = []

    def set_parameters(self, samples=None, seeds=None, ignore_cache=None, write_model=None, percentage_train=None):
        """
            Set parameters about the evaluations to run.
            Setting a kwarg to `None` will leave it unchanged.

            Args:
                samples (int): The number of times to run each evaluation
                seeds (list of int, optional): The seeds to use for each evaluation (len == samples!)
                    If list is smaller, generate random seeds
                ignore_cache (bool): Will be passed to each model, see `core.model.base.Model`
                write_model (bool): Will be passed to each model, see `core.model.base.Model`
                percentage_train (float): The percentage of the corpus to use for training (rest will be used for evaluation)
        """
        if samples is not None:
            self.parameters['samples'] = samples
        if seeds is not None:
            # copy so _init_parameters() never mutates the caller's list
            self.parameters['seeds'] = list(seeds)
        if ignore_cache is not None:
            self.parameters['ignore_cache'] = ignore_cache
        if write_model is not None:
            self.parameters['write_model'] = write_model
        if percentage_train is not None:
            self.parameters['percentage_train'] = percentage_train
        self._init_parameters()

    def _init_parameters(self):
        """
            Validate all parameters and fill in random seeds where needed.

            Raises:
                AttributeError: If any parameter has an invalid type or value.
        """
        if not isinstance(self.parameters['samples'], int) or self.parameters['samples'] < 1:
            raise AttributeError("The samples parameter needs a positive integer!")
        if self.parameters['seeds'] is None:
            # secrets (not random) so seeds are independent of any global RNG state
            self.parameters['seeds'] = [secrets.randbelow(2**32) for _ in range(self.parameters['samples'])]
            print("Created random seeds!")
        if not isinstance(self.parameters['seeds'], list):
            raise AttributeError("The seeds parameter needs to be a list of seeds!")
        if len(self.parameters['seeds']) < self.parameters['samples']:
            # pad with fresh random seeds so every sample has one
            self.parameters['seeds'].extend([
                secrets.randbelow(2**32)
                for _ in range(self.parameters['samples'] - len(self.parameters['seeds']))
            ])
            print("Added random seeds to match number of samples!")
        elif len(self.parameters['seeds']) > self.parameters['samples']:
            # surplus seeds are dropped
            self.parameters['seeds'] = self.parameters['seeds'][0:self.parameters['samples']]
        if not isinstance(self.parameters['ignore_cache'], bool) or not isinstance(self.parameters['write_model'], bool):
            raise AttributeError("The write_model and ignore_cache parameters need to be boolean!")
        if not isinstance(self.parameters['percentage_train'], float) or self.parameters['percentage_train'] < 0.01 or self.parameters['percentage_train'] > 0.99:
            raise AttributeError("The percentage_train needs to be a float in [0.01, 0.99]!")

    def set_corpus(self, corpus):
        """
            Set the corpus to use

            Args:
                corpus (`core.corpus.corpus.Corpus` or list of `core.corpus.corpus.Corpus`): The corpus to evaluate and train on (as **initiated object**)
        """
        self.corpus = corpus

    def set_annotator(self, annotator_class, annotator_preprocessor=None):
        """
            Set the annotator to use

            Args:
                annotator_class (`core.corpus.annotator.Annotator` or list of `core.corpus.annotator.Annotator`): The annotator to use (as class, not an **initiated object!**)
                annotator_preprocessor (`core.corpus.preprocess.Preprocessor`): The preprocessor to use for annotator, if `None` uses `core.corpus.preprocess.DefaultPreprocessor` (no list allowed, will use same for all corpora!)
        """
        self.annotator_class = annotator_class
        self.annotator_preprocessor = annotator_preprocessor

    def add_model(self, model_class, **kwargs):
        """
            Add a model to the list of models to evaluate.

            Args:
                model_class (`core.model.base.Model`): The model to use
                **kwargs: Parameters to pass to the model
        """
        self.models.append((model_class, kwargs))

    def add_models(self, model_classes):
        """
            Add multiple models to the list of models to evaluate.
            Use `add_model()` to add custom parameters
        """
        for model_class in model_classes:
            self.add_model(model_class)

    def all_models(self):
        """
            Evaluate on all models.
            Will run `add_models()` for `core.model.exec.Executor.ALL_MODELS`.
        """
        self.add_models(Executor.ALL_MODELS)

    def run(self, mail_each=False, mail_sample=False, mail_all=True):
        """
            Start the evaluation.

            Args:
                mail_each (bool): Send a mail after each evaluated model
                mail_sample (bool): Send a mail after each completed sample (each model done)
                mail_all (bool): Send a mail after all is done
        """
        print_info()
        num_samples = self.parameters['samples']
        for i_samples, seed_sample in zip(range(num_samples), self.parameters['seeds']):
            print("\t", "Starting sample", i_samples + 1, "of", num_samples)
            Random.set_seed(seed_sample)
            for model, kwargs in self.models:
                # Skip models that do not match the current platform unless
                # ignore_plattform was requested.
                if (model.is_gpu_optimized() and not const.RUNNING_ON_GPU) and not self.ignore_plattform:
                    print("\t\t", "Model", model.__name__, "is GPU optimized, but we are not on a GPU! -> Skipped")
                elif (const.RUNNING_ON_GPU and not model.is_gpu_optimized()) and not self.ignore_plattform:
                    print("\t\t", "We are on a GPU, but model", model.__name__, "is not GPU optimized!-> Skipped")
                else:
                    print("\t\t", "Running model", model.__name__)
                    kwargs['ignore_cache'] = self.parameters['ignore_cache']
                    kwargs['write_model'] = self.parameters['write_model']
                    e = Executor(
                        model,
                        self.annotator_class,
                        self.corpus,
                        annotator_preprocessor=self.annotator_preprocessor,
                        percentage_train=self.parameters['percentage_train']
                    )
                    r = e.exec(**kwargs)
                    if mail_each:
                        Mail.send_variable(r, subject=str(model.__name__) + " has finished")
            if mail_sample:
                Mail.send_variable({
                    'models': [model.__name__ for model, _ in self.models],
                    'sample': str(i_samples + 1) + " of " + str(num_samples),
                    'seed': seed_sample
                }, subject="A sample has finished")
        if mail_all:
            Mail.send_variable({
                'models': [model.__name__ for model, _ in self.models],
                'samples': num_samples,
                'seeds': self.parameters['seeds']
            }, subject="The Evaluation has finished")
Run one or multiple evaluations, on one or more models.
Examples
e = Evaluation(samples=4) e.set_corpus(TwentyNews(subgroups=['misc-forsale'])) e.set_annotator(Wiktionary) e.add_model(iSCDMatrix, num_scds_train=2) # OR e.add_models([iSCDMatrix, IsSCDBert]) # OR e.all_models() e.run()
#  
Evaluation(
ignore_plattform=False,
samples=2,
seeds=[],
ignore_cache=False,
write_model=True,
percentage_train=0.8
)
View Source
def __init__(self, ignore_plattform=False, samples=2, seeds=None, ignore_cache=False, write_model=True, percentage_train=0.8):
    """
        Args:
            ignore_plattform (bool): The class detects if running on GPU or CPU, and will
                only run GPU models on GPU and CPU models on CPU unless this is set to ``True``
            **kwargs: See `set_parameters()`
    """
    self.ignore_plattform = ignore_plattform
    # NOTE: seeds defaults to None (not []) so no shared default list is ever
    # mutated by _init_parameters(); a caller-supplied list is copied for the
    # same reason. None is filled with random seeds by _init_parameters().
    self.parameters = {
        'samples': samples,
        'seeds': list(seeds) if seeds is not None else None,
        'ignore_cache': ignore_cache,
        'write_model': write_model,
        'percentage_train': percentage_train
    }
    self._init_parameters()
    self.models = []
Args
- ignore_plattform (bool): The class detects if running on GPU or CPU, and will only run GPU models on GPU and CPU models on CPU unless this is set to
True
- **kwargs: See
set_parameter()
#  
def
set_parameters(
self,
samples=None,
seeds=None,
ignore_cache=None,
write_model=None,
percentage_train=None
):
View Source
def set_parameters(self, samples=None, seeds=None, ignore_cache=None, write_model=None, percentage_train=None):
    """
        Set parameters about the evaluations to run.
        Setting a kwarg to `None` will leave it unchanged.

        Args:
            samples (int): The number of times to run each evaluation
            seeds (list of int, optional): The seeds to use for each evaluation (len == samples!)
                If list is smaller, generate random seeds
            ignore_cache (bool): Will be passed to each model, see `core.model.base.Model`
            write_model (bool): Will be passed to each model, see `core.model.base.Model`
            percentage_train (float): The percentage of the corpus to use for training (rest will be used for evaluation)
    """
    if samples is not None:
        self.parameters['samples'] = samples
    if seeds is not None:
        # copy so _init_parameters() never mutates the caller's list
        self.parameters['seeds'] = list(seeds)
    if ignore_cache is not None:
        self.parameters['ignore_cache'] = ignore_cache
    if write_model is not None:
        self.parameters['write_model'] = write_model
    if percentage_train is not None:
        self.parameters['percentage_train'] = percentage_train
    self._init_parameters()
Set parameters about the evaluations to run.
Setting a kwarg to None
will leave it unchanged.
Args
- samples (int): The number of times to run each evaluation
- seeds (list of int, optional): The seeds to use for each evaluation (len == samples!) If list is smaller, generate random seeds
- ignore_cache (bool): Will be passed to each model, see
core.model.base.Model
- write_model (bool): Will be passed to each model, see
core.model.base.Model
- percentage_train (float): The percentage of the corpus to use for training (rest will be used for evaluation)
View Source
def set_corpus(self, corpus):
    """
        Set the corpus to use

        Args:
            corpus (`core.corpus.corpus.Corpus` or list of `core.corpus.corpus.Corpus`): The corpus to evaluate and train on (as **initiated object**)
    """
    # Stored as-is; consumed later by run() when constructing the Executor.
    self.corpus = corpus
Set the corpus to use
Args
- corpus (
core.corpus.corpus.Corpus
or list of core.corpus.corpus.Corpus
): The corpus to evaluate and train on (as initiated object)
View Source
def set_annotator(self, annotator_class, annotator_preprocessor=None):
    """
        Set the annotator to use

        Args:
            annotator_class (`core.corpus.annotator.Annotator` or list of `core.corpus.annotator.Annotator`): The annotator to use (as class, not an **initiated object!**)
            annotator_preprocessor (`core.corpus.preprocess.Preprocessor`): The preprocessor to use for annotator, if `None` uses `core.corpus.preprocess.DefaultPreprocessor` (no list allowed, will use same for all corpora!)
    """
    # Both values are stored uninstantiated; run() forwards them to Executor.
    self.annotator_class = annotator_class
    self.annotator_preprocessor = annotator_preprocessor
Set the annotator to use
Args
- annotator_class (
core.corpus.annotator.Annotator
or list of core.corpus.annotator.Annotator
): The annotator to use (as class, not an initiated object!) - annotator_preprocessor (
core.corpus.preprocess.Preprocessor
): The preprocessor to use for annotator, if None
uses core.corpus.preprocess.DefaultPreprocessor
(no list allowed, will use same for all corpora!)
View Source
def add_model(self, model_class, **kwargs):
    """
        Add a model to the list of models to evaluate.

        Args:
            model_class (`core.model.base.Model`): The model to use
            **kwargs: Parameters to pass to the model
    """
    # Each entry is a (class, kwargs) pair; run() later injects
    # ignore_cache/write_model into the kwargs before execution.
    self.models.append((model_class, kwargs))
Add a model to the list of models to evaluate.
Args
- model_class (
core.model.base.Model
): The model to use - **kwargs: Parameters to pass to the model
View Source
def add_models(self, model_classes):
    """
        Add multiple models to the list of models to evaluate.
        Use `add_model()` to add custom parameters
    """
    # Delegate to add_model() so every entry is registered the same way,
    # each with default (empty) model parameters.
    for cls in model_classes:
        self.add_model(cls)
Add multiple models to the list of models to evaluate.
Use add_model()
to add custom parameters
View Source
def all_models(self):
    """
        Evaluate on all models.
        Will run `add_models()` for `core.model.exec.Executor.ALL_MODELS`.
    """
    self.add_models(Executor.ALL_MODELS)
Evaluate on all models.
Will run add_models()
for core.model.exec.Executor.ALL_MODELS
.
View Source
def run(self, mail_each=False, mail_sample=False, mail_all=True):
    """
        Start the evaluation.

        Args:
            mail_each (bool): Send a mail after each evaluated model
            mail_sample (bool): Send a mail after each completed sample (each model done)
            mail_all (bool): Send a mail after all is done
    """
    print_info()
    num_samples = self.parameters['samples']
    # One pass ("sample") per seed; _init_parameters guarantees
    # len(seeds) == samples, so zip covers every sample exactly once.
    for i_samples, seed_sample in zip(range(num_samples), self.parameters['seeds']):
        print("\t", "Starting sample", i_samples + 1, "of", num_samples)
        Random.set_seed(seed_sample)
        for model, kwargs in self.models:
            # Platform gate: unless ignore_plattform is set, GPU-optimized
            # models only run on GPU hosts and vice versa.
            if (model.is_gpu_optimized() and not const.RUNNING_ON_GPU) and not self.ignore_plattform:
                print("\t\t","Model", model.__name__, "is GPU optimized, but we are not on a GPU! -> Skipped")
            elif (const.RUNNING_ON_GPU and not model.is_gpu_optimized()) and not self.ignore_plattform:
                print("\t\t", "We are on a GPU, but model", model.__name__, "is not GPU optimized!-> Skipped")
            else:
                print("\t\t", "Running model", model.__name__)
                # NOTE(review): this writes into the kwargs dict stored in
                # self.models, so the injected keys persist across runs.
                kwargs['ignore_cache'] = self.parameters['ignore_cache']
                kwargs['write_model'] = self.parameters['write_model']
                e = Executor(
                    model,
                    self.annotator_class,
                    self.corpus,
                    annotator_preprocessor=self.annotator_preprocessor,
                    percentage_train=self.parameters['percentage_train']
                )
                r = e.exec(**kwargs)
                if mail_each:
                    Mail.send_variable(r, subject=str(model.__name__) + " has finished")
        if mail_sample:
            # Sent once per completed sample (after all models in it ran).
            Mail.send_variable({
                'models' : [ model.__name__ for model, _ in self.models ],
                'sample' : str(i_samples+1) + " of " + str(num_samples),
                'seed' : seed_sample
            }, subject="A sample has finished")
    if mail_all:
        # Final summary mail after all samples finished.
        Mail.send_variable({
            'models' : [ model.__name__ for model, _ in self.models ],
            'samples' : num_samples,
            'seeds' : self.parameters['seeds']
        }, subject="The Evaluation has finished")
Start the evaluation.
Args
- mail_each (bool): Send a mail after each evaluated model
- mail_sample (bool): Send a mail after each completed sample (each model done)
- mail_all (bool): Send a mail after all is done