core.experiments.preprocess_once

Experiments with minimal preprocessing

This is an entrypoint file runnable via ./experiments/preprocess_once.py

File to run the default experiments but with minimal preprocessing only.

=> It may also be interesting to take a look at this source →

View Source
#!/usr/bin/env python3
'''
	# Experiments with minimal preprocessing

	> This is an *entrypoint file* runnable via ``./experiments/preprocess_once.py``

	File to run the default experiments but with minimal preprocessing only.  
	
	=> It may also be interesting to take a look at this source →
'''
from core.evaluation import Results, Evaluation

from core.corpus import TwentyNews, Wiktionary, MinimalPreprocessor

from core.model.transformer import IsSCDBert, IsNextSCDBert, SelectSCDBert, GivenTextFindSCDBert
from core.model.scdmatrix import iSCDMatrix, MPSCDMatrix

if __name__ == "__main__":
	# Entry point: configure the evaluation, register the models, run it and
	# write out the results.
	evaluation = Evaluation(samples=2, seeds=[123, 456])

	# Corpus and annotator each receive their own MinimalPreprocessor instance.
	corpus = TwentyNews(preprocessor=MinimalPreprocessor())
	evaluation.set_corpus(corpus)
	annotator_preprocessor = MinimalPreprocessor()
	evaluation.set_annotator(Wiktionary, annotator_preprocessor=annotator_preprocessor)

	# Default models.
	default_models = [iSCDMatrix, IsSCDBert, IsNextSCDBert, SelectSCDBert, GivenTextFindSCDBert]
	evaluation.add_models(default_models)

	# Author's note: works better when not using d2v.
	evaluation.add_model(MPSCDMatrix, scd_mapping='ia')

	# Author's note: with a split annotator the performance is worse
	# (task too difficult?), hence split_annotator=False for these two.
	for model in (MPSCDMatrix, GivenTextFindSCDBert):
		evaluation.add_model(model, split_annotator=False)

	evaluation.run(mail_sample=True, mail_all=False)

	# Write the results as JSON and as CSV.
	results = Results()
	results.write_json()
	results.write_csv()