core.experiments.other_corpus

Default on other corpus

This is an entrypoint file runnable via ./experiments/other_corpus.py

Runs the default experiments, but on another corpus.

=> It may also be interesting to take a look at this source →

View Source

#!/usr/bin/env python3
'''
	# Default on other corpus

	> This is an *entrypoint file* runnable via ``./experiments/other_corpus.py``

	Runs the default experiments, but on another corpus.  
	
	=> It may also be interesting to take a look at this source &rarr;
'''
from core.evaluation import Results, Evaluation

from core.corpus import Wiktionary, ManuscriptCultures

from core.model.transformer import IsSCDBert, IsNextSCDBert, SelectSCDBert, GivenTextFindSCDBert
from core.model.scdmatrix import iSCDMatrix, MPSCDMatrix

if __name__ == "__main__":
	# Entry point: build an evaluation, register the models to compare,
	# run it, and finally export the collected results.
	evaluation = Evaluation(samples=5, seeds=[123, 456])

	# Use the other corpus. Note that the corpus is handed over as an
	# instance, whereas the annotator is handed over as a class.
	evaluation.set_corpus(ManuscriptCultures())
	evaluation.set_annotator(Wiktionary)

	# The default model line-up.
	default_models = [
		iSCDMatrix,
		IsSCDBert,
		IsNextSCDBert,
		SelectSCDBert,
		GivenTextFindSCDBert,
	]
	evaluation.add_models(default_models)

	# Author's note: MPSCDMatrix works better when not using d2v.
	evaluation.add_model(MPSCDMatrix, scd_mapping='ia')

	# Author's note: with a split annotator the performance is worse
	# (the task may be too difficult), so these two models are also
	# registered with split_annotator=False.
	for model in (MPSCDMatrix, GivenTextFindSCDBert):
		evaluation.add_model(model, split_annotator=False)

	# NOTE(review): mail_sample / mail_all presumably control e-mail
	# notifications during the run -- confirm against Evaluation.run.
	evaluation.run(mail_sample=True, mail_all=False)

	# Export the results as both JSON and CSV.
	results = Results()
	results.write_json()
	results.write_csv()