core.experiments.context_sensitive

Experiments of context-sensitive model

This is an entrypoint file runnable via ./experiments/context_sensitive.py

File to run the experiments for the context-sensitive model.

=> But it may also be interesting to take a look at this source →

View Source
#!/usr/bin/env python3
'''
	# Experiments of context-sensitive model

	> This is an *entrypoint file* runnable via ``./experiments/context_sensitive.py``

	File to run the experiments for the context-sensitive model.  
	
	=> But it may also be interesting to take a look at this source →
'''
from core.evaluation import Results, Evaluation

from core.corpus import TwentyNews, Wiktionary, Quotes, ManuscriptCultures

from core.model.transformer import IsSCDBert, IsNextSCDBert, SelectSCDBert, GivenTextFindSCDBert
from core.model.scdmatrix import iSCDMatrix, MPSCDMatrix

if __name__ == "__main__":
	# Script entrypoint: configure one Evaluation, register corpora,
	# annotators and models on it, run it, then export the results.
	evaluation = Evaluation(seeds=[123, 456], samples=5)

	# TwentyNews subgroups handed to the corpus, listed by prefix.
	computing_groups = [
		'comp-graphics',
		'comp-os-ms-windows-misc',
		'comp-sys-ibm-pc-hardware',
		'comp-sys-mac-hardware',
		'comp-windows-x',
	]
	science_groups = [
		'sci-crypt',
		'sci-electronics',
		'sci-med',
		'sci-space',
	]
	newsgroups = computing_groups + science_groups

	# Corpus instances are passed to set_corpus; the annotators are
	# passed to set_annotator as classes, not instances.
	corpora = [TwentyNews(subgroups=newsgroups), ManuscriptCultures()]
	evaluation.set_corpus(corpora)
	annotators = [Wiktionary, Quotes]
	evaluation.set_annotator(annotators)

	# Models registered without any extra keyword arguments.
	default_models = [
		iSCDMatrix,
		IsSCDBert,
		IsNextSCDBert,
		SelectSCDBert,
		GivenTextFindSCDBert,
	]
	evaluation.add_models(default_models)

	# Author's note: works better when not using d2v.
	evaluation.add_model(MPSCDMatrix, scd_mapping='ia')

	# Author's note: with a split annotator the performance is worse
	# (task too difficult?), so these two are also registered with
	# split_annotator=False.
	for model in (MPSCDMatrix, GivenTextFindSCDBert):
		evaluation.add_model(model, split_annotator=False)

	evaluation.run(mail_sample=True, mail_all=False)

	# Export the results as JSON first, then as CSV.
	results = Results()
	results.write_json()
	results.write_csv()