core.experiments.preprocess_once
Experiments with minimal preprocessing
This is an entrypoint file runnable via
./experiments/preprocess_once.py
Runs the default experiments, but with minimal preprocessing only.
=> It may also be interesting to take a look at the source →
View Source
#!/usr/bin/env python3
"""
# Experiments with minimal preprocessing

> This is an *entrypoint file* runnable via ``./experiments/preprocess_once.py``

Runs the default experiments, but with minimal preprocessing only.

=> It may also be interesting to take a look at the source →
"""

from core.evaluation import Results, Evaluation
from core.corpus import TwentyNews, Wiktionary, MinimalPreprocessor
from core.model.transformer import (
    IsSCDBert,
    IsNextSCDBert,
    SelectSCDBert,
    GivenTextFindSCDBert,
)
from core.model.scdmatrix import iSCDMatrix, MPSCDMatrix


def _run_experiments():
    """Set up the evaluation, register all models, run it and export results.

    The corpus and the annotator are both configured with a
    ``MinimalPreprocessor``. After the run, a ``Results`` object is created
    and written out as JSON and as CSV.
    """
    evaluation = Evaluation(samples=2, seeds=[123, 456])

    # Corpus and annotator each get their own minimal preprocessor instance.
    evaluation.set_corpus(TwentyNews(preprocessor=MinimalPreprocessor()))
    evaluation.set_annotator(
        Wiktionary,
        annotator_preprocessor=MinimalPreprocessor(),
    )

    # Default models.
    default_models = [
        iSCDMatrix,
        IsSCDBert,
        IsNextSCDBert,
        SelectSCDBert,
        GivenTextFindSCDBert,
    ]
    evaluation.add_models(default_models)

    # Author's note: works better when not using d2v.
    evaluation.add_model(MPSCDMatrix, scd_mapping='ia')

    # Author's note: with a split annotator the performance is worse
    # (task too difficult?), so these two are added without splitting.
    evaluation.add_model(MPSCDMatrix, split_annotator=False)
    evaluation.add_model(GivenTextFindSCDBert, split_annotator=False)

    # NOTE(review): the mail_* flags presumably control result notifications;
    # confirm against core.evaluation.Evaluation.run.
    evaluation.run(mail_sample=True, mail_all=False)

    # Results is instantiated only after the run has finished.
    results = Results()
    results.write_json()
    results.write_csv()


if __name__ == "__main__":
    _run_experiments()