core.utils.const
Directories and other parameters are defined globally here; the values are chosen to work with the docker-compose setup.
See source for values!
View Source
"""Global directories and parameters.

All values in this module are chosen to work with the docker-compose
setup. See the source for the concrete values.
"""

import os

import torch

# ---------------------------------------------------------------------------
# Directories and paths
# ---------------------------------------------------------------------------

# Location nltk uses to store its data (stemming etc.).
# The environment variable is defined in the Dockerfile; the value is None
# when the variable is not set.
NLTK_DATA = os.getenv("NLTK_DATA")

# Corpora and caches.
DATASET_DIR = "/home/user/data/"
CORPUS_CACHEDIR = "/home/user/data/cache/"
# The environment variable is defined in the Dockerfile; None when unset.
TRANSFORMERS_CACHE = os.getenv("TRANSFORMERS_CACHE")
TOKENIZER_CACHEDIR = "/home/user/models/tokenizer_cache/"

# Results and reports.
RESULTS_DIR = "/home/user/results/"
TRAINING_LOGFILES = os.path.join(RESULTS_DIR, "logs")
FINETUNED_MODELDIR = "/home/user/models/finetuned/"
MATRIX_MODELDIR = "/home/user/models/matrix/"

# ---------------------------------------------------------------------------
# Data sets
# ---------------------------------------------------------------------------

# Data sets the downloader fetches.
DATA_SETS = [
    # "arxiv",  (currently disabled)
    "twentynews",
    "wiktionary",
    "quotes",
    "manuscriptcultures",
]

# Pretrained models the downloader fetches.
BERT_MODELS = ["bert-base-uncased"]

# Model that BERT uses by default: the first entry of BERT_MODELS.
BERT_MODEL_DEFAULT = BERT_MODELS[0]

# Stop transformers from checking for updates of the cached pretrained
# models before each run.
# core.download.init_transformers() will always check for newer models.
TRANSFORMERS_NO_NETWORK = True

# ---------------------------------------------------------------------------
# Global parameters
# ---------------------------------------------------------------------------

# Is a GPU available?
RUNNING_ON_GPU = torch.cuda.is_available()

# Batch sizes (for a single row of input_ids).
# 40 was chosen for a 40 GB NVIDIA A100, 4 for a machine with 16 GB RAM.
TRAIN_BATCH_SIZE = EVAL_BATCH_SIZE = 40 if RUNNING_ON_GPU else 4