core.utils.functions

View Source
import re, json, os

import core.utils.const as const
from core.utils.external import NumpyEncoder

def clear_filename(s):
	'''
		Reduce a string to characters that are safe in a filename.

		Spaces are turned into underscores first; afterwards every character
		that is not an ASCII letter, a digit, an underscore or a hyphen is removed.
	'''
	underscored = s.replace(' ', '_')
	return re.sub(r'[^0-9a-zA-Z_\-]', '', underscored)

def read_file(filename):
	'''
		Read a text file and return its non-empty lines as one string.

		Every line is stripped of surrounding whitespace; lines that are empty
		after stripping are skipped. Each kept line is preceded by a newline
		character, so a non-empty result starts with a newline. Bytes that
		cannot be decoded are dropped (``errors='ignore'``).

		Returns an empty string if the file contains no non-empty lines.
	'''
	with open(filename, 'r', errors='ignore') as f:
		stripped = (line.strip() for line in f)
		# Join once instead of growing the string with += for every line.
		return "".join("\n" + line for line in stripped if line)

def read_json_file(filename, errors=None, encoding=None):
	'''
		Read a file and return the JSON value it contains.

		``errors`` and ``encoding`` are passed through unchanged to ``open``.
	'''
	# Context manager: the handle is closed even when parsing raises.
	with open(filename, "r", errors=errors, encoding=encoding) as f:
		return json.load(f)

def write_file(filename, content):
	'''
		Write content (= string) to file; the file is opened in mode "w",
		so existing content is replaced.
	'''
	# Context manager: the handle is closed even when the write raises.
	with open(filename, "w") as f:
		f.write(content)

def write_json_file(filename, content):
	'''
		Serialize content as JSON text (indent of 2, using NumpyEncoder as the
		encoder class) and write that text to file via write_file.
	'''
	serialized = json.dumps(content, cls=NumpyEncoder, indent=2)
	write_file(filename, serialized)

def check_and_create_folder(path):
	'''
		Make sure that the given path/folder (=string) exists (will create if not).

		Missing parent folders are created as well. Nothing happens if the
		folder already exists; a FileExistsError is raised if the path exists
		but is not a folder.
	'''
	# makedirs with exist_ok avoids the check-then-create race of
	# isdir() + mkdir() and also handles nested paths.
	os.makedirs(path, exist_ok=True)

def strip_all(cachename):
	'''
		Classes like Corpus, Annotator and AnnotatedCorpus put the used seed and other *meta data*
		into their cachename; this function removes that data again.

		Concretely, every run of non-underscore characters that is enclosed by two
		underscores is deleted together with both underscores.

		``strip_all(foo.get_cachename())`` gives the name without values like seeds, splits, ...

		(see also `core.utils.random.Random.strip_seed`)
	'''
	meta_segment = re.compile('_[^_]+_')
	return meta_segment.sub('', cachename)

def print_info():
	"""
		Prints info about runtime (GPU or CPU and number of GPUs found).

		Whether a GPU is used is read from ``const.RUNNING_ON_GPU``; the GPU
		selection is read from the environment variable ``CUDA_VISIBLE_DEVICES``
		and counted by splitting its value at commas.
	"""
	if const.RUNNING_ON_GPU:
		print("=>", "Found GPU to run on via PyTorch.")
		dev = os.environ.get('CUDA_VISIBLE_DEVICES')
		# Identity check is the idiomatic (and safe) way to test for None.
		if dev is None:
			print("\t->", "Found no GPU selection, will use all available.")
		else:
			print("\t->", "Found", len(dev.split(',')), "GPU(s):", dev)
	else:
		print("=>", "No GPU found, running on CPU!")
#   def clear_filename(s):
View Source
def clear_filename(s):
	'''
		Reduce a string to characters that are safe in a filename.

		Spaces are turned into underscores first; afterwards every character
		that is not an ASCII letter, a digit, an underscore or a hyphen is removed.
	'''
	underscored = s.replace(' ', '_')
	return re.sub(r'[^0-9a-zA-Z_\-]', '', underscored)

Clean a string so that it can be used as a filename

#   def read_file(filename):
View Source
def read_file(filename):
	'''
		Read a text file and return its non-empty lines as one string.

		Every line is stripped of surrounding whitespace; lines that are empty
		after stripping are skipped. Each kept line is preceded by a newline
		character, so a non-empty result starts with a newline. Bytes that
		cannot be decoded are dropped (``errors='ignore'``).

		Returns an empty string if the file contains no non-empty lines.
	'''
	with open(filename, 'r', errors='ignore') as f:
		stripped = (line.strip() for line in f)
		# Join once instead of growing the string with += for every line.
		return "".join("\n" + line for line in stripped if line)

Read file and return content as string

#   def read_json_file(filename, errors=None, encoding=None):
View Source
def read_json_file(filename, errors=None, encoding=None):
	'''
		Read a file and return the JSON value it contains.

		``errors`` and ``encoding`` are passed through unchanged to ``open``.
	'''
	# Context manager: the handle is closed even when parsing raises.
	with open(filename, "r", errors=errors, encoding=encoding) as f:
		return json.load(f)

Read a file and return the JSON object it contains

#   def write_file(filename, content):
View Source
def write_file(filename, content):
	'''
		Write content (= string) to file; the file is opened in mode "w",
		so existing content is replaced.
	'''
	# Context manager: the handle is closed even when the write raises.
	with open(filename, "w") as f:
		f.write(content)

Write content (= string) to file

#   def write_json_file(filename, content):
View Source
def write_json_file(filename, content):
	'''
		Serialize content as JSON text (indent of 2, using NumpyEncoder as the
		encoder class) and write that text to file via write_file.
	'''
	serialized = json.dumps(content, cls=NumpyEncoder, indent=2)
	write_file(filename, serialized)

Write content to file, but encode via json (so dicts, lists may be dumped to file)

#   def check_and_create_folder(path):
View Source
def check_and_create_folder(path):
	'''
		Make sure that the given path/folder (=string) exists (will create if not).

		Missing parent folders are created as well. Nothing happens if the
		folder already exists; a FileExistsError is raised if the path exists
		but is not a folder.
	'''
	# makedirs with exist_ok avoids the check-then-create race of
	# isdir() + mkdir() and also handles nested paths.
	os.makedirs(path, exist_ok=True)

Make sure that the given path/folder (=string) exists (will create if not)

#   def strip_all(cachename):
View Source
def strip_all(cachename):
	'''
		Classes like Corpus, Annotator and AnnotatedCorpus put the used seed and other *meta data*
		into their cachename; this function removes that data again.

		Concretely, every run of non-underscore characters that is enclosed by two
		underscores is deleted together with both underscores.

		``strip_all(foo.get_cachename())`` gives the name without values like seeds, splits, ...

		(see also `core.utils.random.Random.strip_seed`)
	'''
	meta_segment = re.compile('_[^_]+_')
	return meta_segment.sub('', cachename)

Classes like Corpus, Annotator and AnnotatedCorpus add the used seed and other meta data to their cachename, this function strips this data from a cachename.

strip_all(foo.get_cachename()) returns same name, but no values like seeds, splits, ...

(see also core.utils.random.Random.strip_seed)