Source code for wlcsim.input

""" This module "understands" the input format of wlcsim.exe """
from __future__ import print_function

import os
import re
import warnings
from enum import Enum
from pathlib import Path

# def __init__(self, sim_dir):
#     input_dir = os.path.join(sim_dir, 'input')
#     input_file = os.path.join(input_dir, 'input')
#     if not os.path.isdir(sim_dir) \
#             or not os.path.isdir(input_dir) \
#             or not os.path.isfile(input_file):
#         raise IOError('Simulation directory ' + sim_dir + ' does not'
#                       ' contain input/input')

class InputFormat(Enum):
    """Enumeration of the input file layouts this module distinguishes.

    ``ParsedInput`` stores one of these members to remember which layout a
    file was read in, and therefore which layout to write back out.
    """

    ORIGINAL = 1
    LENA = 2
    DEFINES = 3
# Lookup table from legacy (upper-cased) parameter names to the names used by
# the newest version of the simulator. Names that are absent from this table
# are considered current and are left untouched by correct_param_name.
renamer = {
    'COL_TYPE': 'COLLISIONDETECTIONTYPE',
    'COLTYPE': 'COLLISIONDETECTIONTYPE',
    'FPT_DIST': 'COLLISIONRADIUS',
    'INTON': 'INTERPBEADLENNARDJONES',
    'N': 'NB',
    'INDMAX': 'NUMSAVEPOINTS',
}
def correct_param_name(name):
    """Return the current-generation spelling of a parameter name.

    Parameter names have changed between generations of the simulator. The
    name is upper-cased and, if it is a known legacy name (a key of
    ``renamer``), replaced by its newest form so that simulations from
    different years can be tabulated together.

    :param name: parameter name as found in an input file (any case).
    :returns: the upper-cased name, translated through ``renamer`` when a
        translation exists.
    """
    canonical = name.upper()
    return renamer.get(canonical, canonical)
def brown_to_codename(name, value):
    """Convert the legacy ``BROWN`` flag into a ``CODENAME`` parameter.

    :param name: the legacy parameter name. It is not inspected; it is
        accepted because value converters are invoked as
        ``converter(name, value)``.
    :param value: the legacy value; anything ``int()`` accepts.
    :returns: the tuple ``('CODENAME', 'bruno')`` when ``int(value) == 1``,
        otherwise ``('CODENAME', 'brad')``.
    """
    codename = 'bruno' if int(value) == 1 else 'brad'
    return ('CODENAME', codename)


# Maps each old parameter name whose value must be rewritten by
# correct_param_value to the function performing that conversion. Converters
# should expect that the values they receive may still be strings.
revalueer = {'BROWN': brown_to_codename}
def correct_param_value(name, value):
    """Translate a legacy (name, value) pair into its newest form.

    Some old parameters changed type as well as name. When ``name`` has a
    converter registered in ``revalueer``, that converter decides both the new
    name and the new value; otherwise the pair is passed through unchanged.

    :param name: parameter name (already normalised by the caller).
    :param value: parameter value as read from the input.
    :returns: a ``(name, value)`` tuple in the newest convention.
    """
    try:
        converter = revalueer[name]
    except KeyError:
        # no special handling registered for this parameter
        return (name, value)
    return converter(name, value)
class ParsedInput(object):
    """Reader/writer for the input file types used by the wlcsim simulator.

    Knows how to handle the various input file types used over the years, and
    transparently converts parameters into the newest naming conventions (see
    ``correct_param_name`` and ``correct_param_value``).

    Example::

        input = ParsedInput(file_name)
        print(input.ordered_param_names)   # see params in order defined
        print(input.ordered_param_values)  # to see values
        input.write(outfile_name)          # write clone of input file

    Attributes:
        params: dict mapping (corrected) parameter name to its value.
        ordered_param_names: parameter names in the order they were added.
        input_format: the ``InputFormat`` member used for parsing and writing.
            Also reachable under the older name ``file_format``.
        input_file: ``pathlib.Path`` of the file that was parsed, or ``None``
            when the instance was not built from a file.
    """

    def __init__(self, input_file=None, params=None):
        """Build from an input file or directly from a mapping of params.

        :param input_file: path to an input file. A simulation directory or
            its ``input`` sub-directory is also accepted and is resolved to
            the ``input`` file inside it. When given, ``params`` is ignored.
        :param params: mapping of parameter name to value, used only when
            ``input_file`` is ``None``.

        If neither argument is given, a warning is emitted and the instance
        is left empty.
        """
        self.params = {}
        self.ordered_param_names = []
        self.input_format = InputFormat.DEFINES
        self.input_file = None
        if input_file is not None:
            # if we get the sim dir or the input dir, resolve to the actual
            # input file (sim_dir -> sim_dir/input -> sim_dir/input/input)
            for _ in range(2):
                if not os.path.isfile(input_file) \
                        and os.path.isdir(input_file):
                    input_file = os.path.join(input_file, 'input')
            # store the *resolved* location so the parsers open the file and
            # not the directory that was originally passed in
            self.input_file = Path(input_file)
            self.decide_input_format()
            self.parse_params_file()
        elif params is not None:
            for name, value in params.items():
                self.append_param(name, value)
        else:
            warnings.warn('ParsedInput: no params or input file provided!')

    def __repr__(self):
        return str(self.params)

    @property
    def file_format(self):
        """Backward-compatible alias of ``input_format``."""
        return self.input_format

    @file_format.setter
    def file_format(self, value):
        self.input_format = value

    @property
    def ordered_param_values(self):
        """Parameter values, in the same order as ``ordered_param_names``."""
        return [self.params[name] for name in self.ordered_param_names]

    def append_param(self, name, value):
        """Record one parameter after normalising its name and value.

        The name is passed through ``correct_param_name`` and the resulting
        pair through ``correct_param_value`` before being stored.
        """
        name = correct_param_name(name)
        name, value = correct_param_value(name, value)
        self.ordered_param_names.append(name)
        self.params.update({name: value})

    def write(self, output_file_name):
        """Write a valid wlcsim input file containing the stored parameters.

        The layout written is selected by ``self.input_format``.

        :param output_file_name: path of the file to create or overwrite.
        :raises ValueError: if ``self.input_format`` is not a known
            ``InputFormat`` member. The check happens before the output file
            is opened, so an existing file is not truncated in that case.
        """
        fmt = self.input_format
        names = self.ordered_param_names
        if fmt == InputFormat.ORIGINAL:
            # write the three garbage lines
            chunks = ['!\n!\n\n']
            for i, name in enumerate(names):
                chunks.append('!-Record ' + str(i) + '\n! ' + name + '\n')
                chunks.append(str(self.params[name]) + '\n\n')
        elif fmt == InputFormat.LENA:
            chunks = [str(name) + ' ' + str(self.params[name]) + '\n'
                      for name in names]
        elif fmt == InputFormat.DEFINES:
            chunks = ['#define WLC_P__' + str(name) + ' '
                      + str(self.params[name]) + '\n'
                      for name in names]
        else:
            raise ValueError('wlcsim.input: attempt to print a ParsedInput'
                             ' with unknown file_format.')
        with open(output_file_name, 'w') as f:
            f.writelines(chunks)

    def decide_input_format(self):
        """Decide which of the known input formats ``self.input_file`` uses.

        A file named ``defines.inc``, or one whose first recognisable line
        starts with ``#define WLC_P__``, is treated as DEFINES. Otherwise the
        first comment line decides: Fortran-style comments (``!``) mean
        ORIGINAL, bash-style comments (``#``) mean LENA. The result is stored
        in ``self.input_format``.
        """
        # first see if the file has the expected name for the defines file
        if self.input_file.name == 'defines.inc':
            self.input_format = InputFormat.DEFINES
            return
        # then see if there are any comment lines. if so, we immediately know
        # the file type
        with open(self.input_file) as f:
            for line in f:
                if re.match(r'\s*!', line):
                    self.input_format = InputFormat.ORIGINAL
                    return
                if line.startswith('#define WLC_P__'):
                    # a defines-style file stored under another name, e.g.
                    # one produced by write()
                    self.input_format = InputFormat.DEFINES
                    return
                if line.startswith('#'):
                    self.input_format = InputFormat.LENA
                    return
        # if there are no comments, then for now, it must in fact be Lena's
        # input file type, otherwise, we would not be able to infer the param
        # names, since these are in comment lines in the original-type input
        # files
        self.input_format = InputFormat.LENA

    def parse_params_file(self):
        """Parse ``self.input_file`` into ``params``/``ordered_param_names``.

        This parser currently understands three file formats:

            1) "ORIGINAL" is the input method understood by Andy's hardcoded
               input reader in the original code used when the Spakowitz lab
               was founded.
            2) "LENA" is a spec using a slightly more general input reader
               written by Elena Koslover while Andy's student.
            3) "DEFINES" is the format of the src/defines.inc file.

        :raises ValueError: if ``self.input_format`` is not one of the above.
        """
        if self.input_format == InputFormat.ORIGINAL:
            self.parse_original_params_file()
        elif self.input_format == InputFormat.LENA:
            self.parse_lena_params_file()
        elif self.input_format == InputFormat.DEFINES:
            self.parse_defines_params_file()
        else:
            raise ValueError('wlcsim.input: attempt to parse an input file'
                             ' with unknown input_format.')

    def parse_lena_params_file(self):
        """Parse a Lena-style input file.

        Lena-style input files have comment lines starting with a "#". Any
        other non-blank lines must be of the form
        "[identifier][whitespace][value]", where an identifier is of the form
        "[_a-zA-Z][_a-zA-Z0-9]*", and a value can be a boolean, float, int or
        string. Values are always stored as strings (with surrounding
        whitespace removed) for downstream parsing as needed. Lines that do
        not fit this form are skipped.

        Identifiers, like fortran variables, are interpreted in a
        case-insensitive manner by the wlcsim program, and so will be stored
        in all-caps within the ParsedInput to signify this.
        """
        name_value_re = re.compile(r'([_A-Za-z][_A-Za-z0-9]*)\s*(.*)\s*')
        with open(self.input_file) as f:
            for line in f:
                if line.startswith('#'):
                    continue
                match = name_value_re.match(line)
                if match is None:
                    continue
                name, value = match.groups()
                # the greedy value group swallows trailing blanks; drop them
                self.append_param(name, value.strip())

    def parse_defines_params_file(self):
        """Parse a file in the format of src/defines.inc.

        Each parameter line has the form

            #define WLC_P__[PARAM_NAME] [value]

        where PARAM_NAME consists of letters, digits and underscores, and the
        value is the remainder of the line (stored as a string with
        surrounding whitespace removed). All other lines are ignored.
        """
        name_value_re = re.compile(
            r'#define\s+WLC_P__([_A-Za-z0-9]+)\s*(.*)')
        with open(self.input_file) as f:
            for line in f:
                match = name_value_re.match(line)
                if match is None:
                    continue
                name, value = match.groups()
                self.append_param(name, value.strip())

    def parse_original_params_file(self):
        """Parse an original-style input file.

        Original-style input files have three garbage lines at the top used
        for commenting, then triplets of lines describing one variable each.
        The first line of the triplet is a counter, for ease of hardcoding the
        input reader; the second line contains the variable name (which is not
        used by the input reader) in order to make parsing possible outside of
        the hardcoded wlcsim input reader; and the third line contains the
        value of the parameter itself.

        The first two lines of each triplet have a fixed form that we use to
        recognise records and extract parameter names, but these forms are not
        used by wlcsim itself, which ignores these lines completely. Thus, it
        is possible that the user specified a particular name for a parameter
        but that name does not match what wlcsim interpreted it as, since
        wlcsim simply uses the order of the parameters in this file to
        determine their identities.

        Values containing a period are stored as ``float``, all others as
        ``int``.

        :raises ValueError: if a value line cannot be converted to a number.
        """
        record_re = re.compile(r'!-Record (\d+)')
        name_re = re.compile(r'! *([_A-Za-z0-9]+)')
        # name of the parameter whose value is expected on the next line, or
        # None when we are not directly after a name line
        pending_name = None
        with open(self.input_file) as f:
            # first three lines are garbage
            for _ in range(3):
                f.readline()
            for line in f:
                if pending_name is not None:
                    text = line.strip()
                    value = float(text) if '.' in text else int(text)
                    self.append_param(pending_name, value)
                    pending_name = None
                    continue
                if record_re.search(line):
                    # record counter line; carries no information we need
                    continue
                name_match = name_re.search(line)
                if name_match:
                    pending_name = name_match.group(1).upper()