diff --git a/weio/fast_output_file.py b/weio/fast_output_file.py index b5e7a53..1b81547 100644 --- a/weio/fast_output_file.py +++ b/weio/fast_output_file.py @@ -81,7 +81,10 @@ def formatName(): def __init__(self, filename=None, **kwargs): """ Class constructor. If a `filename` is given, the file is read. """ - self.filename = filename + # Data + self.filename = filename + self.data = None # pandas.DataFrame + self.description = '' # string if filename: self.read(**kwargs) @@ -97,10 +100,8 @@ def read(self, filename=None, **kwargs): raise OSError(2,'File not found:',self.filename) if os.stat(self.filename).st_size == 0: raise EmptyFileError('File is empty:',self.filename) - # --- Calling (children) function to read - self._read(**kwargs) - def _read(self): + # --- Actual reading def readline(iLine): with open(self.filename) as f: for i, line in enumerate(f): @@ -110,26 +111,26 @@ def readline(iLine): break ext = os.path.splitext(self.filename.lower())[1] - self.info={} + info={} self['binary']=False try: if ext in ['.out','.elev','.dbg','.dbg2']: - self.data, self.info = load_ascii_output(self.filename) + self.data, info = load_ascii_output(self.filename) elif ext=='.outb': - self.data, self.info = load_binary_output(self.filename) + self.data, info = load_binary_output(self.filename) self['binary']=True elif ext=='.elm': F=CSVFile(filename=self.filename, sep=' ', commentLines=[0,2],colNamesLine=1) self.data = F.data del F - self.info['attribute_units']=readline(3).replace('sec','s').split() - self.info['attribute_names']=self.data.columns.values + info['attribute_units']=readline(3).replace('sec','s').split() + info['attribute_names']=self.data.columns.values else: if isBinary(self.filename): - self.data, self.info = load_binary_output(self.filename) + self.data, info = load_binary_output(self.filename) self['binary']=True else: - self.data, self.info = load_ascii_output(self.filename) + self.data, info = load_ascii_output(self.filename) self['binary']=False except MemoryError as e: raise BrokenReaderError('FAST Out File {}: Memory error encountered\n{}'.format(self.filename,e)) @@ -138,11 +139,33 @@ def readline(iLine): if self.data.shape[0]==0: raise EmptyFileError('This FAST output file contains no data: {}'.format(self.filename)) - if self.info['attribute_units'] is not None: - self.info['attribute_units'] = [re.sub(r'[()\[\]]','',u) for u in self.info['attribute_units']] - def _write(self, binary=None, fileID=4): + # --- Convert to DataFrame + if info['attribute_units'] is not None: + info['attribute_units'] = [re.sub(r'[()\[\]]','',u) for u in info['attribute_units']] + if len(info['attribute_names'])!=len(info['attribute_units']): + cols=info['attribute_names'] + print('[WARN] not all columns have units! Skipping units') + else: + cols=[n+'_['+u.replace('sec','s')+']' for n,u in zip(info['attribute_names'], info['attribute_units'])] + else: + cols=info['attribute_names'] + + if isinstance(self.data, pd.DataFrame): + self.data.columns = cols + else: + if len(cols)!=self.data.shape[1]: + raise BrokenFormatError('Inconstistent number of columns between headers ({}) and data ({}) for file {}'.format(len(cols), self.data.shape[1], self.filename)) + self.data = pd.DataFrame(data=self.data, columns=cols) + + + def write(self, filename=None, binary=None, fileID=4): + if filename: + self.filename = filename + if not self.filename: + raise Exception('No filename provided') + # Calling children function if binary is None: binary = self['binary'] @@ -152,40 +175,48 @@ def _write(self, binary=None, fileID=4): else: # ascii output with open(self.filename,'w') as f: - f.write('\t'.join(['{:>10s}'.format(c) for c in self.info['attribute_names']])+'\n') - f.write('\t'.join(['{:>10s}'.format('('+u+')') for u in self.info['attribute_units']])+'\n') + f.write('\t'.join(['{:>10s}'.format(c) for c in self.channels])+'\n') + f.write('\t'.join(['{:>10s}'.format('('+u+')') for u in self.units])+'\n') # TODO better.. f.write('\n'.join(['\t'.join(['{:10.4f}'.format(y[0])]+['{:10.3e}'.format(x) for x in y[1:]]) for y in self.data])) + @property + def channels(self): + if self.data is None: + return [] + def no_unit(s): + s=s.replace('(','[').replace(')',']').replace(' [','_[').strip(']') + try: + return s.split('_[')[0].strip() + except: + return s.strip() + channels = [no_unit(c) for c in self.data.columns] + return channels + + @property + def units(self): + if self.data is None: + return [] + def unit(s): + s=s.replace('(','[').replace(')',']').replace(' [','_[').strip(']') + try: + return s.split('_[')[1].strip() + except: + return s.strip() + units = [unit(c) for c in self.data.columns] + return units + def toDataFrame(self): """ Returns object into one DataFrame, or a dictionary of DataFrames""" - # --- Example (returning one DataFrame): - # return pd.DataFrame(data=np.zeros((10,2)),columns=['Col1','Col2']) - if self.info['attribute_units'] is not None: - if len(self.info['attribute_names'])!=len(self.info['attribute_units']): - cols=self.info['attribute_names'] - print('[WARN] not all columns have units! Skipping units') - else: - cols=[n+'_['+u.replace('sec','s')+']' for n,u in zip(self.info['attribute_names'],self.info['attribute_units'])] - else: - cols=self.info['attribute_names'] - if isinstance(self.data, pd.DataFrame): - df= self.data - df.columns=cols - else: - if len(cols)!=self.data.shape[1]: - raise BrokenFormatError('Inconstistent number of columns between headers ({}) and data ({}) for file {}'.format(len(cols), self.data.shape[1], self.filename)) - df = pd.DataFrame(data=self.data,columns=cols) - - return df + return self.data def writeDataFrame(self, df, filename, binary=True): writeDataFrame(df, filename, binary=binary) def __repr__(self): s='<{} object> with attributes:\n'.format(type(self).__name__) - s+=' - info ({})\n'.format(type(self.info)) s+=' - data ({})\n'.format(type(self.data)) + s+=' - description: {}\n'.format(self.description) s+='and keys: {}\n'.format(self.keys()) return s @@ -206,9 +237,9 @@ def toOUTB(self, filename=None, extension='.outb', fileID=4, noOverWrite=True, * # NOTE: fileID=2 will chop the channels name of long channels use fileID4 instead channels = self.data - chanNames = self.info['attribute_names'] - chanUnits = self.info['attribute_units'] - descStr = self.info['description'] + chanNames = self.channels + chanUnits = self.units + descStr = self.description if isinstance(descStr, list): descStr=(''.join(descStr[:2])).replace('\n','') writeBinary(filename, channels, chanNames, chanUnits, fileID=fileID, descStr=descStr) @@ -270,7 +301,7 @@ def load_ascii_output(filename, method='numpy', encoding='ascii'): headerRead=True break if not headerRead: - raise WrongFormatError('Could not find the keyword "Time" or "Alpha" in the first {} lines of the file'.format(maxHeaderLines)) + raise WrongFormatError('Could not find the keyword "Time" or "Alpha" in the first {} lines of the file {}'.format(maxHeaderLines, filename)) nHeader = len(header)+1 nCols = len(info['attribute_names']) @@ -309,29 +340,30 @@ def load_ascii_output(filename, method='numpy', encoding='ascii'): return data, info -def load_binary_output(filename, use_buffer=True): +def load_binary_output(filename, use_buffer=False): """ 03/09/15: Ported from ReadFASTbinary.m by Mads M Pedersen, DTU Wind 24/10/18: Low memory/buffered version by E. Branlard, NREL - 18/01/19: New file format for exctended channels, by E. Branlard, NREL - - Info about ReadFASTbinary.m: - % Author: Bonnie Jonkman, National Renewable Energy Laboratory - % (c) 2012, National Renewable Energy Laboratory - % - % Edited for FAST v7.02.00b-bjj 22-Oct-2012 + 18/01/19: New file format for extended channels, by E. Branlard, NREL + 20/11/23: Improved performances using np.fromfile, by E. Branlard, NREL """ StructDict = { 'uint8': ('B', 1, np.uint8), 'int16':('h', 2, np.int16), 'int32':('i', 4, np.int32), 'float32':('f', 4, np.float32), - 'float64':('d', 8, np.float64)} - def fread(fid, n, dtype): + 'float64': ('d', 8, np.float64) + } + + def freadLegacy(fid, n, dtype): fmt, nbytes, npdtype = StructDict[dtype] - #return np.array(struct.unpack(fmt * n, fid.read(nbytes * n)), dtype=npdtype) return struct.unpack(fmt * n, fid.read(nbytes * n)) + def fread(fid, n, dtype): + fmt, nbytes, npdtype = StructDict[dtype] + return np.fromfile(fid, count=n, dtype=npdtype) # Improved performances + #return struct.unpack(fmt * n, fid.read(nbytes * n)) + def freadRowOrderTableBuffered(fid, n, type_in, nCols, nOff=0, type_out='float64'): """ Reads of row-ordered table from a binary file. @@ -346,7 +378,7 @@ def freadRowOrderTableBuffered(fid, n, type_in, nCols, nOff=0, type_out='float64 @author E.Branlard, NREL """ - fmt, nbytes = {'uint8': ('B', 1), 'int16':('h', 2), 'int32':('i', 4), 'float32':('f', 4), 'float64':('d', 8)}[type_in] + fmt, nbytes = StructDict[type_in][:2] nLines = int(n/nCols) GoodBufferSize = 4096*40 nLinesPerBuffer = int(GoodBufferSize/nCols) @@ -361,7 +393,8 @@ def freadRowOrderTableBuffered(fid, n, type_in, nCols, nOff=0, type_out='float64 while nIntRead