Merge pull request #2 from greole/0.6
0.6
greole committed Jan 20, 2015
2 parents 20326f6 + 31d4da4 commit 2022ff3
Showing 5 changed files with 180 additions and 48 deletions.
5 changes: 4 additions & 1 deletion .travis.yml
@@ -3,4 +3,7 @@ language: python
python:
- 2.7
install:
- python setup.py install --user
- pip install .

script:
py.test
77 changes: 47 additions & 30 deletions Owls/frames.py
@@ -9,6 +9,12 @@
from pandas import DataFrame
from pandas import concat

Series.__repr__ = (lambda x: ("Hash: {}\nTimes: {}\nLoc: {}\nValues: {}".format(
io.hash_series(x),
list(set(x.keys().get_level_values('Time'))), # avoid back and forth conversion
list(set(x.keys().get_level_values('Loc'))),
x.values))) #TODO monkey patch to use hashes

case_data_base = shelve.open(os.path.expanduser('~') + "/.owls/db")

def items_from_dict(dict, func, **kwargs):
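
The monkey-patched repr above prints a content hash plus the distinct Time and Loc index values instead of pandas' default dump. A minimal usage sketch, assuming a Series carrying the ('Time', 'Loc') MultiIndex that Owls builds (the values are made up; note that importing frames also opens the ~/.owls/db shelve):

import pandas as pd
from Owls import frames  # importing frames applies the Series.__repr__ patch

idx = pd.MultiIndex.from_tuples([(0.0, 'centreline'), (0.5, 'centreline')],
                                names=['Time', 'Loc'])
s = pd.Series([1.0, 2.0], index=idx)
print s  # -> Hash: ..., Times: [0.0, 0.5], Loc: ['centreline'], Values: [ 1.  2.]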
@@ -229,6 +235,7 @@ def __init__(self):

def insert(self, field, properties):
self.properties[field].update(properties)
return self

def select(self, field, prop, default=None):
field = self.properties[field]
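
Since insert() now returns self (the one-line change above), property updates can be chained, and select() falls back to a default when a key is absent. A hedged sketch against a hypothetical plot-properties object props:

props.insert('U', {'label': 'velocity'}) \
     .insert('T', {'label': 'temperature'})         # chaining enabled by 'return self'
label = props.select('U', 'label', default='None')  # -> 'velocity'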
@@ -336,9 +343,9 @@ def __init__(self, *args, **kwargs):
else:
os.chdir(folder) #FIXME necessary for read in?
if case_data_base.has_key(folder):
print "re-importing ",
print "re-importing",
else:
print "importing ",
print "importing",
print name + ": ",
origins, data = io.import_foam_folder(
search_format=search,
@@ -361,36 +368,38 @@ def __init__(self, *args, **kwargs):
case_data_base.sync()

def validate_origins(self, folder, origins):
origins.update_hashes()
if case_data_base.has_key(folder):
if (case_data_base[folder]["hash"] == origins["hash"]):
if (case_data_base[folder]["hash"] == origins.dct["hash"]):
print " [consistent]"
else:
print " [inconsistent]",
for time_key, time in origins.iteritems():
if time_key == "hash":
continue
if not case_data_base[folder].get(time_key, False):
print " new timestep " + str(time_key)
else:
for loc_key, loc in time.iteritems():
if loc_key == "hash":
loc_hash = loc_key
continue
for field_key, files in loc['fields'].iteritems():
if field_key == "hash":
field_hash = fields_key
continue
if files[1] != case_data_base[folder][time_key][loc_key]['fields'][field_key][1]:
# FIXME for all fields it prints that one column is corrupted
print "corrupted: " + field_key + " in file: " + files[0]
print "overwriting"
# TODO think what to do
# raise an error, flag as dirty, backup old
case_data_base[folder] = origins
entries_new = len(origins.dct.keys())
entries_old = len(case_data_base[folder].keys())
if entries_new > entries_old:
print "[new timestep] "
# print origins.dct.keys()
case_data_base[folder] = origins.dct
elif entries_new < entries_old:
# print folder
# print origins.dct.keys()
# print case_data_base[folder].keys()
print "[missing timestep]"
case_data_base[folder] = origins.dct
elif entries_new == entries_old:
print "[corrupted]",
for time, loc, field, item in origins.hashes():
time_name, time_hash = time
loc_name, loc_hash = loc
field_name, field_hash = field
filename, item_hash = item
orig_hash = case_data_base[folder][time_name][loc_name][field_name][1]
if (item_hash != orig_hash):
print ""
print "corrupted fields:"
print "\t" + field_name + " in " + filename
case_data_base[folder] = origins.dct
else:
print "[stored]"
case_data_base[folder] = origins

case_data_base[folder] = origins.dct
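
The rewritten validate_origins compares the stored and freshly read Origins by global hash first, then by the number of top-level entries (timesteps plus the 'hash' key), and only on a tie walks the per-field hashes. A standalone sketch of that decision logic (a hypothetical helper, not part of the commit):

def classify(stored, fresh):
    # stored, fresh: dicts in the Origins.dct layout, 'hash' keys included
    if fresh['hash'] == stored['hash']:
        return '[consistent]'
    if len(fresh.keys()) > len(stored.keys()):
        return '[new timestep]'
    if len(fresh.keys()) < len(stored.keys()):
        return '[missing timestep]'
    return '[corrupted]'  # same timesteps, so some field hash must differ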
def add(self, data, label):
"""
Add a given Series
@@ -436,6 +445,14 @@ def latest(self):
ret.properties = self.properties
return ret

# def _iter_names(self)
# pass
#
# def get_hashes(self):
# """ returns hashes of current selection based
# on the data read from disk """
# pass

def at(self, idx_name, idx_val):
""" select from foamframe based on index name and value"""
ret = self[self.index.get_level_values(idx_name) == idx_val]
@@ -500,9 +517,9 @@ def draw(self, x, y, z, title, func, **kwargs):
def _label(axis, field):
label = kwargs.get(axis + '_label', False)
if label:
self.properties.plot_properties.insert(field, {'label':label})
self.properties.plot_properties.insert(field, {axis + '_label':label})
else:
label = self.properties.plot_properties.select(field, 'label', "None")
label = self.properties.plot_properties.select(field, axis + '_label', "None")
return label

bk.xaxis().axis_label = _label('x', x)
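
The _label fix above stores labels under axis-specific keys ('x_label'/'y_label'), so the x and y labels no longer overwrite each other's single 'label' entry. A hedged call sketch (the argument values are illustrative, not from the commit):

f.draw(x='Pos', y='T', z=None, title='centreline profile', func=bk.line,
       x_label='y [m]', y_label='T [K]')  # each axis now keeps its own label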
104 changes: 87 additions & 17 deletions Owls/io.py
@@ -136,6 +136,80 @@ def dataframe_to_foam(fullname, ftype, dataframe, boundaries):
f.write("}")
f.write("\n// ************************************************************************* //")

class Origins():
""" Class to manage fields to file relation and store hashes
dct = {'hash': 34jd,                          # global
       0.0: {'hash': 234s,                    # time
             'centreline': {'hash': 94143e,   # loc
                            'U': (filename, 3424)}
            }
      }
"""
from collections import defaultdict
def __init__(self):
self.dct = defaultdict(dict)

@classmethod
def from_dict(cls, dct):
pass

def to_dict(self):
pass

def insert(self, time, loc, field, filename, fieldhash):
try:
self.dct[time][loc][field] = filename, fieldhash
except:
self.dct[time].update({loc:{field: (filename, fieldhash)}})

def update_hashes(self):
for time_key, time in self.dct.iteritems():
if time_key == "hash":
continue
for loc_key, loc in time.iteritems():
if loc_key == "hash":
continue
self.dct[time_key][loc_key]["hash"] = sum(
[field[1] for key,field in loc.iteritems() if key != "hash"]
)
self.dct[time_key]["hash"] = sum(
[field["hash"] for key,field in time.iteritems() if key != "hash"]
)
self.dct["hash"] = sum([field["hash"] for key,field in self.dct.iteritems()
if key != "hash"]
)

def hashes(self):
""" generator """
# self.update_hashes()
for time_key, time in self.dct.iteritems():
if time_key == "hash":
continue
for loc_key, loc in time.iteritems():
if loc_key == "hash":
continue
for field, item in loc.iteritems():
if field == "hash":
continue
fn, field_hash = item
yield ((time_key, self.dct["hash"]),
(loc_key, time["hash"]),
(field, loc["hash"]),
(fn, field_hash)
)

def find(self, search_hash):
for time, loc, field, item in self.hashes():
time_name, time_hash = time
loc_name, loc_hash = loc
field_name, field_hash = field
filename, item_hash = item
if (search_hash == item_hash):
return field_name, filename
else:
return None,None
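
Taken together, Origins supports a three-step workflow: record (time, loc, field) -> (filename, hash) tuples, aggregate hashes bottom-up, then iterate or search them. A minimal sketch with made-up values:

origins = Origins()
origins.insert(0.0, 'centreline', 'U', 'sets/0/centreline_U.xy', 3424)
origins.insert(0.0, 'centreline', 'T', 'sets/0/centreline_T.xy', 9414)
origins.update_hashes()                 # fills the loc, time and global 'hash' keys
for time, loc, field, item in origins.hashes():
    filename, field_hash = item         # per-field hash as stored by insert()
name, fn = origins.find(3424)           # -> ('U', 'sets/0/centreline_U.xy')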

class ProgressBar():
""" A class providing progress bars """

@@ -172,10 +246,9 @@ def import_foam_folder(
df = DataFrame()
#df.index = MultiIndex.from_tuples(zip([],[]),names=['Loc',0])
from collections import defaultdict
origins = defaultdict(dict) #
origins = Origins()
for time, files in fileList.iteritems(): #FIXME dont iterate twice
df_tmp = DataFrame()
origin_field = dict()
for fn in files:
#ret = read_table(StringIO.StringIO(foam_to_csv(fn)))
ret = read_data_file(fn, skiplines, maxlines)
@@ -199,21 +272,13 @@
except Exception as e:
print x
print e
for i, field in enumerate(field_names):
origin_field[field] = fn, hashes[field]
origins[time][loc] = {"hash": sum([_[1] for _ in origin_field.values()]),
"fields":origin_field}
origins[time].update({"hash": sum(
[_["hash"] for _ in origins[time].values()])}
)
for field in field_names:
origins.insert(time,loc,field,fn,hashes[field])
df_tmp['Time'] = float(time)
if df.empty:
df = df_tmp
else:
df = df.append(df_tmp)
origins.update({"hash": sum(
[_["hash"] for _ in origins.values()])}
)
df.set_index('Time', append=True, inplace=True)
df = df.reorder_levels(['Time','Loc','Id'])
p_bar.done()
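
Note the changed contract here: import_foam_folder now fills an Origins instance rather than a raw nested defaultdict, and hash aggregation moved out of the read loop. The call site in frames.py (earlier in this diff) consumes the returned pair roughly as follows (remaining keyword arguments elided):

origins, data = io.import_foam_folder(search_format=search)  # plus further kwargs
origins.update_hashes()   # aggregation happens later, inside validate_origins()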
@@ -290,13 +355,11 @@ def read_data_file(fn, skiplines=1, maxlines=False):
df.set_index('Loc', append=True, inplace=True)
df.index.names=['Id','Loc']
df = df.reorder_levels(['Loc','Id'])
df = df.astype(float)
hashes = {}
for row in df.columns:
d = df[row].values
d.flags.writeable = False
hash_ = int(hashlib.md5(str(d)).hexdigest(),16)
hashes.update({row:hash_})
return names, df.astype(float), hashes
hashes.update({row: hash_series(df[row])})
return names, df, hashes
else:
data = [np.float32(x) for x in content[start:end:skiplines]]
entries = 1
@@ -313,6 +376,13 @@
print e
return None

def hash_series(series):
d = series.values
d.flags.writeable = False #TODO needed?
s = str(list(d))
return int(hashlib.md5(s).hexdigest(),16) #NOT
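
hash_series replaces the inline md5 loop that read_data_file carried before (see the lines removed above), so frames.py and io.py now hash a column identically. A minimal sketch, assuming only pandas and this function:

import pandas as pd
s = pd.Series([0.1, 0.2, 0.3])
h = hash_series(s)  # md5 over str(list(values)), returned as a (large) int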


def evaluate_names(fullfilename, num_entries):
""" Infere field names and Loc from given filename
File renamed without changes.
42 changes: 42 additions & 0 deletions tests/test_imports.py
@@ -0,0 +1,42 @@
import os

basepath = os.getcwd() + "/examples/buoyantCavity"
setspath = basepath + "/sets"


setsfiles = ['y0.1_T.xy', 'y0.1_U.xy', 'y0.2_T.xy', 'y0.2_U.xy', 'y0.3_T.xy', 'y0.3_U.xy', 'y0.4_T.xy', 'y0.4_U.xy', 'y0.5_T.xy', 'y0.5_U.xy', 'y0.6_T.xy', 'y0.6_U.xy', 'y0.7_T.xy', 'y0.7_U.xy', 'y0.8_T.xy', 'y0.8_U.xy', 'y0.9_T.xy', 'y0.9_U.xy']

basefiles = ['T', 'U', 'alphat', 'k', 'mut', 'omega', 'p', 'p_rgh', 'phi']

def test_imports():
""" are the main modules importable """
from Owls import io
from Owls import frames

# def test_find_times():
# import Owls as ow
# ow.read_sets(folder=basepath)

def test_findtimes():
""" are all times and times in sets found """
from Owls import io
def contains_all(res):
times = [str(_*50) for _ in range(21)]
return all([time in res for time in times])

assert contains_all(io.find_times(fold=basepath))
assert contains_all(io.find_times(fold=setspath))

def test_findDataFiles():
""" are all files in the the sets and times folder are found """
from Owls import io
read_folder = lambda x=False,filt=False: [path.replace(x,'') for path in io._get_datafiles_from_dir(x, filt)]
setsfolder = setspath + "/1000/"
basefolder = basepath + "/1000/"
datafilessets = read_folder(setsfolder)
datafilesbase = read_folder(basefolder)
assert setsfiles == datafilessets
assert basefiles == datafilesbase
for fn in basefiles:
# print read_folder(basefolder,fn)
assert [fn] == read_folder(basefolder,[fn])
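
A note on running these: basepath is built from os.getcwd(), so the suite assumes it is invoked from the repository root, matching the .travis.yml script above:

py.test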
