Skip to content

Commit

Permalink
Merge pull request #82 from lanl/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
MaksimEkin authored Jan 29, 2024
2 parents b5c6b3d + 5840e93 commit c4f6bd6
Show file tree
Hide file tree
Showing 87 changed files with 201 additions and 195 deletions.
2 changes: 1 addition & 1 deletion CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ authors:
- family-names: Alexandrov
given-names: Boian
title: "Tensor Extraction of Latent Features (T-ELF)"
version: 0.0.5
version: 0.0.6
url: https://github.com/lanl/T-ELF
doi: 10.5281/zenodo.10257897
date-released: 2023-12-04
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ If you use T-ELF please cite.

**APA:**
```latex
Eren, M., Solovyev, N., Barron, R., Bhattarai, M., Truong, D., Boureima, I., Skau, E., Rasmussen, K., & Alexandrov, B. (2023). Tensor Extraction of Latent Features (T-ELF) (Version 0.0.5) [Computer software]. https://doi.org/10.5281/zenodo.10257897
Eren, M., Solovyev, N., Barron, R., Bhattarai, M., Truong, D., Boureima, I., Skau, E., Rasmussen, K., & Alexandrov, B. (2023). Tensor Extraction of Latent Features (T-ELF) (Version 0.0.6) [Computer software]. https://doi.org/10.5281/zenodo.10257897
```

**BibTeX:**
Expand Down
3 changes: 1 addition & 2 deletions TELF/pre_processing/Vulture/modules/simple_clean.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,8 +233,7 @@ def _remove_stop_words(self, text, stop_words=None):
tokens = self.sw_pattern.findall(text)
cleaned_words = [t for t in tokens if
t in self.frozen or # entire term in frozen
#self._remove_stop_words_helper(t) or # term partially in frozen
not any(part in self.effective_stop_words for part in t.split('-'))] # no part in stopwords
not any(part.lower() in self.effective_stop_words for part in t.split('-'))] # no part in stopwords
return ' '.join(cleaned_words)


Expand Down
9 changes: 4 additions & 5 deletions TELF/pre_processing/Vulture/modules/substitute.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,16 +67,14 @@ def run(self, document):
Tuple of document id and substituted text
"""
doc_id, doc_text = document

if self.lower: # TODO: test removing this conditional
doc_text = doc_text.lower()
flags = re.IGNORECASE if self.lower else 0
for term, target in self.substitution_map.items():
if term == target: # if reflective substition (freezing some important term)
continue
if target.strip() == '': # if using substitution for deletion
doc_text = re.sub(r'\b{}\b(?=\s|$)'.format(re.escape(term)), target, doc_text)
doc_text = re.sub(r'\b{}\b(?=\s|$)'.format(re.escape(term)), target, doc_text, flags=flags)
else:
doc_text = re.sub(r'\b{}\b'.format(re.escape(term)), target, doc_text)
doc_text = re.sub(r'\b{}\b'.format(re.escape(term)), target, doc_text, flags=flags)
return (doc_id, doc_text)


Expand Down Expand Up @@ -285,6 +283,7 @@ def substitution_map(self, substitution_map):
'Ignoring substitution!', RuntimeWarning)
self._substitution_map = {}
elif isinstance(substitution_map, dict):
substitution_map = substitution_map.copy() # break reference to input argument
is_valid = self._validate_substitutions(substitution_map)
if is_valid:
if self.lower:
Expand Down
5 changes: 4 additions & 1 deletion TELF/pre_processing/Vulture/vulture.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
import pickle
import pathlib
import warnings
import numpy as np
import pandas as pd
from tqdm import tqdm
import multiprocessing
from joblib import Parallel, delayed, parallel_backend
Expand Down Expand Up @@ -118,7 +120,7 @@ def __init__(self, *, n_jobs = -1, n_nodes = 1, parallel_backend = "multiprocess
self.rank = self.comm.Get_rank()
self.size = self.comm.Get_size()

# generate unique ID only on the root process
# generate unique ID only on the root process
if self.rank == 0:
self.unique_id = str(uuid.uuid4())
else:
Expand Down Expand Up @@ -200,6 +202,7 @@ def clean_dataframe(self, df, columns, steps=None, substitutions=None,
# add the clean data back to the DataFrame
for col, clean_docs in clean_documents.items():
df[col] = df.index.map(clean_docs)
df[col].replace('', np.nan, inplace=True) # set null entries to None
return df


Expand Down
2 changes: 1 addition & 1 deletion TELF/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.0.5'
__version__ = '0.0.6'
6 changes: 3 additions & 3 deletions docs/Beaver.html
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="viewport" content="width=device-width, initial-scale=1" />

<title>TELF.pre_processing.Beaver: Fast matrix and tensor building tool &#8212; TELF 0.0.5 documentation</title>
<title>TELF.pre_processing.Beaver: Fast matrix and tensor building tool &#8212; TELF 0.0.6 documentation</title>



Expand Down Expand Up @@ -37,7 +37,7 @@
<link rel="preload" as="script" href="_static/scripts/pydata-sphinx-theme.js?digest=365ca57ee442770a23c6" />
<script src="_static/vendor/fontawesome/6.1.2/js/all.min.js?digest=365ca57ee442770a23c6"></script>

<script src="_static/documentation_options.js?v=6424ca4d"></script>
<script src="_static/documentation_options.js?v=73504a2b"></script>
<script src="_static/doctools.js?v=888ff710"></script>
<script src="_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
Expand Down Expand Up @@ -127,7 +127,7 @@



<p class="title logo__title">TELF 0.0.5 documentation</p>
<p class="title logo__title">TELF 0.0.6 documentation</p>

</a></div>
<div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
Expand Down
6 changes: 3 additions & 3 deletions docs/NMFk.html
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="viewport" content="width=device-width, initial-scale=1" />

<title>TELF.factorization.NMFk: Non-negative Matrix Factorization with Automatic Model Determination &#8212; TELF 0.0.5 documentation</title>
<title>TELF.factorization.NMFk: Non-negative Matrix Factorization with Automatic Model Determination &#8212; TELF 0.0.6 documentation</title>



Expand Down Expand Up @@ -37,7 +37,7 @@
<link rel="preload" as="script" href="_static/scripts/pydata-sphinx-theme.js?digest=365ca57ee442770a23c6" />
<script src="_static/vendor/fontawesome/6.1.2/js/all.min.js?digest=365ca57ee442770a23c6"></script>

<script src="_static/documentation_options.js?v=6424ca4d"></script>
<script src="_static/documentation_options.js?v=73504a2b"></script>
<script src="_static/doctools.js?v=888ff710"></script>
<script src="_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
Expand Down Expand Up @@ -127,7 +127,7 @@



<p class="title logo__title">TELF 0.0.5 documentation</p>
<p class="title logo__title">TELF 0.0.6 documentation</p>

</a></div>
<div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
Expand Down
6 changes: 3 additions & 3 deletions docs/RESCALk.html
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="viewport" content="width=device-width, initial-scale=1" />

<title>TELF.factorization.RESCALk: RESCAL with Automatic Model Determination &#8212; TELF 0.0.5 documentation</title>
<title>TELF.factorization.RESCALk: RESCAL with Automatic Model Determination &#8212; TELF 0.0.6 documentation</title>



Expand Down Expand Up @@ -37,7 +37,7 @@
<link rel="preload" as="script" href="_static/scripts/pydata-sphinx-theme.js?digest=365ca57ee442770a23c6" />
<script src="_static/vendor/fontawesome/6.1.2/js/all.min.js?digest=365ca57ee442770a23c6"></script>

<script src="_static/documentation_options.js?v=6424ca4d"></script>
<script src="_static/documentation_options.js?v=73504a2b"></script>
<script src="_static/doctools.js?v=888ff710"></script>
<script src="_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
Expand Down Expand Up @@ -127,7 +127,7 @@



<p class="title logo__title">TELF 0.0.5 documentation</p>
<p class="title logo__title">TELF 0.0.6 documentation</p>

</a></div>
<div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
Expand Down
6 changes: 3 additions & 3 deletions docs/SymNMFk.html
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="viewport" content="width=device-width, initial-scale=1" />

<title>TELF.factorization.SymNMFk: Symmetric Non-negative Matrix Factorization with Automatic Model Determination &#8212; TELF 0.0.5 documentation</title>
<title>TELF.factorization.SymNMFk: Symmetric Non-negative Matrix Factorization with Automatic Model Determination &#8212; TELF 0.0.6 documentation</title>



Expand Down Expand Up @@ -37,7 +37,7 @@
<link rel="preload" as="script" href="_static/scripts/pydata-sphinx-theme.js?digest=365ca57ee442770a23c6" />
<script src="_static/vendor/fontawesome/6.1.2/js/all.min.js?digest=365ca57ee442770a23c6"></script>

<script src="_static/documentation_options.js?v=6424ca4d"></script>
<script src="_static/documentation_options.js?v=73504a2b"></script>
<script src="_static/doctools.js?v=888ff710"></script>
<script src="_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
Expand Down Expand Up @@ -127,7 +127,7 @@



<p class="title logo__title">TELF 0.0.5 documentation</p>
<p class="title logo__title">TELF 0.0.6 documentation</p>

</a></div>
<div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
Expand Down
6 changes: 3 additions & 3 deletions docs/TELF.factorization.decompositions.html
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="viewport" content="width=device-width, initial-scale=1" />

<title>TELF.factorization.decompositions package &#8212; TELF 0.0.5 documentation</title>
<title>TELF.factorization.decompositions package &#8212; TELF 0.0.6 documentation</title>



Expand Down Expand Up @@ -37,7 +37,7 @@
<link rel="preload" as="script" href="_static/scripts/pydata-sphinx-theme.js?digest=365ca57ee442770a23c6" />
<script src="_static/vendor/fontawesome/6.1.2/js/all.min.js?digest=365ca57ee442770a23c6"></script>

<script src="_static/documentation_options.js?v=6424ca4d"></script>
<script src="_static/documentation_options.js?v=73504a2b"></script>
<script src="_static/doctools.js?v=888ff710"></script>
<script src="_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
Expand Down Expand Up @@ -128,7 +128,7 @@



<p class="title logo__title">TELF 0.0.5 documentation</p>
<p class="title logo__title">TELF 0.0.6 documentation</p>

</a></div>
<div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
Expand Down
6 changes: 3 additions & 3 deletions docs/TELF.factorization.decompositions.utilities.html
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="viewport" content="width=device-width, initial-scale=1" />

<title>TELF.factorization.decompositions.utilities package &#8212; TELF 0.0.5 documentation</title>
<title>TELF.factorization.decompositions.utilities package &#8212; TELF 0.0.6 documentation</title>



Expand Down Expand Up @@ -37,7 +37,7 @@
<link rel="preload" as="script" href="_static/scripts/pydata-sphinx-theme.js?digest=365ca57ee442770a23c6" />
<script src="_static/vendor/fontawesome/6.1.2/js/all.min.js?digest=365ca57ee442770a23c6"></script>

<script src="_static/documentation_options.js?v=6424ca4d"></script>
<script src="_static/documentation_options.js?v=73504a2b"></script>
<script src="_static/doctools.js?v=888ff710"></script>
<script src="_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
Expand Down Expand Up @@ -127,7 +127,7 @@



<p class="title logo__title">TELF 0.0.5 documentation</p>
<p class="title logo__title">TELF 0.0.6 documentation</p>

</a></div>
<div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
Expand Down
6 changes: 3 additions & 3 deletions docs/TELF.factorization.html
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="viewport" content="width=device-width, initial-scale=1" />

<title>TELF.factorization package &#8212; TELF 0.0.5 documentation</title>
<title>TELF.factorization package &#8212; TELF 0.0.6 documentation</title>



Expand Down Expand Up @@ -37,7 +37,7 @@
<link rel="preload" as="script" href="_static/scripts/pydata-sphinx-theme.js?digest=365ca57ee442770a23c6" />
<script src="_static/vendor/fontawesome/6.1.2/js/all.min.js?digest=365ca57ee442770a23c6"></script>

<script src="_static/documentation_options.js?v=6424ca4d"></script>
<script src="_static/documentation_options.js?v=73504a2b"></script>
<script src="_static/doctools.js?v=888ff710"></script>
<script src="_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
Expand Down Expand Up @@ -127,7 +127,7 @@



<p class="title logo__title">TELF 0.0.5 documentation</p>
<p class="title logo__title">TELF 0.0.6 documentation</p>

</a></div>
<div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
Expand Down
6 changes: 3 additions & 3 deletions docs/TELF.factorization.utilities.html
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="viewport" content="width=device-width, initial-scale=1" />

<title>TELF.factorization.utilities package &#8212; TELF 0.0.5 documentation</title>
<title>TELF.factorization.utilities package &#8212; TELF 0.0.6 documentation</title>



Expand Down Expand Up @@ -37,7 +37,7 @@
<link rel="preload" as="script" href="_static/scripts/pydata-sphinx-theme.js?digest=365ca57ee442770a23c6" />
<script src="_static/vendor/fontawesome/6.1.2/js/all.min.js?digest=365ca57ee442770a23c6"></script>

<script src="_static/documentation_options.js?v=6424ca4d"></script>
<script src="_static/documentation_options.js?v=73504a2b"></script>
<script src="_static/doctools.js?v=888ff710"></script>
<script src="_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
Expand Down Expand Up @@ -127,7 +127,7 @@



<p class="title logo__title">TELF 0.0.5 documentation</p>
<p class="title logo__title">TELF 0.0.6 documentation</p>

</a></div>
<div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
Expand Down
6 changes: 3 additions & 3 deletions docs/TELF.html
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="viewport" content="width=device-width, initial-scale=1" />

<title>TELF package &#8212; TELF 0.0.5 documentation</title>
<title>TELF package &#8212; TELF 0.0.6 documentation</title>



Expand Down Expand Up @@ -37,7 +37,7 @@
<link rel="preload" as="script" href="_static/scripts/pydata-sphinx-theme.js?digest=365ca57ee442770a23c6" />
<script src="_static/vendor/fontawesome/6.1.2/js/all.min.js?digest=365ca57ee442770a23c6"></script>

<script src="_static/documentation_options.js?v=6424ca4d"></script>
<script src="_static/documentation_options.js?v=73504a2b"></script>
<script src="_static/doctools.js?v=888ff710"></script>
<script src="_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
Expand Down Expand Up @@ -127,7 +127,7 @@



<p class="title logo__title">TELF 0.0.5 documentation</p>
<p class="title logo__title">TELF 0.0.6 documentation</p>

</a></div>
<div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
Expand Down
6 changes: 3 additions & 3 deletions docs/TELF.pre_processing.Beaver.html
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="viewport" content="width=device-width, initial-scale=1" />

<title>TELF.pre_processing.Beaver package &#8212; TELF 0.0.5 documentation</title>
<title>TELF.pre_processing.Beaver package &#8212; TELF 0.0.6 documentation</title>



Expand Down Expand Up @@ -37,7 +37,7 @@
<link rel="preload" as="script" href="_static/scripts/pydata-sphinx-theme.js?digest=365ca57ee442770a23c6" />
<script src="_static/vendor/fontawesome/6.1.2/js/all.min.js?digest=365ca57ee442770a23c6"></script>

<script src="_static/documentation_options.js?v=6424ca4d"></script>
<script src="_static/documentation_options.js?v=73504a2b"></script>
<script src="_static/doctools.js?v=888ff710"></script>
<script src="_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
Expand Down Expand Up @@ -127,7 +127,7 @@



<p class="title logo__title">TELF 0.0.5 documentation</p>
<p class="title logo__title">TELF 0.0.6 documentation</p>

</a></div>
<div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
Expand Down
Loading

0 comments on commit c4f6bd6

Please sign in to comment.