-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathanfriutils.py
41 lines (31 loc) · 1.06 KB
/
anfriutils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# anfri utils: helpers for parsing SPMF sequence files and locating pattern occurrences
# feature1
def SPMFin_to_list(filename):
    """Read an SPMF-style sequence input file into a list of integer lists.

    Each non-blank line is converted as follows:

    1. every ``<digits> `` token is removed (presumably SPMF timestamp
       markers -- confirm against the data files in use);
    2. the ``" -1 -2"`` end-of-sequence terminator is removed;
    3. each remaining ``" -1"`` separator is turned into a comma, and the
       comma-separated pieces are parsed with ``int``.

    Blank lines (e.g. a trailing empty line) are skipped.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list with one entry per non-blank line; each entry is the list of
        integers found on that line, in file order.

    Raises:
        ValueError: If a piece between separators is not a single integer
            (for example an itemset holding several space-separated items).
    """
    import re

    sequences = []
    with open(filename, 'r') as f:
        for line in f:
            # A blank line would otherwise reach int('') and raise.
            if not line.strip():
                continue
            s = re.sub(r'<[0-9]+> ', '', line)
            s = re.sub(r' -1 -2', '', s)
            s = re.sub(r' -1', ',', s)
            # int() tolerates the surrounding spaces/newline left on each piece.
            sequences.append([int(el) for el in s.split(',')])
    return sequences
def SPMFout_to_list(filename, reverse=True):
    """Read an SPMF-style pattern output file into sorted (pattern, support) pairs.

    Each non-blank line is expected to look like ``"1 -1 2 -1 #SUP: 5"``:
    every ``<digits> `` token is removed, each ``" -1"`` separator becomes a
    comma, and the ``" #SUP: "`` label is dropped so that the support count
    becomes the last comma-separated integer on the line.

    Blank lines (e.g. a trailing empty line) are skipped.

    Args:
        filename: Path of the text file to read.
        reverse: Passed to ``sorted``; ``True`` (the default) puts the
            highest-scoring patterns first.

    Returns:
        A list of ``(pattern, sup)`` tuples, where ``pattern`` is a list of
        ints and ``sup`` is the int that followed ``#SUP:``. The list is
        sorted by ``sup + len(pattern)``.

    Raises:
        ValueError: If a piece between separators is not a single integer.
    """
    import re

    patterns = []
    with open(filename, 'r') as f:
        for line in f:
            # A blank line would otherwise reach int('') and raise.
            if not line.strip():
                continue
            s = re.sub(r'<[0-9]+> ', '', line)
            s = re.sub(r' -1', ',', s)
            s = re.sub(r' #SUP: ', '', s)
            values = [int(el) for el in s.split(',')]
            # The last integer is the support; everything before it is the pattern.
            patterns.append((values[:-1], values[-1]))

    def score(entry):
        # Tuple-unpacking lambda parameters are a SyntaxError on Python 3,
        # so the (pattern, sup) pair is unpacked inside the function instead.
        pattern, sup = entry
        return sup + len(pattern)

    return sorted(patterns, key=score, reverse=reverse)
def occurrences(seq, pattern):
    """Find every contiguous occurrence of ``pattern`` inside ``seq``.

    Both arguments are copied into lists before comparing, so any pair of
    finite iterables works (list, tuple, str, ...), not only list patterns.

    Args:
        seq: Iterable to search in.
        pattern: Iterable of elements to look for as a contiguous run.

    Returns:
        A list of ``(start, end)`` index pairs, in increasing ``start`` order,
        such that ``list(seq)[start:end]`` equals ``list(pattern)``.
        Overlapping occurrences are all reported. An empty ``pattern``
        matches at every index of ``seq`` with ``start == end``.
    """
    seq_list = list(seq)
    # A list slice never compares equal to a tuple or str, so normalise the
    # pattern to a list as well.
    pattern_list = list(pattern)
    width = len(pattern_list)
    return [
        (start, start + width)
        for start in range(len(seq_list))
        if seq_list[start:start + width] == pattern_list
    ]