-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathphonoEditDistanceWITHINsubjects.py
63 lines (48 loc) · 1.83 KB
/
phonoEditDistanceWITHINsubjects.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
from corpustools.corpus import io
from corpustools.symbolsim import phono_edit_distance
import numpy
import itertools
#---------------------------------------------------------------------//
# //
# calculates the phonological edit distance of all words within a set //
# //
#---------------------------------------------------------------------//
# Edit distances collected so far, one entry per unordered pair of words.
phonoSimList = []

# Load a corpus. Replace 'myCorpus.csv' with the corpus of your choice.
# Ensure that the first 3 columns are headed by 'spelling', 'transcription',
# and 'frequency'.
# NOTE(review): the positional "," and "." are presumably the column
# delimiter and the transcription delimiter -- confirm against the
# corpustools version in use.
myCorpus = io.csv.load_corpus_csv(
    "myCorpus",
    "myCorpus.csv",
    ",",
    ".",
    annotation_types=None,
    feature_system_path=None,
    stop_check=None,
    call_back=None,
)
print("Loaded: ", myCorpus)

# Download a Hayes feature matrix to "/matrix" and load it a single time;
# the loaded object is reused for every comparison below.
io.binary.download_binary("ipa2hayes", "/matrix", call_back=None)
ipa2hayes = io.binary.load_binary("/matrix")
io.binary.save_binary(ipa2hayes, "/matrix")
print("Features: ", ipa2hayes.features)

# Generate all unique combinations of 2 entries from the corpus word list.
wordCombinations = itertools.combinations(myCorpus.wordlist, 2)

# Iterate through the word combinations and calculate the phonological edit
# distance of each pair.
for wordCombo in wordCombinations:
    print(wordCombo)

    # Resolve each entry of the pair once instead of on every use.
    firstWord = myCorpus.wordlist.get(wordCombo[0])
    secondWord = myCorpus.wordlist.get(wordCombo[1])

    # Reuse the feature matrix loaded above: re-reading "/matrix" from disk
    # for every pair is loop-invariant work repeated once per combination.
    phonoEditDistance = phono_edit_distance.phono_edit_distance(
        firstWord,
        secondWord,
        "transcription",
        ipa2hayes,
    )
    print(
        "comparing: ",
        firstWord.transcription,
        " to: ",
        secondWord.transcription,
        ": ",
        phonoEditDistance,
    )
    phonoSimList.append(phonoEditDistance)

# The mean of all edit distances from the corpus. With fewer than two words
# there are no pairs; report nan directly rather than letting numpy.mean warn
# about an empty input.
meanDistance = numpy.mean(phonoSimList) if phonoSimList else float("nan")
print("mean result: ", meanDistance)