-
Notifications
You must be signed in to change notification settings - Fork 0
/
Ngram.pde
147 lines (122 loc) · 3.22 KB
/
Ngram.pde
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
/**
 * Plain data holder for one n-gram: its text, a count, and the
 * occurrence records associated with it.
 */
class Ngram {
  /** Text of the n-gram. */
  String words;

  /** Count value supplied at construction time. */
  int count;

  /** Occurrence records; the array is stored by reference, not copied. */
  Occurrence[] occurrences;

  /**
   * @param words_ text of the n-gram
   * @param count_ count to store for this n-gram
   * @param occ    occurrence records to attach (kept as-is, no defensive copy)
   */
  Ngram(String words_, int count_, Occurrence[] occ) {
    this.words = words_;
    this.count = count_;
    this.occurrences = occ;
  }
}
/**
 * A single location, identified by season number, episode number and
 * line number.
 */
class Occurrence {
  int season;
  int episode;
  int lineno;

  /**
   * @param s season number
   * @param e episode number
   * @param l line number
   */
  Occurrence(int s, int e, int l) {
    this.season = s;
    this.episode = e;
    this.lineno = l;
  }

  /**
   * Tells whether this occurrence lies strictly before the given episode,
   * ordering first by season and then by episode number.
   *
   * @param ep episode to compare against (reads ep.season and ep.number)
   * @return true when this occurrence's (season, episode) pair is smaller
   */
  boolean precedes(Episode ep) {
    // A differing season decides the ordering on its own.
    if (season != ep.season) {
      return season < ep.season;
    }
    // Same season: fall back to the episode number.
    return episode < ep.number;
  }

  /**
   * Tells whether this occurrence belongs to the given episode.
   *
   * @param ep episode to compare against (reads ep.season and ep.number)
   * @return true when both season and episode number are equal
   */
  boolean within(Episode ep) {
    if (season != ep.season) {
      return false;
    }
    return episode == ep.number;
  }
}
/**
 * Pairs an Ngram with an additional count and a p-value.
 * NOTE(review): the name suggests these are per-character statistics —
 * confirm against the code that builds CharNgramTable.
 */
class CharNgram {
  /** The underlying n-gram this entry refers to. */
  Ngram ngram;

  /** Count stored for this entry (distinct from ngram.count). */
  int count;

  /** P-value stored for this entry. */
  float pvalue;

  /**
   * @param ngram_  n-gram this entry refers to
   * @param count_  count to store
   * @param pvalue_ p-value to store
   */
  CharNgram(Ngram ngram_, int count_, float pvalue_) {
    this.ngram = ngram_;
    this.count = count_;
    this.pvalue = pvalue_;
  }
}
/**
 * Table of CharNgram entries read from a file via TSVBase, kept both in
 * load order (list) and keyed by n-gram text (map). Also exposes the
 * entries through the ListDataSource methods.
 *
 * NOTE(review): allocateData / createItem / resizeData are presumably
 * callbacks invoked by TSVBase while loading — confirm against TSVBase.
 */
class CharNgramTable extends TSVBase implements ListDataSource {
  /** Entries in the order they were accepted by createItem. */
  ArrayList<CharNgram> charNgramList;

  /** The same entries, keyed by the n-gram's text. */
  HashMap<String,CharNgram> charNgramMap;

  /**
   * @param filename file handed to the TSVBase constructor
   */
  CharNgramTable(String filename) {
    super(filename, false);  // this loads the data
  }

  /**
   * Creates empty containers sized for the given number of rows.
   *
   * @param rows initial capacity for both the map and the list
   */
  void allocateData(int rows) {
    charNgramList = new ArrayList<CharNgram>(rows);
    charNgramMap = new HashMap<String,CharNgram>(rows);
  }

  /**
   * Builds one entry from a row: column 0 is the n-gram text, column 1
   * the count, column 2 the p-value.
   *
   * @param i      row index (unused here)
   * @param pieces the row's columns
   * @return true if an entry was stored, false if the row was skipped
   */
  boolean createItem(int i, String[] pieces) {
    final String phrase = pieces[0];
    final int phraseCount = parseInt(pieces[1]);
    final float phrasePvalue = parseFloat(pieces[2]);

    // Only n-grams known to the global `ngrams` lookup are kept;
    // we're skipping common ones, which are absent from it.
    final Ngram known = ngrams.get(phrase);
    if (known != null) {
      final CharNgram entry = new CharNgram(known, phraseCount, phrasePvalue);
      charNgramMap.put(phrase, entry);
      charNgramList.add(entry);
      return true;
    }
    return false;
  }

  /** Intentionally empty: nothing needs resizing after loading. */
  void resizeData(int rows) {
  }

  /** @return number of stored entries */
  int count() {
    return charNgramList.size();
  }

  /**
   * @param index position in load order
   * @return the entry at that position
   */
  CharNgram get(int index) {
    return charNgramList.get(index);
  }

  /**
   * @param index position in load order
   * @return the underlying n-gram's count, a space, then its text
   */
  String getText(int index) {
    final Ngram underlying = charNgramList.get(index).ngram;
    return underlying.count + " " + underlying.words;
  }

  /**
   * @param index position in load order
   * @return true when the entry's n-gram is the very object held in the
   *         global `activeNgram` (identity comparison)
   */
  boolean selected(int index) {
    final CharNgram entry = get(index);
    return activeNgram == entry.ngram;
  }
}
/**
 * Table of Ngram objects read from a file via TSVBase and keyed by the
 * n-gram's text.
 *
 * Row layout as consumed by createItem:
 *   column 0: count
 *   column 1: n-gram text
 *   column 2: "C" marks an n-gram that is common in general English
 *   column 3: colon-separated occurrences, each containing
 *             S<season>E<episode>L<line>
 *
 * NOTE(review): allocateData / createItem / resizeData are presumably
 * callbacks invoked by TSVBase while loading — confirm against TSVBase.
 */
class NgramTable extends TSVBase {
  /** Loaded n-grams keyed by their text. */
  HashMap<String,Ngram> ngramMap;

  /**
   * @param filename file handed to the TSVBase constructor
   */
  NgramTable(String filename) {
    super(filename, false); // this loads the data
  }

  /**
   * Creates the empty map sized for the given number of rows.
   *
   * @param rows initial capacity of the map
   */
  void allocateData(int rows) {
    ngramMap = new HashMap<String,Ngram>(rows);
  }

  /**
   * Builds one Ngram from a row and stores it in the map.
   *
   * Rows with fewer than four columns are skipped with a warning instead
   * of failing with an ArrayIndexOutOfBoundsException. Occurrence tokens
   * that do not match the S..E..L.. pattern are ignored with a warning:
   * match() yields null for them, which previously caused a
   * NullPointerException.
   *
   * @param i      row index (used only in warning messages)
   * @param pieces the row's columns
   * @return true if an Ngram was stored, false if the row was skipped
   */
  boolean createItem(int i, String[] pieces) {
    if (pieces.length < 4) {
      System.err.println("NgramTable: skipping item " + i
        + ": expected at least 4 columns but found " + pieces.length);
      return false;
    }

    int count = parseInt(pieces[0]);
    String words = pieces[1];
    boolean common = pieces[2].equals("C");
    if (common) return false; /* skip ngrams that are common in general english */

    String[] occStrs = pieces[3].split(":");
    ArrayList<Occurrence> occs = new ArrayList<Occurrence>(occStrs.length);
    for (int j = 0; j < occStrs.length; j++) {
      String[] groups = match(occStrs[j], "S(\\d+)E(\\d+)L(\\d+)");
      if (groups == null) {
        // No match (e.g. an empty token from a leading ':'): drop just this token.
        System.err.println("NgramTable: ignoring malformed occurrence '"
          + occStrs[j] + "' for n-gram '" + words + "'");
        continue;
      }
      int season = parseInt(groups[1]);
      int epnum = parseInt(groups[2]);
      int lineno = parseInt(groups[3]);
      occs.add(new Occurrence(season, epnum, lineno));
    }

    Occurrence[] occArray = occs.toArray(new Occurrence[occs.size()]);
    ngramMap.put(words, new Ngram(words, count, occArray));
    return true;
  }

  /** Intentionally empty: nothing needs resizing after loading. */
  void resizeData(int rows) {
  }

  /**
   * Looks up an n-gram by its text.
   *
   * @param words text of the n-gram
   * @return the stored Ngram, or null if none was loaded under that text
   *         (for example because it was skipped as common)
   */
  Ngram get(String words) {
    return ngramMap.get(words);
  }
}
/**
 * View that draws a heading naming the character held in the global
 * `activeNgramChar`, when one is set.
 */
class NgramView extends View {
  /**
   * Forwards the four values unchanged to the View constructor.
   * NOTE(review): presumably position and size — confirm against View.
   */
  NgramView(float x_, float y_, float w_, float h_) {
    super(x_, y_, w_, h_);
  }

  /**
   * Draws the heading text at (0, 8) with fill value 0; draws nothing
   * when `activeNgramChar` is null.
   */
  void drawContent() {
    if (activeNgramChar == null) {
      return;
    }
    fill(0);
    String heading = "Significant n-grams for " + activeNgramChar.name;
    text(heading, 0, 8);
  }
}