Skip to content

Commit 9a74767

Browse files
committed
Made changes to API definition
1 parent 1ef32e6 commit 9a74767

4 files changed

Lines changed: 115 additions & 106 deletions

File tree

clusters.go

Lines changed: 34 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -11,38 +11,60 @@ type Online struct {
1111
Dimension int
1212
}
1313

14-
type HardCluster [][]float64
14+
/* Events represent intermediate results of computation of both kinds of algorithms
15+
* transmitted periodically to the caller */
16+
type HCEvent struct {
17+
Cluster int
18+
Observation []float64
19+
}
1520

16-
type SoftCluster struct {
17-
sizes []int
18-
data []struct {
19-
probabilities, observation []float64
20-
}
21+
type SCEvent struct {
22+
Probabilities []float64
23+
Observation []float64
2124
}
2225

26+
/* Clusterer denotes a single operation of learning
27+
* common for both Hard and Soft clusterers */
2328
type Clusterer interface {
2429
Learn(data [][]float64) error
2530
}
2631

32+
/* HardClusterer defines a set of operations for hard clustering algorithms */
2733
type HardClusterer interface {
28-
Guesses() []HardCluster
2934

30-
Predict(observation []float64) HardCluster
35+
/* Returns sizes of respective clusters */
36+
Sizes() []int
37+
38+
/* Returns mapping from data point indices to cluster index */
39+
Guesses() []int
3140

32-
Online(observations chan []float64, done chan struct{}, callback func([]float64, int))
41+
/* Returns index of cluster to which the observation was assigned */
42+
Predict(observation []float64) int
3343

44+
/* Provides a method to train the algorithm online and receive intermediate results of computation */
45+
Online(observations chan []float64, done chan struct{}) chan *HCEvent
46+
47+
/* Allows configuring the algorithm for online learning */
3448
WithOnline(Online) HardClusterer
3549

3650
Clusterer
3751
}
3852

3953
type SoftClusterer interface {
40-
Guesses() []*SoftCluster
4154

42-
Predict(observation []float64) *SoftCluster
55+
/* Returns average probabilities of respective clusters */
56+
Probabilities() []float64
57+
58+
/* Returns mapping from data point indices to cluster probabilities */
59+
Guesses() [][]float64
60+
61+
/* Returns probabilities of the observation being assigned to respective clusters */
62+
Predict(observation []float64) []float64
4363

44-
Online(observations chan []float64, done chan struct{}, callback func())
64+
/* Provides a method to train the algorithm online and receive intermediate results of computation */
65+
Online(observations chan []float64, done chan struct{}) chan *SCEvent
4566

67+
/* Allows configuring the algorithm for online learning */
4668
WithOnline(Online) SoftClusterer
4769

4870
Clusterer

dbscan.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
package clusters
2+
3+
type dbscanClusterer struct {
4+
iterations int
5+
minpts int
6+
eps float64
7+
8+
// For online learning only
9+
alpha float64
10+
dimension int
11+
12+
distance DistanceFunc
13+
14+
dataset [][]float64
15+
}

em.go

Lines changed: 0 additions & 14 deletions
This file was deleted.

kmeans.go

Lines changed: 66 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -16,31 +16,24 @@ const (
1616
type kmeansClusterer struct {
1717
iterations int
1818
number int
19-
dimension int
2019

2120
// Variables keeping count of changes in points' membership every iteration. Used as a stopping condition.
2221
changes, oldchanges, counter, threshold int
2322

2423
// For online learning only
25-
alpha float64
24+
alpha float64
25+
dimension int
2626

2727
distance DistanceFunc
2828

29-
// Mapping from training set points to clusters' numbers.
30-
a map[int]int
29+
// a holds the mapping of data point indices to cluster numbers, b holds the sizes of each cluster
30+
mu sync.RWMutex
31+
a, b []int
3132

32-
// Mapping from clusters' numbers to set of points they contain.
33-
b [][]int
33+
// variables holding values of centroids of each cluster
34+
m, n [][]float64
3435

35-
// Mapping from clusters' numbers to their means
36-
m [][]float64
37-
38-
// Training set
3936
d [][]float64
40-
41-
// Computed clusters. Access is synchronized.
42-
mu sync.RWMutex
43-
c []HardCluster
4437
}
4538

4639
func KmeansClusterer(iterations, clusters int, distance DistanceFunc) (HardClusterer, error) {
@@ -88,8 +81,12 @@ func (c *kmeansClusterer) Learn(data [][]float64) error {
8881

8982
c.d = data
9083

91-
c.a = make(map[int]int, len(data))
92-
c.b = make([][]int, c.number)
84+
c.a = make([]int, len(data))
85+
c.b = make([]int, c.number)
86+
87+
for i := 0; i < len(data); i++ {
88+
c.a[i] = -1
89+
}
9390

9491
c.counter = 0
9592
c.threshold = CHANGES_THRESHOLD
@@ -98,69 +95,60 @@ func (c *kmeansClusterer) Learn(data [][]float64) error {
9895

9996
c.initializeMeansWithData()
10097

101-
for i := 0; i < c.iterations && c.notConverged(); i++ {
98+
for i := 0; i < c.iterations && c.counter != c.threshold; i++ {
10299
c.run()
100+
c.check()
103101
}
104102

105-
var wg sync.WaitGroup
106-
{
107-
wg.Add(c.number)
108-
}
109-
110-
for j := 0; j < c.number; j++ {
111-
go func(n int) {
112-
defer wg.Done()
113-
114-
c.c[n] = make([][]float64, len(c.b[n]))
115-
116-
for k := 0; k < len(c.b[n]); k++ {
117-
c.c[n][k] = c.d[c.b[n][k]]
118-
}
119-
}(j)
120-
}
121-
122-
wg.Wait()
123-
124103
c.mu.Unlock()
125104

126-
c.a = nil
127-
c.b = nil
105+
c.n = nil
128106

129107
return nil
130108
}
131109

132-
func (c *kmeansClusterer) Guesses() []HardCluster {
110+
func (c *kmeansClusterer) Sizes() []int {
111+
c.mu.RLock()
112+
defer c.mu.RUnlock()
113+
114+
return c.b
115+
}
116+
117+
func (c *kmeansClusterer) Guesses() []int {
133118
c.mu.RLock()
134119
defer c.mu.RUnlock()
135120

136-
return c.c
121+
return c.a
137122
}
138123

139-
func (c *kmeansClusterer) Predict(p []float64) HardCluster {
124+
func (c *kmeansClusterer) Predict(p []float64) int {
140125
var (
141-
l HardCluster
126+
l int
142127
d float64
143-
m float64 = math.MaxFloat64
128+
m float64 = c.distance(p, c.m[0])
144129
)
145130

146-
for i := 0; i < len(c.c); i++ {
131+
for i := 1; i < c.number; i++ {
147132
if d = c.distance(p, c.m[i]); d < m {
148133
m = d
149-
l = c.c[i]
134+
l = i
150135
}
151136
}
152137

153138
return l
154139
}
155140

156-
func (c *kmeansClusterer) Online(observations chan []float64, done chan struct{}, callback func([]float64, int)) {
141+
func (c *kmeansClusterer) Online(observations chan []float64, done chan struct{}) chan *HCEvent {
157142
c.mu.Lock()
158143

159144
var (
160-
l, f int = len(c.m), len(c.m[0])
161-
h float64 = 1 - c.alpha
145+
r chan *HCEvent = make(chan *HCEvent)
146+
l, f int = len(c.m), len(c.m[0])
147+
h float64 = 1 - c.alpha
162148
)
163149

150+
c.b = make([]int, c.number)
151+
164152
/* The first step of online learning is adjusting the centroids by finding the one closest to the new data point
165153
* and modifying its location using the given alpha. Once the client stops sending new data, the actual clusters
166154
* are computed and the mutex is unlocked. */
@@ -182,7 +170,10 @@ func (c *kmeansClusterer) Online(observations chan []float64, done chan struct{}
182170
}
183171
}
184172

185-
go callback(o, k)
173+
r <- &HCEvent{
174+
Cluster: k,
175+
Observation: o,
176+
}
186177

187178
for i := 0; i < f; i++ {
188179
c.m[k][i] = c.alpha*o[i] + h*c.m[k][i]
@@ -193,13 +184,10 @@ func (c *kmeansClusterer) Online(observations chan []float64, done chan struct{}
193184
go func() {
194185
var (
195186
n int
196-
l int = len(c.d) / c.number
197187
d, m float64
198188
)
199189

200-
for i := 0; i < c.number; i++ {
201-
c.c[n] = make([][]float64, 0, l)
202-
}
190+
c.a = make([]int, len(c.d))
203191

204192
for i := 0; i < len(c.d); i++ {
205193
m = c.distance(c.d[i], c.m[0])
@@ -212,7 +200,8 @@ func (c *kmeansClusterer) Online(observations chan []float64, done chan struct{}
212200
}
213201
}
214202

215-
c.c[n] = append(c.c[n], c.d[i])
203+
c.a[i] = n
204+
c.b[n]++
216205
}
217206

218207
c.mu.Unlock()
@@ -222,12 +211,14 @@ func (c *kmeansClusterer) Online(observations chan []float64, done chan struct{}
222211
}
223212
}
224213
}()
214+
215+
return r
225216
}
226217

227218
// private
228219
func (c *kmeansClusterer) initializeMeansWithData() {
229220
c.m = make([][]float64, c.number)
230-
c.c = make([]HardCluster, c.number)
221+
c.n = make([][]float64, c.number)
231222

232223
rand.Seed(time.Now().UTC().Unix())
233224

@@ -264,11 +255,13 @@ func (c *kmeansClusterer) initializeMeansWithData() {
264255
c.m[i] = c.d[k]
265256
}
266257

258+
for i := 0; i < c.number; i++ {
259+
c.n[i] = make([]float64, len(c.m[0]))
260+
}
267261
}
268262

269263
func (c *kmeansClusterer) initializeMeans() {
270264
c.m = make([][]float64, c.number)
271-
c.c = make([]HardCluster, c.number)
272265

273266
rand.Seed(time.Now().UTC().Unix())
274267

@@ -282,56 +275,49 @@ func (c *kmeansClusterer) initializeMeans() {
282275

283276
func (c *kmeansClusterer) run() {
284277
var (
285-
l, n int
278+
n, l int = 0, len(c.m[0])
286279
d, m float64
287280
)
288281

289-
for i := 0; i < len(c.c); i++ {
290-
if l = len(c.b[i]); l == 0 {
291-
continue
292-
}
293-
294-
c.m[i] = make([]float64, len(c.d[0]))
295-
296-
for j := 0; j < l; j++ {
297-
floats.Add(c.m[i], c.d[c.b[i][j]])
298-
}
299-
300-
floats.Scale(1/float64(l), c.m[i])
301-
302-
c.b[i] = c.b[i][:0]
282+
for i := 0; i < c.number; i++ {
283+
c.b[i] = 0
303284
}
304285

305286
for i := 0; i < len(c.d); i++ {
306287
m = c.distance(c.d[i], c.m[0])
307288
n = 0
308289

309-
for j := 1; j < len(c.c); j++ {
290+
for j := 1; j < c.number; j++ {
310291
if d = c.distance(c.d[i], c.m[j]); d < m {
311292
m = d
312293
n = j
313294
}
314295
}
315296

316-
if v, ok := c.a[i]; ok && v != n {
297+
if c.a[i] != n {
317298
c.changes++
318299
}
319300

320301
c.a[i] = n
321-
c.b[n] = append(c.b[n], i)
302+
c.b[n]++
303+
304+
floats.Add(c.n[n], c.d[i])
322305
}
323-
}
324306

325-
func (c *kmeansClusterer) notConverged() bool {
326-
if c.counter == c.threshold {
327-
return false
307+
for i := 0; i < c.number; i++ {
308+
floats.Scale(1/float64(c.b[i]), c.n[i])
309+
310+
for j := 0; j < l; j++ {
311+
c.m[i][j] = c.n[i][j]
312+
c.n[i][j] = 0
313+
}
328314
}
315+
}
329316

317+
func (c *kmeansClusterer) check() {
330318
if c.changes == c.oldchanges {
331319
c.counter++
332320
}
333321

334322
c.oldchanges = c.changes
335-
336-
return true
337323
}

0 commit comments

Comments
 (0)