Skip to content

Commit 3df6c17

Browse files
committed
Implemented DBSCAN
1 parent 9a74767 commit 3df6c17

3 files changed

Lines changed: 231 additions & 7 deletions

File tree

clusters.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ type HardClusterer interface {
3535
/* Returns sizes of respective clusters */
3636
Sizes() []int
3737

38-
/* Returns mapping from data point indices to cluster index */
38+
/* Returns mapping from data point indices to cluster index. Cluster indices begin at 1, not 0. */
3939
Guesses() []int
4040

4141
/* Returns index of cluster to which the observation was assigned */

dbscan.go

Lines changed: 228 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,239 @@
11
package clusters
22

3+
import (
4+
"sync"
5+
)
6+
37
// dbscanClusterer holds the parameters and working state of the DBSCAN
// implementation of HardClusterer.
type dbscanClusterer struct {
4-
iterations int
5-
minpts int
6-
eps float64
8+
// minpts is the neighbourhood size a point must reach to start or extend a cluster.
minpts int
9+
// eps is the neighbourhood radius: points at a distance strictly below eps are neighbours.
eps float64
10+
11+
// l is the number of data points, s the number of goroutines used per
// neighbourhood query, o the index of the last goroutine (s - 1) and f the
// number of points handled by each goroutine (l / s). All are set in Learn.
l, s, o, f int
712

813
// For online learning only
914
alpha float64
1015
dimension int
1116

1217
// distance measures how far apart two points are.
distance DistanceFunc
1318

14-
dataset [][]float64
19+
// mu is write-locked by Learn and Online and read-locked by Sizes and Guesses.
mu sync.RWMutex
20+
// a maps each data point index to its cluster label (-1 marks noise);
// b holds the size of each cluster.
a, b []int
21+
22+
// visited points
23+
v []bool
24+
25+
// d is the data set being clustered.
d [][]float64
26+
}
27+
28+
func DbscanClusterer(minpts int, eps float64, distance DistanceFunc) (HardClusterer, error) {
29+
var d DistanceFunc
30+
{
31+
if distance != nil {
32+
d = distance
33+
} else {
34+
d = EuclideanDistance
35+
}
36+
}
37+
38+
return &dbscanClusterer{
39+
minpts: minpts,
40+
eps: eps,
41+
distance: d,
42+
}, nil
43+
}
44+
45+
func (c *dbscanClusterer) WithOnline(o Online) HardClusterer {
46+
c.alpha = o.Alpha
47+
c.dimension = o.Dimension
48+
49+
c.d = make([][]float64, 0, 100)
50+
51+
return c
52+
}
53+
54+
func (c *dbscanClusterer) Learn(data [][]float64) error {
55+
if len(data) == 0 {
56+
return ErrEmptySet
57+
}
58+
59+
c.mu.Lock()
60+
61+
c.l = len(data)
62+
c.s = numGoroutines(c.l)
63+
c.o = c.s - 1
64+
c.f = c.l / c.s
65+
66+
c.d = data
67+
68+
c.v = make([]bool, c.l)
69+
70+
c.a = make([]int, c.l)
71+
c.b = make([]int, 0)
72+
73+
c.run()
74+
75+
c.v = nil
76+
77+
c.mu.Unlock()
78+
79+
return nil
80+
}
81+
82+
func (c *dbscanClusterer) Sizes() []int {
83+
c.mu.RLock()
84+
defer c.mu.RUnlock()
85+
86+
return c.b
87+
}
88+
89+
func (c *dbscanClusterer) Guesses() []int {
90+
c.mu.RLock()
91+
defer c.mu.RUnlock()
92+
93+
return c.a
94+
}
95+
96+
func (c *dbscanClusterer) Predict(p []float64) int {
97+
var (
98+
l int
99+
d float64
100+
m float64 = c.distance(p, c.d[0])
101+
)
102+
103+
for i := 1; i < len(c.d); i++ {
104+
if d = c.distance(p, c.d[i]); d < m {
105+
m = d
106+
l = i
107+
}
108+
}
109+
110+
return c.a[l]
111+
}
112+
113+
func (c *dbscanClusterer) Online(observations chan []float64, done chan struct{}) chan *HCEvent {
114+
c.mu.Lock()
115+
116+
var (
117+
r chan *HCEvent = make(chan *HCEvent)
118+
)
119+
120+
go func() {
121+
for {
122+
select {
123+
case o := <-observations:
124+
c.d = append(c.d, o)
125+
case <-done:
126+
go func() {
127+
c.mu.Unlock()
128+
}()
129+
130+
return
131+
}
132+
}
133+
}()
134+
135+
return r
136+
}
137+
138+
func (c *dbscanClusterer) run() {
139+
var (
140+
n, m, l, k = 1, 0, 0, 0
141+
ns, nss = make([]int, 0), make([]int, 0)
142+
)
143+
144+
for i := 0; i < c.l; i++ {
145+
if c.v[i] {
146+
continue
147+
}
148+
149+
c.v[i] = true
150+
151+
c.nearest(i, &l, &ns)
152+
153+
if l < c.minpts {
154+
c.a[i] = -1
155+
} else {
156+
c.a[i] = n
157+
158+
c.b = append(c.b, 0)
159+
c.b[m]++
160+
161+
for j := 0; j < l; j++ {
162+
if !c.v[ns[j]] {
163+
c.v[ns[j]] = true
164+
165+
c.nearest(ns[j], &k, &nss)
166+
167+
if k >= c.minpts {
168+
l += k
169+
ns = append(ns, nss...)
170+
}
171+
}
172+
173+
if c.a[ns[j]] == 0 {
174+
c.a[ns[j]] = n
175+
c.b[m]++
176+
}
177+
}
178+
179+
n++
180+
m++
181+
}
182+
}
183+
}
184+
185+
func (c *dbscanClusterer) nearest(p int, l *int, r *[]int) {
186+
var (
187+
m sync.Mutex
188+
w sync.WaitGroup
189+
190+
b int
191+
v []float64 = c.d[p]
192+
)
193+
194+
*r = (*r)[:0]
195+
196+
w.Add(c.s)
197+
198+
for i := 0; i < c.s; i++ {
199+
if i == c.o {
200+
b = c.l - 1
201+
} else {
202+
b = (i + 1) * c.f
203+
}
204+
205+
go func(a, b int) {
206+
for j := a; j < b; j++ {
207+
if c.distance(v, c.d[j]) < c.eps {
208+
m.Lock()
209+
*r = append(*r, j)
210+
m.Unlock()
211+
}
212+
}
213+
214+
w.Done()
215+
}(i*c.f, b)
216+
}
217+
218+
w.Wait()
219+
220+
*l = len(*r)
221+
}
222+
223+
// numGoroutines returns how many goroutines to use for a data set of a
// points: 1 below one thousand points, then ten times more for every
// additional order of magnitude, capped at one million.
func numGoroutines(a int) int {
	switch {
	case a < 1000:
		return 1
	case a < 10000:
		return 10
	case a < 100000:
		return 100
	case a < 1000000:
		return 1000
	case a < 10000000:
		return 10000
	case a < 100000000:
		return 100000
	default:
		return 1000000
	}
}

kmeans.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,10 +100,10 @@ func (c *kmeansClusterer) Learn(data [][]float64) error {
100100
c.check()
101101
}
102102

103-
c.mu.Unlock()
104-
105103
c.n = nil
106104

105+
c.mu.Unlock()
106+
107107
return nil
108108
}
109109

0 commit comments

Comments
 (0)