Skip to content

Commit f533249

Browse files
committed
Started work on ExtractClusters algorithm for OPTICS proposed by Ankerst et al.
1 parent 3b63066 commit f533249

3 files changed

Lines changed: 162 additions & 21 deletions

File tree

dbscan.go

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -95,8 +95,6 @@ func (c *dbscanClusterer) Learn(data [][]float64) error {
9595
c.endWorkers()
9696

9797
c.v = nil
98-
c.m = nil
99-
c.w = nil
10098
c.p = nil
10199
c.r = nil
102100

@@ -202,15 +200,15 @@ func (c *dbscanClusterer) nearest(p int, l *int, r *[]int) {
202200

203201
c.w.Add(c.s)
204202

205-
for i := 0; i < c.s; i++ {
206-
if i == c.o {
203+
for i := 0; i < c.l; i += c.f {
204+
if c.l-i <= c.f {
207205
b = c.l - 1
208206
} else {
209-
b = (i + 1) * c.f
207+
b = i + c.f
210208
}
211209

212210
c.j <- &rangeJob{
213-
a: i * c.f,
211+
a: i,
214212
b: b,
215213
}
216214
}
@@ -233,6 +231,11 @@ func (c *dbscanClusterer) startWorkers() {
233231

234232
// endWorkers closes the job channel c.j and then clears the worker-related
// fields (c.j, c.m and c.w) so they no longer reference the old values.
//
// NOTE(review): presumably the workers receive from c.j; a worker goroutine
// that has not read c.j before it is set to nil here would see a nil channel
// instead of the closed one - confirm against startWorkers/nearestWorker.
func (c *dbscanClusterer) endWorkers() {
	close(c.j)

	c.j, c.m, c.w = nil, nil, nil
}
237240

238241
func (c *dbscanClusterer) nearestWorker() {
@@ -258,10 +261,8 @@ func (c *dbscanClusterer) numWorkers() int {
258261
b = 10
259262
} else if c.l < 100000 {
260263
b = 100
261-
} else if c.l < 1000000 {
262-
b = 1000
263264
} else {
264-
b = 10000
265+
b = 1000
265266
}
266267

267268
if c.workers == 0 {

errors.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,5 @@ var (
1010
ErrZeroEpsilon = errors.New("Epsilon cannot be 0")
1111
ErrZeroMinpts = errors.New("MinPts cannot be 0")
1212
ErrZeroWorkers = errors.New("Number of workers cannot be less than 0")
13+
ErrZeroXi = errors.New("Xi cannot be 0")
1314
)

optics.go

Lines changed: 151 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,19 @@ import (
55
"sync"
66
)
77

8+
// steepDownArea describes one steep downward area found while scanning the
// reachability values during cluster extraction.
type steepDownArea struct {
	// start and end are positions in the cluster ordering that delimit
	// the area.
	start int
	end   int

	// mib is the largest reachability value folded into this area while
	// the scan moved on after it was recorded.
	mib float64
}
12+
813
type opticsClusterer struct {
914
minpts int
1015
workers int
1116
eps float64
1217

18+
// 1 - xi
19+
x float64
20+
1321
distance DistanceFunc
1422

1523
// slices holding the cluster mapping and sizes
@@ -38,7 +46,7 @@ type opticsClusterer struct {
3846
}
3947

4048
/* Implementation of OPTICS algorithm with concurrent nearest neighbour computation */
41-
func OPTICS(minpts int, eps float64, workers int, distance DistanceFunc) (HardClusterer, error) {
49+
func OPTICS(minpts int, eps, xi float64, workers int, distance DistanceFunc) (HardClusterer, error) {
4250
if minpts < 1 {
4351
return nil, ErrZeroMinpts
4452
}
@@ -51,6 +59,10 @@ func OPTICS(minpts int, eps float64, workers int, distance DistanceFunc) (HardCl
5159
return nil, ErrZeroEpsilon
5260
}
5361

62+
if xi <= 0 {
63+
return nil, ErrZeroXi
64+
}
65+
5466
var d DistanceFunc
5567
{
5668
if distance != nil {
@@ -64,6 +76,7 @@ func OPTICS(minpts int, eps float64, workers int, distance DistanceFunc) (HardCl
6476
minpts: minpts,
6577
workers: workers,
6678
eps: eps,
79+
x: 1 - xi,
6780
distance: d,
6881
}, nil
6982
}
@@ -103,13 +116,14 @@ func (c *opticsClusterer) Learn(data [][]float64) error {
103116
c.endWorkers()
104117

105118
c.v = nil
106-
c.re = nil
107-
c.so = nil
108-
c.m = nil
109-
c.w = nil
110119
c.p = nil
111120
c.r = nil
112121

122+
c.extract()
123+
124+
c.re = nil
125+
c.so = nil
126+
113127
c.mu.Unlock()
114128

115129
return nil
@@ -230,6 +244,128 @@ func (c *opticsClusterer) update(p int, l *int, r *[]int, q *priorityQueue) {
230244
}
231245
}
232246

247+
func (c *opticsClusterer) extract() {
248+
var (
249+
i, e int
250+
mib float64
251+
areas []*steepDownArea = make([]*steepDownArea, 0)
252+
)
253+
254+
for i < c.l-1 {
255+
if c.re[c.so[i]] == nil || c.re[c.so[i+1]] == nil {
256+
continue
257+
}
258+
259+
mib = math.Max(mib, c.re[c.so[i]].p)
260+
261+
if c.isSteepDown(i, &e) {
262+
as := areas[:0]
263+
for j := 0; j < len(areas); j++ {
264+
if c.re[c.so[areas[j].start]].p*c.x < mib {
265+
continue
266+
}
267+
268+
as = append(as, &steepDownArea{
269+
start: areas[j].start,
270+
end: areas[j].end,
271+
mib: math.Max(areas[j].mib, mib),
272+
})
273+
}
274+
areas = as
275+
276+
areas = append(areas, &steepDownArea{
277+
start: i,
278+
end: e,
279+
})
280+
281+
i = e + 1
282+
mib = c.re[c.so[i]].p
283+
284+
} else if c.isSteepUp(i, &e) {
285+
as := areas[:0]
286+
for j := 0; j < len(areas); j++ {
287+
if c.re[c.so[areas[j].start]].p*c.x < mib {
288+
continue
289+
}
290+
291+
as = append(as, &steepDownArea{
292+
start: areas[j].start,
293+
end: areas[j].end,
294+
mib: math.Max(areas[j].mib, mib),
295+
})
296+
}
297+
areas = as
298+
299+
i = e + 1
300+
mib = c.re[c.so[i]].p
301+
302+
for j := 0; j < len(areas); j++ {
303+
// check for cluster satisfying conditions outlined by Ankerst at al.
304+
}
305+
} else {
306+
i++
307+
}
308+
}
309+
}
310+
311+
func (c *opticsClusterer) isSteepDown(i int, e *int) bool {
312+
if c.re[c.so[i]].p*c.x > c.re[c.so[i+1]].p {
313+
return false
314+
}
315+
316+
var counter, j int = 0, i + 1
317+
318+
*e = j
319+
320+
for {
321+
if c.re[c.so[j]].p < c.re[c.so[j+1]].p {
322+
break
323+
}
324+
325+
if c.re[c.so[j]].p*c.x <= c.re[c.so[j+1]].p {
326+
*e = j
327+
counter = 0
328+
} else {
329+
counter++
330+
}
331+
332+
if counter > c.minpts {
333+
break
334+
}
335+
}
336+
337+
return *e != j
338+
}
339+
340+
func (c *opticsClusterer) isSteepUp(i int, e *int) bool {
341+
if c.re[c.so[i]].p > c.re[c.so[i+1]].p*c.x {
342+
return false
343+
}
344+
345+
var counter, j int = 0, i + 1
346+
347+
*e = j
348+
349+
for {
350+
if c.re[c.so[j]].p > c.re[c.so[j+1]].p {
351+
break
352+
}
353+
354+
if c.re[c.so[j]].p <= c.re[c.so[j+1]].p*c.x {
355+
*e = j
356+
counter = 0
357+
} else {
358+
counter++
359+
}
360+
361+
if counter > c.minpts {
362+
break
363+
}
364+
}
365+
366+
return *e != j
367+
}
368+
233369
/* Divide work among c.s workers, where c.s is determined
234370
* by the size of the data. This is based on an assumption that neighbour points of p
235371
* are located in relatively small subsection of the input data, so the dataset can be scanned
@@ -244,15 +380,15 @@ func (c *opticsClusterer) nearest(p int, l *int, r *[]int) {
244380

245381
c.w.Add(c.s)
246382

247-
for i := 0; i < c.s; i++ {
248-
if i == c.o {
383+
for i := 0; i < c.l; i += c.f {
384+
if c.l-i <= c.f {
249385
b = c.l - 1
250386
} else {
251-
b = (i + 1) * c.f
387+
b = i + c.f
252388
}
253389

254390
c.j <- &rangeJob{
255-
a: i * c.f,
391+
a: i,
256392
b: b,
257393
}
258394
}
@@ -275,6 +411,11 @@ func (c *opticsClusterer) startWorkers() {
275411

276412
// endWorkers closes the job channel c.j and then clears the worker-related
// fields (c.j, c.m and c.w) so they no longer reference the old values.
//
// NOTE(review): presumably the workers receive from c.j; a worker goroutine
// that has not read c.j before it is set to nil here would see a nil channel
// instead of the closed one - confirm against startWorkers/nearestWorker.
func (c *opticsClusterer) endWorkers() {
	close(c.j)

	c.j, c.m, c.w = nil, nil, nil
}
279420

280421
func (c *opticsClusterer) nearestWorker() {
@@ -300,10 +441,8 @@ func (c *opticsClusterer) numWorkers() int {
300441
b = 10
301442
} else if c.l < 100000 {
302443
b = 100
303-
} else if c.l < 1000000 {
304-
b = 1000
305444
} else {
306-
b = 10000
445+
b = 1000
307446
}
308447

309448
if c.workers == 0 {

0 commit comments

Comments
 (0)