@@ -5,11 +5,19 @@ import (
55 "sync"
66)
77
// steepDownArea describes one steep-down region found while scanning the
// reachability values in cluster order (see extract).
type steepDownArea struct {
	// start and end are positions in the cluster ordering (indices into
	// c.so) of the first and last point of the area.
	start, end int

	// mib is the largest reachability value recorded for this area by
	// extract since the area was found (the "maximum in between" value of
	// the OPTICS paper — NOTE(review): confirm naming against the paper).
	mib float64
}
12+
813type opticsClusterer struct {
914 minpts int
1015 workers int
1116 eps float64
1217
18+ // 1 - xi
19+ x float64
20+
1321 distance DistanceFunc
1422
1523 // slices holding the cluster mapping and sizes
@@ -38,7 +46,7 @@ type opticsClusterer struct {
3846}
3947
4048/* Implementation of OPTICS algorithm with concurrent nearest neighbour computation */
41- func OPTICS (minpts int , eps float64 , workers int , distance DistanceFunc ) (HardClusterer , error ) {
49+ func OPTICS (minpts int , eps , xi float64 , workers int , distance DistanceFunc ) (HardClusterer , error ) {
4250 if minpts < 1 {
4351 return nil , ErrZeroMinpts
4452 }
@@ -51,6 +59,10 @@ func OPTICS(minpts int, eps float64, workers int, distance DistanceFunc) (HardCl
5159 return nil , ErrZeroEpsilon
5260 }
5361
62+ if xi <= 0 {
63+ return nil , ErrZeroXi
64+ }
65+
5466 var d DistanceFunc
5567 {
5668 if distance != nil {
@@ -64,6 +76,7 @@ func OPTICS(minpts int, eps float64, workers int, distance DistanceFunc) (HardCl
6476 minpts : minpts ,
6577 workers : workers ,
6678 eps : eps ,
79+ x : 1 - xi ,
6780 distance : d ,
6881 }, nil
6982}
@@ -103,13 +116,14 @@ func (c *opticsClusterer) Learn(data [][]float64) error {
103116 c .endWorkers ()
104117
105118 c .v = nil
106- c .re = nil
107- c .so = nil
108- c .m = nil
109- c .w = nil
110119 c .p = nil
111120 c .r = nil
112121
122+ c .extract ()
123+
124+ c .re = nil
125+ c .so = nil
126+
113127 c .mu .Unlock ()
114128
115129 return nil
@@ -230,6 +244,128 @@ func (c *opticsClusterer) update(p int, l *int, r *[]int, q *priorityQueue) {
230244 }
231245}
232246
247+ func (c * opticsClusterer ) extract () {
248+ var (
249+ i , e int
250+ mib float64
251+ areas []* steepDownArea = make ([]* steepDownArea , 0 )
252+ )
253+
254+ for i < c .l - 1 {
255+ if c.re [c.so [i ]] == nil || c .re [c .so [i + 1 ]] == nil {
256+ continue
257+ }
258+
259+ mib = math .Max (mib , c.re [c.so [i ]].p )
260+
261+ if c .isSteepDown (i , & e ) {
262+ as := areas [:0 ]
263+ for j := 0 ; j < len (areas ); j ++ {
264+ if c .re [c .so [areas [j ].start ]].p * c .x < mib {
265+ continue
266+ }
267+
268+ as = append (as , & steepDownArea {
269+ start : areas [j ].start ,
270+ end : areas [j ].end ,
271+ mib : math .Max (areas [j ].mib , mib ),
272+ })
273+ }
274+ areas = as
275+
276+ areas = append (areas , & steepDownArea {
277+ start : i ,
278+ end : e ,
279+ })
280+
281+ i = e + 1
282+ mib = c.re [c.so [i ]].p
283+
284+ } else if c .isSteepUp (i , & e ) {
285+ as := areas [:0 ]
286+ for j := 0 ; j < len (areas ); j ++ {
287+ if c .re [c .so [areas [j ].start ]].p * c .x < mib {
288+ continue
289+ }
290+
291+ as = append (as , & steepDownArea {
292+ start : areas [j ].start ,
293+ end : areas [j ].end ,
294+ mib : math .Max (areas [j ].mib , mib ),
295+ })
296+ }
297+ areas = as
298+
299+ i = e + 1
300+ mib = c.re [c.so [i ]].p
301+
302+ for j := 0 ; j < len (areas ); j ++ {
303+ // check for cluster satisfying conditions outlined by Ankerst at al.
304+ }
305+ } else {
306+ i ++
307+ }
308+ }
309+ }
310+
311+ func (c * opticsClusterer ) isSteepDown (i int , e * int ) bool {
312+ if c.re [c.so [i ]].p * c .x > c .re [c .so [i + 1 ]].p {
313+ return false
314+ }
315+
316+ var counter , j int = 0 , i + 1
317+
318+ * e = j
319+
320+ for {
321+ if c.re [c.so [j ]].p < c .re [c .so [j + 1 ]].p {
322+ break
323+ }
324+
325+ if c.re [c.so [j ]].p * c .x <= c .re [c .so [j + 1 ]].p {
326+ * e = j
327+ counter = 0
328+ } else {
329+ counter ++
330+ }
331+
332+ if counter > c .minpts {
333+ break
334+ }
335+ }
336+
337+ return * e != j
338+ }
339+
340+ func (c * opticsClusterer ) isSteepUp (i int , e * int ) bool {
341+ if c.re [c.so [i ]].p > c .re [c .so [i + 1 ]].p * c .x {
342+ return false
343+ }
344+
345+ var counter , j int = 0 , i + 1
346+
347+ * e = j
348+
349+ for {
350+ if c.re [c.so [j ]].p > c .re [c .so [j + 1 ]].p {
351+ break
352+ }
353+
354+ if c.re [c.so [j ]].p <= c .re [c .so [j + 1 ]].p * c .x {
355+ * e = j
356+ counter = 0
357+ } else {
358+ counter ++
359+ }
360+
361+ if counter > c .minpts {
362+ break
363+ }
364+ }
365+
366+ return * e != j
367+ }
368+
233369/* Divide work among c.s workers, where c.s is determined
234370 * by the size of the data. This is based on an assumption that neighbour points of p
235371 * are located in relatively small subsection of the input data, so the dataset can be scanned
@@ -244,15 +380,15 @@ func (c *opticsClusterer) nearest(p int, l *int, r *[]int) {
244380
245381 c .w .Add (c .s )
246382
247- for i := 0 ; i < c .s ; i ++ {
248- if i == c .o {
383+ for i := 0 ; i < c .l ; i += c . f {
384+ if c . l - i <= c .f {
249385 b = c .l - 1
250386 } else {
251- b = ( i + 1 ) * c .f
387+ b = i + c .f
252388 }
253389
254390 c .j <- & rangeJob {
255- a : i * c . f ,
391+ a : i ,
256392 b : b ,
257393 }
258394 }
@@ -275,6 +411,11 @@ func (c *opticsClusterer) startWorkers() {
275411
276412func (c * opticsClusterer ) endWorkers () {
277413 close (c .j )
414+
415+ c .j = nil
416+
417+ c .m = nil
418+ c .w = nil
278419}
279420
280421func (c * opticsClusterer ) nearestWorker () {
@@ -300,10 +441,8 @@ func (c *opticsClusterer) numWorkers() int {
300441 b = 10
301442 } else if c .l < 100000 {
302443 b = 100
303- } else if c .l < 1000000 {
304- b = 1000
305444 } else {
306- b = 10000
445+ b = 1000
307446 }
308447
309448 if c .workers == 0 {
0 commit comments