Skip to content

Commit 3b63066

Browse files
committed
Implemented OPTICS algorithm
1 parent 7daa0dd commit 3b63066

4 files changed

Lines changed: 354 additions & 102 deletions

File tree

common.go

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
package clusters
2+
3+
import (
4+
"container/heap"
5+
)
6+
7+
// rangeJob denotes the start (a) and end (b) indices of the portion of the
// dataset that a worker scans for nearest neighbours in DBSCAN and OPTICS.
type rangeJob struct {
	a, b int
}
11+
12+
// pItem is a single entry of the priority queue.
type pItem struct {
	v int     // value carried by the entry
	p float64 // priority the queue is ordered by
	i int     // current index of the entry in the queue; set to -1 by Pop
}

// priorityQueue is a binary heap of *pItem that implements heap.Interface.
type priorityQueue []*pItem

// newPriorityQueue returns an empty queue with capacity for size items.
func newPriorityQueue(size int) priorityQueue {
	// An empty slice trivially satisfies the heap invariant, so there is
	// nothing for heap.Init to do here.
	return make(priorityQueue, 0, size)
}

// Len returns the number of items currently in the queue.
func (pq priorityQueue) Len() int { return len(pq) }

// Less reports whether the item at index i has a greater priority than the
// item at index j. With this ordering the root of the heap is the item with
// the largest p.
// NOTE(review): OPTICS usually processes the smallest reachability first;
// confirm against the callers that a max-heap is what is wanted here.
func (pq priorityQueue) Less(i, j int) bool {
	return pq[i].p > pq[j].p
}

// Swap exchanges the items at indices i and j and keeps their stored
// indices in sync with their new positions.
func (pq priorityQueue) Swap(i, j int) {
	pq[i], pq[j] = pq[j], pq[i]
	pq[i].i = i
	pq[j].i = j
}

// Push appends x, which must be a *pItem, to the queue and records its index.
// The trailing heap.Fix sifts the new item into place, so the heap ordering
// holds even when Push is called directly instead of through heap.Push.
func (pq *priorityQueue) Push(x interface{}) {
	item := x.(*pItem)
	item.i = len(*pq)
	*pq = append(*pq, item)
	heap.Fix(pq, item.i)
}

// Pop removes and returns the last item of the underlying slice, as required
// by heap.Interface; heap.Pop moves the root there before calling it. The
// returned item's index is set to -1 to mark it as no longer queued.
func (pq *priorityQueue) Pop() interface{} {
	old := *pq
	n := len(old)
	item := old[n-1]
	old[n-1] = nil // drop the reference so the backing array does not keep the item alive
	item.i = -1
	*pq = old[:n-1]
	return item
}

// NotEmpty reports whether the queue holds at least one item.
func (pq *priorityQueue) NotEmpty() bool {
	return len(*pq) > 0
}

// Update changes the value and priority of item and restores the heap
// ordering around its current position.
func (pq *priorityQueue) Update(item *pItem, value int, priority float64) {
	item.v = value
	item.p = priority
	heap.Fix(pq, item.i)
}

dbscan.go

Lines changed: 22 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -9,34 +9,29 @@ type dbscanClusterer struct {
99
workers int
1010
eps float64
1111

12-
l, s, o, f int
13-
1412
distance DistanceFunc
1513

14+
// slices holding the cluster mapping and sizes
1615
mu sync.RWMutex
1716
a, b []int
1817

19-
// channel for distributed searching for nearest neighbours
20-
j chan *nearestJob
21-
22-
// variabes for calculating nearest neighbours concurrently
23-
m *sync.Mutex
24-
w *sync.WaitGroup
25-
p *[]float64
26-
r *[]int
18+
// variables used for concurrent computation of nearest neighbours
19+
l, s, o, f int
20+
j chan *rangeJob
21+
m *sync.Mutex
22+
w *sync.WaitGroup
23+
p *[]float64
24+
r *[]int
2725

2826
// visited points
2927
v []bool
3028

29+
// dataset
3130
d [][]float64
3231
}
3332

34-
type nearestJob struct {
35-
a, b int
36-
}
37-
38-
/* Implementation of DBSCAN algorithm with concurrent moditication */
39-
func DbscanClusterer(minpts int, eps float64, workers int, distance DistanceFunc) (HardClusterer, error) {
33+
/* Implementation of DBSCAN algorithm with concurrent nearest neighbour computation */
34+
func DBSCAN(minpts int, eps float64, workers int, distance DistanceFunc) (HardClusterer, error) {
4035
if minpts < 1 {
4136
return nil, ErrZeroMinpts
4237
}
@@ -159,7 +154,7 @@ func (c *dbscanClusterer) run() {
159154

160155
c.v[i] = true
161156

162-
c.nearest(&i, &l, &ns)
157+
c.nearest(i, &l, &ns)
163158

164159
if l < c.minpts {
165160
c.a[i] = -1
@@ -173,7 +168,7 @@ func (c *dbscanClusterer) run() {
173168
if !c.v[ns[j]] {
174169
c.v[ns[j]] = true
175170

176-
c.nearest(&ns[j], &k, &nss)
171+
c.nearest(ns[j], &k, &nss)
177172

178173
if k >= c.minpts {
179174
l += k
@@ -197,12 +192,12 @@ func (c *dbscanClusterer) run() {
197192
* by the size of the data. This is based on an assumption that neighbour points of p
198193
* are located in relatively small subsection of the input data, so the dataset can be scanned
199194
* concurrently without blocking a big number of goroutines trying to write to r */
200-
func (c *dbscanClusterer) nearest(p *int, l *int, r *[]int) {
195+
func (c *dbscanClusterer) nearest(p int, l *int, r *[]int) {
201196
var b int
202197

203198
*r = (*r)[:0]
204199

205-
c.p = &c.d[*p]
200+
c.p = &c.d[p]
206201
c.r = r
207202

208203
c.w.Add(c.s)
@@ -214,7 +209,7 @@ func (c *dbscanClusterer) nearest(p *int, l *int, r *[]int) {
214209
b = (i + 1) * c.f
215210
}
216211

217-
c.j <- &nearestJob{
212+
c.j <- &rangeJob{
218213
a: i * c.f,
219214
b: b,
220215
}
@@ -226,7 +221,7 @@ func (c *dbscanClusterer) nearest(p *int, l *int, r *[]int) {
226221
}
227222

228223
func (c *dbscanClusterer) startWorkers() {
229-
c.j = make(chan *nearestJob, c.l)
224+
c.j = make(chan *rangeJob, c.l)
230225

231226
c.m = &sync.Mutex{}
232227
c.w = &sync.WaitGroup{}
@@ -255,18 +250,15 @@ func (c *dbscanClusterer) nearestWorker() {
255250
}
256251

257252
func (c *dbscanClusterer) numWorkers() int {
258-
var (
259-
a int = c.l
260-
b int
261-
)
253+
var b int
262254

263-
if a < 1000 {
255+
if c.l < 1000 {
264256
b = 1
265-
} else if a < 10000 {
257+
} else if c.l < 10000 {
266258
b = 10
267-
} else if a < 100000 {
259+
} else if c.l < 100000 {
268260
b = 100
269-
} else if a < 1000000 {
261+
} else if c.l < 1000000 {
270262
b = 1000
271263
} else {
272264
b = 10000

kmeans.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ type kmeansClusterer struct {
1818
iterations int
1919
number int
2020

21-
// Variables keeping count of changes of points' membership every iteration. User as a stopping condition.
21+
// variables keeping count of changes of points' membership every iteration. Used as a stopping condition.
2222
changes, oldchanges, counter, threshold int
2323

2424
// For online learning only
@@ -27,17 +27,18 @@ type kmeansClusterer struct {
2727

2828
distance DistanceFunc
2929

30-
// a holds the mapping of data point indices to cluster numbers, b holds the sizes of each cluster
30+
// slices holding the cluster mapping and sizes
3131
mu sync.RWMutex
3232
a, b []int
3333

34-
// variables holding values of centroids of each clusters
34+
// slices holding the centroid values of each cluster
3535
m, n [][]float64
3636

37+
// dataset
3738
d [][]float64
3839
}
3940

40-
func KmeansClusterer(iterations, clusters int, distance DistanceFunc) (HardClusterer, error) {
41+
func KMeans(iterations, clusters int, distance DistanceFunc) (HardClusterer, error) {
4142
if iterations < 1 {
4243
return nil, ErrZeroIterations
4344
}

0 commit comments

Comments
 (0)