@@ -9,34 +9,29 @@ type dbscanClusterer struct {
99 workers int
1010 eps float64
1111
12- l , s , o , f int
13-
1412 distance DistanceFunc
1513
14+ // slices holding the cluster mapping and sizes
1615 mu sync.RWMutex
1716 a , b []int
1817
19- // channel for distributed searching for nearest neighbours
20- j chan * nearestJob
21-
22- // variabes for calculating nearest neighbours concurrently
23- m * sync.Mutex
24- w * sync.WaitGroup
25- p * []float64
26- r * []int
18+ // variables used for concurrent computation of nearest neighbours
19+ l , s , o , f int
20+ j chan * rangeJob
21+ m * sync.Mutex
22+ w * sync.WaitGroup
23+ p * []float64
24+ r * []int
2725
2826 // visited points
2927 v []bool
3028
29+ // dataset
3130 d [][]float64
3231}
3332
34- type nearestJob struct {
35- a , b int
36- }
37-
38- /* Implementation of DBSCAN algorithm with concurrent moditication */
39- func DbscanClusterer (minpts int , eps float64 , workers int , distance DistanceFunc ) (HardClusterer , error ) {
33+ /* Implementation of DBSCAN algorithm with concurrent nearest neighbour computation */
34+ func DBSCAN (minpts int , eps float64 , workers int , distance DistanceFunc ) (HardClusterer , error ) {
4035 if minpts < 1 {
4136 return nil , ErrZeroMinpts
4237 }
@@ -159,7 +154,7 @@ func (c *dbscanClusterer) run() {
159154
160155 c .v [i ] = true
161156
162- c .nearest (& i , & l , & ns )
157+ c .nearest (i , & l , & ns )
163158
164159 if l < c .minpts {
165160 c .a [i ] = - 1
@@ -173,7 +168,7 @@ func (c *dbscanClusterer) run() {
173168 if ! c.v [ns [j ]] {
174169 c.v [ns [j ]] = true
175170
176- c .nearest (& ns [j ], & k , & nss )
171+ c .nearest (ns [j ], & k , & nss )
177172
178173 if k >= c .minpts {
179174 l += k
@@ -197,12 +192,12 @@ func (c *dbscanClusterer) run() {
197192 * by the size of the data. This is based on the assumption that the neighbour points of p
198193 * are located in a relatively small subsection of the input data, so the dataset can be scanned
199194 * concurrently without blocking a large number of goroutines trying to write to r */
200- func (c * dbscanClusterer ) nearest (p * int , l * int , r * []int ) {
195+ func (c * dbscanClusterer ) nearest (p int , l * int , r * []int ) {
201196 var b int
202197
203198 * r = (* r )[:0 ]
204199
205- c .p = & c .d [* p ]
200+ c .p = & c .d [p ]
206201 c .r = r
207202
208203 c .w .Add (c .s )
@@ -214,7 +209,7 @@ func (c *dbscanClusterer) nearest(p *int, l *int, r *[]int) {
214209 b = (i + 1 ) * c .f
215210 }
216211
217- c .j <- & nearestJob {
212+ c .j <- & rangeJob {
218213 a : i * c .f ,
219214 b : b ,
220215 }
@@ -226,7 +221,7 @@ func (c *dbscanClusterer) nearest(p *int, l *int, r *[]int) {
226221}
227222
228223func (c * dbscanClusterer ) startWorkers () {
229- c .j = make (chan * nearestJob , c .l )
224+ c .j = make (chan * rangeJob , c .l )
230225
231226 c .m = & sync.Mutex {}
232227 c .w = & sync.WaitGroup {}
@@ -255,18 +250,15 @@ func (c *dbscanClusterer) nearestWorker() {
255250}
256251
257252func (c * dbscanClusterer ) numWorkers () int {
258- var (
259- a int = c .l
260- b int
261- )
253+ var b int
262254
263- if a < 1000 {
255+ if c . l < 1000 {
264256 b = 1
265- } else if a < 10000 {
257+ } else if c . l < 10000 {
266258 b = 10
267- } else if a < 100000 {
259+ } else if c . l < 100000 {
268260 b = 100
269- } else if a < 1000000 {
261+ } else if c . l < 1000000 {
270262 b = 1000
271263 } else {
272264 b = 10000
0 commit comments