Skip to content

Commit 4c8a3aa

Browse files
committed
Augmented importer to specify range and size of data to be imported. Added basic test for kmeans clusterer.
1 parent f7598b0 commit 4c8a3aa

7 files changed

Lines changed: 19895 additions & 47 deletions

File tree

data/bus-stops.csv

Lines changed: 19824 additions & 0 deletions
Large diffs are not rendered by default.

dbscan.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ type dbscanClusterer struct {
1919
j chan *rangeJob
2020
m *sync.Mutex
2121
w *sync.WaitGroup
22-
p []float64
2322
r *[]int
23+
p []float64
2424

2525
// visited points
2626
v []bool

errors.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,4 +11,5 @@ var (
1111
ErrZeroMinpts = errors.New("MinPts cannot be 0")
1212
ErrZeroWorkers = errors.New("Number of workers cannot be less than 0")
1313
ErrZeroXi = errors.New("Xi cannot be 0")
14+
ErrInvalidRange = errors.New("Range is invalid")
1415
)

importer.go

Lines changed: 15 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ package clusters
22

33
import (
44
"bufio"
5-
"bytes"
65
"encoding/csv"
76
"io"
87
"os"
@@ -16,30 +15,26 @@ func NewImporter() *Importer {
1615
return &Importer{}
1716
}
1817

19-
func (i *Importer) Import(file string) ([][]float64, error) {
20-
f, err := os.Open(file)
21-
if err != nil {
22-
return [][]float64{}, err
18+
func (i *Importer) Import(file string, start, end, size int) ([][]float64, error) {
19+
if start < 0 || end < 0 || start > end {
20+
return [][]float64{}, ErrInvalidRange
2321
}
2422

25-
defer f.Close()
26-
27-
b := bufio.NewReader(f)
28-
29-
c, err := i.lineCount(bufio.NewReader(b))
23+
f, err := os.Open(file)
3024
if err != nil {
3125
return [][]float64{}, err
3226
}
3327

34-
f.Seek(0, 0)
35-
b.Reset(f)
28+
defer f.Close()
3629

3730
var (
38-
d = make([][]float64, c)
39-
r = csv.NewReader(b)
40-
k = 0
31+
d = make([][]float64, 0, size)
32+
r = csv.NewReader(bufio.NewReader(f))
33+
s = end - start + 1
34+
g []float64
4135
)
4236

37+
Main:
4338
for {
4439
record, err := r.Read()
4540

@@ -49,39 +44,19 @@ func (i *Importer) Import(file string) ([][]float64, error) {
4944
return [][]float64{}, err
5045
}
5146

52-
d[k] = make([]float64, 0, len(record))
47+
g = make([]float64, 0, s)
5348

54-
for j, _ := range record {
49+
for j := start; j <= end; j++ {
5550
f, err := strconv.ParseFloat(record[j], 64)
5651
if err == nil {
57-
d[k] = append(d[k], f)
52+
g = append(g, f)
5853
} else {
59-
return [][]float64{}, err
54+
continue Main
6055
}
6156
}
6257

63-
k++
58+
d = append(d, g)
6459
}
6560

6661
return d, nil
6762
}
68-
69-
func (*Importer) lineCount(r *bufio.Reader) (int, error) {
70-
var (
71-
buf = make([]byte, 32*1024)
72-
count = 0
73-
lineSep = []byte{'\n'}
74-
)
75-
76-
for {
77-
c, err := r.Read(buf)
78-
count += bytes.Count(buf[:c], lineSep)
79-
80-
switch err {
81-
case io.EOF:
82-
return count, nil
83-
default:
84-
return count, err
85-
}
86-
}
87-
}

importer_test.go

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@ func TestImportedLoadDataOfCorrectLengh(t *testing.T) {
1414
s = 3
1515
)
1616

17-
d, e := i.Import(f)
17+
d, e := i.Import(f, 0, 2, 3)
1818
if e != nil {
19-
t.Errorf("Error importing data: %e", e)
19+
t.Errorf("Error importing data: %s", e.Error())
2020
}
2121

2222
if s != len(d) {
@@ -35,13 +35,13 @@ func TestImportedLoadCorrectData(t *testing.T) {
3535
}
3636
)
3737

38-
d, e := i.Import(f)
38+
d, e := i.Import(f, 0, 2, 3)
3939
if e != nil {
40-
t.Errorf("Error importing data: %e", e)
40+
t.Errorf("Error importing data: %s", e.Error())
4141
}
4242

4343
if !fsliceEqual(d, s) {
44-
t.Error("Imported data mismatch")
44+
t.Error("Imported data mismatch: %v vs %v", d, s)
4545
}
4646
}
4747

@@ -64,3 +64,17 @@ func fsliceEqual(a, b [][]float64) bool {
6464

6565
return true
6666
}
67+
68+
func BenchmarkImport(b *testing.B) {
69+
var (
70+
f = "data/bus-stops.csv"
71+
i = NewImporter()
72+
)
73+
74+
b.ResetTimer()
75+
76+
_, e := i.Import(f, 4, 5, 15000)
77+
if e != nil {
78+
b.Errorf("Error importing data: %s", e.Error())
79+
}
80+
}

kmeans_test.go

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
package clusters
2+
3+
import (
4+
"testing"
5+
)
6+
7+
func TestKmeansClusterNumerMatches(t *testing.T) {
8+
const (
9+
C = 8
10+
)
11+
12+
var (
13+
f = "data/bus-stops.csv"
14+
i = NewImporter()
15+
)
16+
17+
d, e := i.Import(f, 4, 5, 15000)
18+
if e != nil {
19+
t.Errorf("Error importing data: %s", e.Error())
20+
}
21+
22+
c, e := KMeans(1000, C, EuclideanDistance)
23+
if e != nil {
24+
t.Errorf("Error initializing kmeans clusterer: %s", e.Error())
25+
}
26+
27+
if e = c.Learn(d); e != nil {
28+
t.Errorf("Error learning data: %s", e.Error())
29+
}
30+
31+
if len(c.Sizes()) != C {
32+
t.Errorf("Number of clusters does not match: %d vs %d", len(c.Sizes()), C)
33+
}
34+
}

optics.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@ type opticsClusterer struct {
3030
c chan *clusterJob
3131
m *sync.Mutex
3232
w *sync.WaitGroup
33-
p []float64
3433
r *[]int
34+
p []float64
3535

3636
// visited points
3737
v []bool

0 commit comments

Comments
 (0)