Skip to content

Commit 503e120

Browse files
committed
Added data importer module
1 parent 537acdb commit 503e120

3 files changed

Lines changed: 153 additions & 0 deletions

File tree

data/test.csv

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
0.1,0.2,0.3
2+
0.4,0.5,0.6
3+
0.7,0.8,0.9

importer.go

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
package clusters
2+
3+
import (
4+
"bufio"
5+
"bytes"
6+
"encoding/csv"
7+
"io"
8+
"os"
9+
"strconv"
10+
)
11+
12+
// Importer loads numeric data sets from CSV files. It has no fields, so the
// zero value is ready to use.
type Importer struct{}
14+
15+
func NewImporter() *Importer {
16+
return &Importer{}
17+
}
18+
19+
func (i *Importer) Import(file string) ([][]float64, error) {
20+
f, err := os.Open(file)
21+
if err != nil {
22+
return [][]float64{}, err
23+
}
24+
25+
defer f.Close()
26+
27+
c, err := i.lineCount(bufio.NewReader(f))
28+
if err != nil {
29+
return [][]float64{}, err
30+
}
31+
32+
f.Seek(0, 0)
33+
34+
var (
35+
d = make([][]float64, c)
36+
r = csv.NewReader(f)
37+
k = 0
38+
)
39+
40+
for {
41+
record, err := r.Read()
42+
43+
if err == io.EOF {
44+
break
45+
} else if err != nil {
46+
return [][]float64{}, err
47+
}
48+
49+
d[k] = make([]float64, 0, len(record))
50+
51+
for j, _ := range record {
52+
f, err := strconv.ParseFloat(record[j], 64)
53+
if err == nil {
54+
d[k] = append(d[k], f)
55+
} else {
56+
return [][]float64{}, err
57+
}
58+
}
59+
60+
k++
61+
}
62+
63+
return d, nil
64+
}
65+
66+
func (*Importer) lineCount(r *bufio.Reader) (int, error) {
67+
var (
68+
buf = make([]byte, 32*1024)
69+
count = 0
70+
lineSep = []byte{'\n'}
71+
)
72+
73+
for {
74+
c, err := r.Read(buf)
75+
count += bytes.Count(buf[:c], lineSep)
76+
77+
switch err {
78+
case io.EOF:
79+
return count, nil
80+
default:
81+
return count, err
82+
}
83+
}
84+
}

importer_test.go

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
package clusters
2+
3+
import (
4+
"math"
5+
"testing"
6+
)
7+
8+
// TOLERANCE is the largest absolute difference at which fsliceEqual still
// treats two float64 values as equal.
const TOLERANCE = 1e-6
9+
10+
func TestImportedLoadDataOfCorrectLengh(t *testing.T) {
11+
var (
12+
f = "data/test.csv"
13+
i = NewImporter()
14+
s = 3
15+
)
16+
17+
d, e := i.Import(f)
18+
if e != nil {
19+
t.Errorf("Error importing data: %e", e)
20+
}
21+
22+
if s != len(d) {
23+
t.Errorf("Imported data size mismatch: %d vs %d", s, len(d))
24+
}
25+
}
26+
27+
func TestImportedLoadCorrectData(t *testing.T) {
28+
var (
29+
f = "data/test.csv"
30+
i = NewImporter()
31+
s = [][]float64{
32+
[]float64{0.1, 0.2, 0.3},
33+
[]float64{0.4, 0.5, 0.6},
34+
[]float64{0.7, 0.8, 0.9},
35+
}
36+
)
37+
38+
d, e := i.Import(f)
39+
if e != nil {
40+
t.Errorf("Error importing data: %e", e)
41+
}
42+
43+
if !fsliceEqual(d, s) {
44+
t.Error("Imported data mismatch")
45+
}
46+
}
47+
48+
func fsliceEqual(a, b [][]float64) bool {
49+
if len(a) != len(b) {
50+
return false
51+
}
52+
53+
for i := 0; i < len(a); i++ {
54+
if len(a[i]) != len(b[i]) {
55+
return false
56+
}
57+
58+
for j := 0; j < len(a[i]); j++ {
59+
if d := math.Abs(a[i][j] - b[i][j]); d > TOLERANCE {
60+
return false
61+
}
62+
}
63+
}
64+
65+
return true
66+
}

0 commit comments

Comments
 (0)