Skip to content

Commit a078b0f

Browse files
committed
CSV to .gl conversion in Python; the .gl file is now 64-byte aligned for AVX-512.
1 parent 8a1d970 commit a078b0f

11 files changed

Lines changed: 300 additions & 97 deletions

File tree

.gitignore

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,4 @@
22
*.gl
33
graph.gl
44
venv/
5-
genGraph-10T
6-
genGraph-100T
5+
*.exe

genGraph-100T

-48.3 KB
Binary file not shown.

genGraph-10T

-101 KB
Binary file not shown.

generateGraph.cpp

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,18 @@
1-
#include "csrFilegen.hpp"
1+
#include "src/csrFilegen.hpp"
22

3-
int main(int argc, char const *argv[])
4-
{
5-
generateLargeGraph(100000,0.05f,"graph-100T.gl");
3+
// int main(int argc, char const *argv[])
4+
// {
5+
// // generateLargeGraph(100000,0.05f,"graph-100T.gl");
6+
// return 0;
7+
// }
8+
9+
// csv_to_gl
10+
int main(int argc, char** argv) {
11+
if (argc < 3) {
12+
std::cerr << "Usage: ./converter <input.csv> <output.gl> [directed=0]" << std::endl;
13+
return 1;
14+
}
15+
bool directed = (argc > 3 && std::string(argv[3]) == "1");
16+
convert_csv(argv[1], argv[2], directed);
617
return 0;
7-
}
18+
}

src/CSR.hpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
#include "MemoryMap.hpp"
44
#include <span>
55
#include <cstddef>
6-
#include <sys/mman.h>
76

87
class CSR{
98
// size_t is the type used for all data!
@@ -13,9 +12,13 @@ class CSR{
1312
// defined as nnzRow[i] = nnzRow[i-1] + no of non zero row entries of ith row
1413
size_t* nnzRow;
1514
size_t sizeofnnzRow; // size in bytes
15+
size_t N; // num of nodes
1616

1717
size_t* colPtr;
1818
size_t sizeofcolPtr; // size in bytes
19+
size_t M; // num of edges
20+
21+
size_t flags;
1922
public:
2023
CSR(const char* graphPath);
2124
~CSR();
@@ -42,6 +45,9 @@ inline CSR::CSR(const char* graphPath){
4245
}
4346
this->sizeofnnzRow = header.sizeofnnzRow;
4447
this->sizeofcolPtr = header.sizeofcolPtr;
48+
this->N = header.num_nodes;
49+
this->M = header.num_edges;
50+
this->flags = header.flags;
4551

4652
this->nnzRow = reinterpret_cast<size_t*>(this->graphMap->get_data()) + header.offset_nnz/sizeof(size_t);
4753
this->colPtr = reinterpret_cast<size_t*>(this->graphMap->get_data()) + header.offset_col/sizeof(size_t);

src/Graphzero.hpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,23 +165,35 @@ inline std::vector<size_t> Graphzero::batchRandomWalk(const std::vector<size_t>&
165165
std::vector<size_t> results;
166166
results.reserve(walkLength*startNodes.size());
167167

168+
// set only for random walks
169+
storage->set_access_pattern(true);
170+
168171
#pragma omp parallel for
169172
for(size_t startNode: startNodes){
170173
std::vector<size_t> walk = randomWalk(startNode,walkLength,p,q);
171174
results.insert(results.end(),walk.begin(),walk.end()); // extend the results
172175
}
176+
177+
// reset
178+
storage->set_access_pattern(false);
173179
return results;
174180
}
175181

176182
inline std::vector<size_t> Graphzero::batchRandomUniformWalk(const std::vector<size_t>& startNodes, size_t walkLength){
177183
std::vector<size_t> results;
178184
results.reserve(walkLength*startNodes.size());
185+
186+
// set only for random walks
187+
storage->set_access_pattern(true);
179188

180189
#pragma omp parallel for
181190
for(size_t startNode: startNodes){
182191
std::vector<size_t> walk = ReservoirSampling(startNode,walkLength);
183192
results.insert(results.end(),walk.begin(),walk.end()); // extend the results
184193
}
194+
195+
// reset
196+
storage->set_access_pattern(false);
185197
return results;
186198
}
187199
#endif

src/MemoryMap.hpp

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
#include <stdexcept>
1010

1111
// only here
12-
const size_t MAGIC_NUM = 8388354976772092519; // 'graphlit' converted in size_t
12+
const uint64_t MAGIC_NUM = 8388354976772092519; // 'graphlit' encoded as a uint64_t
1313

1414
struct GraphHeader {
1515
uint64_t MAGIC_NUM
@@ -18,7 +18,10 @@ struct GraphHeader {
1818
uint64_t sizeofcolPtr; // Needed to know size of col_indices (M)
1919
uint64_t offset_nnz; // Byte offset where nnzRow start
2020
uint64_t offset_col; // Byte offset where colPtr start
21-
};
21+
uint64_t num_nodes; // Explicit count (N)
22+
uint64_t num_edges; // Explicit count (M)
23+
uint64_t flags; // flags reserved for later use
24+
}; // 64 byte header perfect AVX-512 alignment
2225

2326
class MemoryMap
2427
{
@@ -55,6 +58,9 @@ inline MemoryMap::MemoryMap(const char* path){
5558
close(fd); // Clean up the fd we just opened
5659
throw std::runtime_error("mmap failed");
5760
}
61+
62+
// advise the kernel to use huge pages for this mapping
63+
madvise(mappedptr,length, MADV_HUGEPAGE);
5864
}
5965

6066
inline MemoryMap::~MemoryMap(){
@@ -69,7 +75,7 @@ inline MemoryMap::~MemoryMap(){
6975
}
7076

7177
fd = -1;
72-
length = 0;
78+
length = 0;
7379
}
7480
inline void* MemoryMap::get_data(){
7581
// get data pointer

src/bindings.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include <nanobind/stl/tuple.h>
55
#include <nanobind/stl/string.h>
66
#include "Graphzero.hpp"
7+
#include "csrFilegen.hpp"
78
#include <vector>
89
namespace nb = nanobind;
910

@@ -90,5 +91,12 @@ NB_MODULE(graphzero,m) {
9091
return new Graphzero(filename.c_str());
9192
})
9293
;
93-
94+
95+
// convert csv to gl
96+
m.def("convert_csv_to_gl", &convert_csv,
97+
"Convert a CSV edge list to GraphZero binary format (.gl)",
98+
nb::arg("csv_path"),
99+
nb::arg("out_path"),
100+
nb::arg("directed") = false,
101+
nb::call_guard<nb::gil_scoped_release>());
94102
}

0 commit comments

Comments
 (0)