55#include < nanobind/stl/string.h>
66#include " Graphzero.hpp"
77#include " csrFilegen.hpp"
8+ #include " featureFilegen.hpp"
9+ #include " FeatureStore.hpp"
810#include < vector>
911namespace nb = nanobind;
1012
@@ -23,9 +25,9 @@ It holds the mmap / zero-copy memory.
2325)doc" ,
2426 nb::arg (" filename" )
2527 )
26- .def_rw (" num_nodes" ,&Graphzero::num_nodes)
27- .def_rw (" num_edges" ,&Graphzero::num_edges)
28- .def_rw (" has_weights" ,&Graphzero::has_weights)
28+ .def_ro (" num_nodes" ,&Graphzero::num_nodes)
29+ .def_ro (" num_edges" ,&Graphzero::num_edges)
30+ .def_ro (" has_weights" ,&Graphzero::has_weights)
2931
3032
3133 .def (" get_degree" , [](Graphzero &self, int64_t node_id) {
@@ -215,9 +217,159 @@ R"doc(Performs uniform random neighbour sampling for a node.
215217
216218 // convert csv to gl
// Exposes the C++ function `convert_csv` to Python under the name
// `convert_csv_to_gl`, taking `csv_path`, `out_path` and `directed`
// (which defaults to false when omitted).
// The `nb::call_guard<nb::gil_scoped_release>` annotation releases the GIL
// for the duration of the call.
// NOTE(review): the two docstring lines below look like the removed ("-") and
// added ("+") sides of a diff hunk; presumably only the R"doc(...)doc" form
// exists in the real file -- confirm against the repository.
217219 m.def (" convert_csv_to_gl" , &convert_csv,
218- " Convert a CSV edge list to GraphZero binary format (.gl)" ,
220+ R"doc( Convert a CSV edge list to GraphZero binary format (.gl))doc " ,
219221 nb::arg (" csv_path" ),
220222 nb::arg (" out_path" ),
221223 nb::arg (" directed" ) = false ,
222224 nb::call_guard<nb::gil_scoped_release>());
225+
226+ // Feature store class
227+
228+ // Bind the DataType Enum so Python can pass it to the converter
229+ nb::enum_<DataType>(m, " DataType" )
230+ .value (" INT32" , DataType::INT32)
231+ .value (" INT64" , DataType::INT64)
232+ .value (" FLOAT32" , DataType::FLOAT32)
233+ .value (" FLOAT64" , DataType::FLOAT64)
234+ .export_values ();
235+
236+ nb::class_<FeatureStore>(m, " FeatureStore" )
237+ .def (nb::init<const char *>(), // constructor
238+ R"doc( Data Class contains the Datafile and its relevant functions and methods.
239+ It holds the mmap / zero-copy memory.
240+ Args:
241+ filename (str): either absolute path or relative path (depends on the current working directory).
242+ Returns:
243+ FeatureStorage instance.
244+ )doc" ,
245+ nb::arg (" filename" )
246+ )
247+ .def_ro (" num_nodes" ,&FeatureStore::num_nodes)
248+ .def_ro (" feature_dim" ,&FeatureStore::feature_dim)
249+ .def (" get_data" ,[](FeatureStore &self,int64_t nodeId) -> nb::object {
250+ switch (self.get_dtype ()){
251+ case DataType::INT32: {
252+ auto data = self.get_data <int32_t >(nodeId);
253+ auto arr = nb::ndarray<nb::numpy, int32_t , nb::shape<1 >>(
254+ const_cast <int32_t *>(data.data ()), // pointer to data
255+ { data.size () } // shape
256+ );
257+ return nb::cast (arr);
258+ }
259+ case DataType::INT64: {
260+
261+ auto data = self.get_data <int64_t >(nodeId);
262+ auto arr = nb::ndarray<nb::numpy, int64_t , nb::shape<1 >>(
263+ const_cast <int64_t *>(data.data ()), // pointer to data
264+ { data.size () } // shape
265+ );
266+ return nb::cast (arr);
267+ }
268+ case DataType::FLOAT32: {
269+ auto data = self.get_data <float >(nodeId);
270+ auto arr = nb::ndarray<nb::numpy, float , nb::shape<1 >>(
271+ const_cast <float *>(data.data ()), // pointer to data
272+ { data.size () } // shape
273+ );
274+ return nb::cast (arr);
275+ }
276+ case DataType::FLOAT64: {
277+ auto data = self.get_data <double >(nodeId);
278+ auto arr = nb::ndarray<nb::numpy, double , nb::shape<1 >>(
279+ const_cast <double *>(data.data ()), // pointer to data
280+ { data.size () } // shape
281+ );
282+ return nb::cast (arr);
283+ }
284+ default : throw std::runtime_error (" Unsupported data type" );
285+ }
286+
287+ // Return a zero-copy view into the underlying data buffer and keep
288+ },
289+ nb::keep_alive<0 ,1 >(),
290+ R"doc( Returns the data of a node.
291+ Args:
292+ node_id (int)
293+ Returns:
294+ 1-D numpy ndarray of data for nodeId.
295+ )doc" ,
296+ nb::arg (" node_id" )
297+ )
298+ .def (" get_tensor" ,[](FeatureStore &self) -> nb::object {
299+ char * data = self.get_data_ptr ();
300+ size_t n = self.num_nodes ;
301+ size_t f = self.feature_dim ;
302+
303+ switch (self.get_dtype ()){
304+ case DataType::INT32: {
305+ int32_t * ptr = reinterpret_cast <int32_t *>(data);
306+
307+ auto arr = nb::ndarray<nb::numpy, int32_t , nb::shape<2 >>(
308+ ptr, // pointer to data
309+ {n,f } // shape
310+ );
311+ return nb::cast (arr);
312+ }
313+ case DataType::INT64: {
314+ int64_t * ptr = reinterpret_cast <int64_t *>(data);
315+
316+ auto arr = nb::ndarray<nb::numpy, int64_t , nb::shape<2 >>(
317+ ptr, // pointer to data
318+ {n,f } // shape
319+ );
320+ return nb::cast (arr);
321+ }
322+ case DataType::FLOAT32: {
323+ float * ptr = reinterpret_cast <float *>(data);
324+
325+ auto arr = nb::ndarray<nb::numpy, float , nb::shape<2 >>(
326+ ptr, // pointer to data
327+ {n,f } // shape
328+ );
329+ return nb::cast (arr);
330+ }
331+ case DataType::FLOAT64: {
332+ double * ptr = reinterpret_cast <double *>(data);
333+
334+ auto arr = nb::ndarray<nb::numpy, double , nb::shape<2 >>(
335+ ptr, // pointer to data
336+ {n,f } // shape
337+ );
338+ return nb::cast (arr);
339+ }
340+ default : throw std::runtime_error (" Unsupported data type" );
341+ }
342+ },
343+ nb::keep_alive<0 ,1 >(),
344+ R"doc( Returns the entire Data (tensor).
345+ Returns:
346+ NxF data of given dtype.
347+ )doc"
348+ )
349+
350+ // serialization (Pack)
351+ .def (" __getstate__" , [](const FeatureStore &d){
352+
353+ return nb::make_tuple (d.filename ); // only filename required to rebuild the object
354+ })
355+ // deserialization (unpack)
356+ .def (" __setstate__" ,[](nb::tuple &t){
357+
358+ if (t.size () != 1 )
359+ throw std::runtime_error (" Invalid state!" );
360+
361+ std::string filename = nb::cast<std::string>(t[0 ]);
362+
363+ // create new c++ object using the filename
364+ return new FeatureStore (filename.c_str ());
365+ })
366+ ;
367+
368+ // feature store data
// Exposes the C++ function `convert_csv_to_binary` to Python under the name
// `convert_csv_to_gd`, taking `csv_path`, `out_path` and `dtype`.
// NOTE(review): `dtype` is presumably a value of the DataType enum bound
// earlier in this module -- confirm against convert_csv_to_binary's signature.
// The `nb::call_guard<nb::gil_scoped_release>` annotation releases the GIL
// for the duration of the call.
369+ m.def (" convert_csv_to_gd" , &convert_csv_to_binary,
370+ R"doc( Convert a CSV data to GraphZero data format (.gd).)doc" ,
371+ nb::arg (" csv_path" ),
372+ nb::arg (" out_path" ),
373+ nb::arg (" dtype" ),
374+ nb::call_guard<nb::gil_scoped_release>());
223375}
0 commit comments