Utopia 2
Framework for studying models of complex & adaptive systems.
hdfdataset.hh
1
10#ifndef UTOPIA_DATAIO_HDFDATASET_HH
11#define UTOPIA_DATAIO_HDFDATASET_HH
12
13#include <numeric>
14#include <stdexcept>
15#include <unordered_map>
16#include <utility>
17
18#include <hdf5.h>
19#include <hdf5_hl.h>
20
21#include "../core/type_traits.hh"
22
23#include "hdfattribute.hh"
24#include "hdfbufferfactory.hh"
25#include "hdfchunking.hh"
26#include "hdfdataspace.hh"
27#include "hdfobject.hh"
28#include "hdftype.hh"
29#include "hdfutilities.hh"
31
32namespace Utopia
33{
34namespace DataIO
35{
52class HDFDataset final : public HDFObject<HDFCategory::dataset>
53{
54 private:
64 template <typename Datatype> void __create_dataset__(std::size_t typesize)
65 {
66
67 this->_log->debug("Creating dataset with typesize {} at path {} ...",
68 typesize, _path);
69 this->_log->trace("refcount before creation: {}", get_refcount());
70
71 // create group property list and (potentially) intermediate groups
74
75 _type.close();
76
77 _type.open<Datatype>("datatype of " + _path, typesize);
78
79 // this is something different than typesize, which has meaning for
80 // arrays only
82 {
83 if (_chunksizes.size() != _rank)
84 {
85 this->_log->debug("Computing chunksizes ...");
88 }
89 }
90
92
93 // distinguish by chunksize; chunked dataset needed for compression
94 if (_chunksizes.size() > 0)
95 {
96 // create creation property list, set chunksize and compress level
97
98 this->_log->debug("Setting given chunksizes ...");
99 H5Pset_chunk(plist, _rank, _chunksizes.data());
100
101 if (_compress_level > 0)
102 {
104 }
105
107 // make dataspace
108 _filespace.open(_path + " file dataspace", _rank, _current_extent,
109 _capacity);
110
111 // create dataset and return
112 this->_log->debug(
113 "Creating actual dataset and binding it to object class ...");
114
118 &H5Dclose);
119
120 if (not is_valid())
121 {
122 throw std::runtime_error("Invalid dataset id " + _path + " " +
123 std::to_string(__LINE__));
124 }
125 }
126 else
127 {
128
129 // make dataspace
130 _filespace.open(_path + "file dataspace", _rank, _current_extent,
131 _capacity);
132
133 this->_log->debug(
134 "Creating actual dataset and binding it to object class ...");
135 // can create the dataset right away
139 &H5Dclose);
140
141 if (not is_valid())
142 {
143 throw std::runtime_error("Invalid dataset id " + _path + " " +
144 std::to_string(__LINE__));
145 }
146 }
147
148 this->_log->debug("refcount of dataset after creation {}: {}", _path,
149 get_refcount());
150 }
151
163 template <typename T> herr_t __write_container__(T &&data)
164 {
165 this->_log->debug("Writing container data to dataset {}...", _path);
166
167 this->_log->debug("Dataset {}'s refcount at write begin: {}", _path,
168 get_refcount());
169
172
173 // we can write directly if we have a plain vector, no nested or
174 // stringtype.
175 if constexpr (std::is_same_v<T, std::vector<value_type_1>> and
176 not Utils::is_container_v<value_type_1> and
177 not Utils::is_string_v<value_type_1>)
178 {
179 this->_log->debug("... of simple vectortype");
180
181 // check if the dataset has been created; if not, create it
182 if (not is_valid())
183 {
184 this->_log->debug("... dataset not yet existing, creating it "
185 "for simple vectortype");
187 }
188 else
189 {
190 this->_log->debug(
191 "... dataset existing, reading out type and writing data");
192 // check if datatypes are compatible
193
195 temp_type.open<base_type>("testtype", 0);
196
197 if (temp_type != _type)
198 {
199 throw std::runtime_error(
200 "Error, cannot write container data of a "
201 "different type into dataset " +
202 _path);
203 }
204 }
205 this->_log->debug("Dataset {}'s refcount before write: {}", _path,
206 get_refcount());
207
210 }
211 // when stringtype or containertype is stored in a container, then
212 // we have to buffer. bufferfactory handles how to do this in detail
213 else
214 {
215 this->_log->debug("... of nontrivial containertype");
216
217 std::size_t typesize = 0;
218 // check if array, if yes, get typesize, else typesize is 0 and
219 // typefactory creates vlen data or string data
220 if constexpr (Utils::is_container_v<base_type> and
221 Utils::is_array_like_v<base_type>)
222 {
223 // get_size is a metafunction defined in hdfutilities.hh
225 }
226
227 if (not is_valid())
228 {
229 this->_log->debug(
230 "... dataset not yet existing, creating it for array type");
232 }
233 else
234 {
235 // check if datatypes are compatible
236 this->_log->debug("... dataset existing, reading out type");
237
239 temp_type.open<base_type>("testtype", typesize);
240
241 if (temp_type != _type)
242 {
243 throw std::runtime_error(
244 "Error, cannot write fixedsize container data of a "
245 "different type into dataset " +
246 _path);
247 }
248 }
249
250 this->_log->debug(
251 "... buffering data into vectortype appropriate for writing");
252 // the reference is needed here, because addresses of underlying
253 // data arrays are needed.
254 auto buffer = HDFBufferFactory::buffer(
255 std::begin(data), std::end(data),
256 [](auto &value) -> value_type_1 & { return value; });
257
258 this->_log->debug("Dataset {}'s refcount before write: {}", _path,
259 get_refcount());
260
261 this->_log->debug("... writing data");
263 _filespace.get_C_id(), H5P_DEFAULT, buffer.data());
264 }
265 }
266
276 template <typename T> herr_t __write_stringtype__(T data)
277 {
278 this->_log->debug("Writing string data to dataset {}...", _path);
279
280 // Since std::string cannot be written directly,
281 // (only const char*/char* can), a buffer pointer has been added
282 // to handle writing in a clearer way and with less code
283 auto len = 0;
284 const char *buffer = nullptr;
285
286 if constexpr (std::is_pointer_v<T>) // const char* or char* -> strlen
287 // needed
288 {
289 this->_log->debug("... stringtype is pointer-valued");
290 len = std::strlen(data);
291 buffer = data;
292 }
293 else // simple for strings
294 {
295 this->_log->debug("... stringtype is not pointer-valued");
296 len = data.size();
297 buffer = data.c_str();
298 }
299
300 // check if the dataset has been created; if not, create it
301 if (not is_valid())
302 {
303 this->_log->debug(
304 "... dataset not yet existing, creating it for stringtypee");
305 // check if datatypes are compatible
306
308 }
309 else
310 {
311 this->_log->debug("... dataset existing, reading out type");
312 // check if datatypes are compatible
314 temp_type.open<const char *>("testtype", len);
315
316 if (temp_type != _type)
317 {
318 throw std::runtime_error("Error, cannot write string data of a "
319 "different type into dataset " +
320 _path);
321 }
322 }
323
324 this->_log->debug(" ... writing data");
325 // use that strings store data in consecutive memory
327 _filespace.get_C_id(), H5P_DEFAULT, buffer);
328 }
329
341 template <typename T> herr_t __write_pointertype__(T data)
342 {
343 this->_log->debug("Writing pointer data to dataset {}...", _path);
344
345 // the result type has pointers, references, and qualifiers removed
347
348 if (not is_valid())
349 {
350 this->_log->debug(
351 "... dataset not yet existing, creating it for pointertype");
352
354 }
355 else
356 {
357 // check if datatypes are compatible
358 this->_log->debug("... dataset existing, reading out type");
359
361 temp_type.open<basetype>("testtype", 0);
362
363 if (temp_type != _type)
364 {
365 throw std::runtime_error(
366 "Error, cannot write pointer data of a "
367 "different type into dataset " +
368 _path);
369 }
370 }
371 this->_log->debug(" ... writing data");
372
375 }
376
387 template <typename T> herr_t __write_scalartype__(T data)
388 {
389 this->_log->debug("Writing scalar data to dataset {}...", _path);
390 // because we just write a scalar, the shape basically says that
391 // the dataset is pointlike: 1D with a single entry.
392 if (not is_valid())
393 {
394 this->_log->debug(
395 "... dataset not yet existing, creating it for scalartype");
396
398 }
399 else
400 {
401 // check if datatypes are compatible
402 this->_log->debug("... dataset existing, reading out type");
403
405 temp_type.open<std::decay_t<T>>("testtype", 0);
406
407 if (temp_type != _type)
408 {
409 throw std::runtime_error("Error, cannot write scalar data of a "
410 "different type into dataset " +
411 _path);
412 }
413 }
414
415 this->_log->debug(" ... writing data");
416
419 }
420
422 // We could want to read into a predefined buffer for some reason (frequent
423 // reads), and thus this and the following functions expect an argument
424 // 'buffer' to store their data in. The function 'read(..)' is then
425 // overloaded to allow for automatic buffer creation or a buffer argument.
426 template <typename Type> herr_t __read_container__(Type &buffer)
427 {
428
429 this->_log->debug("Reading container data from dataset {}...", _path);
430
431 using value_type_1 =
433
434 // when the value_type of Type is a container again, we want nested
435 // arrays basically. Therefore we have to check if the desired type
436 // Type is suitable to hold them, read the nested data into a hvl_t
437 // container, assuming that they are varlen because this is the more
438 // general case, and then turn them into the desired type again...
439 if constexpr (Utils::is_container_v<value_type_1> ||
440 Utils::is_string_v<value_type_1>)
441 {
442 this->_log->debug(
443 "... reading nested container or container of strings ...");
444 using value_type_2 =
446
447 // if we have nested containers of depth larger than 2, throw a
448 // runtime error because we cannot handle this
449 // TODO extend this to work more generally
450 if constexpr (Utils::is_container_v<value_type_2>)
451 {
452 throw std::runtime_error(
453 "Dataset " + _path +
454 ": Cannot read data into nested containers "
455 "with depth > 2!");
457 }
458 if constexpr (!std::is_same_v<std::vector<value_type_1>, Type>)
459 {
460 throw std::runtime_error("Dataset " + _path +
461 ": Can only read data"
462 " into vector containers!");
463 }
464
465 // everything is fine.
466
467 // check if type given in the buffer is std::array.
468 // If it is, the user knows that the stored data
469 // always has the same length; otherwise it is
470 // assumed that the data is of variable
471 // length.
473 {
474 this->_log->debug("... nested type is array-like...");
475 // check if std::array is given as value_type,
476 // if not adjust sizes
477 if constexpr (!Utils::is_array_like_v<value_type_1>)
478 {
479 // if not, throw an exception, because a non-array
480 // element type cannot hold the fixed-size array data
481
482 throw std::invalid_argument(
483 "Dataset " + _path +
484 ": Cannot read into container of non arrays "
485 "when data type in file is fixed array type");
486 }
487
488 return H5Dread(get_C_id(), _type.get_C_id(),
490 H5P_DEFAULT, buffer.data());
491 }
492 else if (_type.type_category() == H5T_STRING)
493 {
494 this->_log->debug("... nested type is string-like...");
495
496 if constexpr (!Utils::is_string_v<value_type_1>)
497 {
498 throw std::invalid_argument(
499 "Dataset " + _path +
500 ": Can only read stringdata into string elements");
501 }
502 else
503 {
504 /*
505 * we have two possibilities, which have to be treated
506 * separately because of how HDF5 handles strings:
507 * 1): dataset contains variable length strings
508 * 2): dataset contains fixed size strings
510 *
511 * logic:
512 * - check if we have a stringtype
513 * - make a variable length stringtype
514 * - check if the type of the dataset is varlen
515 * string
516 * - yes:
517 * - read into char** buffer,
518 * - then put into container<std::string>
519 * - no:
520 * - get size of type
521 * - make string (=> char array) of size
522 * bufferlen*typesize
523 * - read into it
524 * - split the long string each typesize chars
525 * -> get entries
526 * - put them into final buffer
527 * Mind that the buffer is preallocated to the correct size
528 */
530 vlentype.open<std::string>("vlentype_temporary", 0ul);
531
532 if (H5Tequal(vlentype.get_C_id(), _type.get_C_id()))
533 {
534 this->_log->debug(
535 "... nested type of variable length type ...");
536
537 std::vector<char *> temp_buffer(buffer.size());
538 herr_t err =
542
543 /* README:
544 - hdf5 uses `NULL` as fill value for string entries
545 which are not written per default, and setting another
546 fillvalue did not succeed for variable length data.
547 - The NULL produces a segmentation fault when trying to
548 turn it into an std::string.
549 - Hence, as a workaround, the `NULL`s are treated
550 explicitly when postprocessing the data into their final
551 form, which is what the code below does.
552 */
553 for (auto [b, tb] = std::make_tuple(
554 buffer.begin(), temp_buffer.begin());
555 b != buffer.end(); ++b, ++tb)
556 {
557 if (*tb != NULL)
558 {
559 *b = *tb;
560 }
561 else
562 {
563 *b = "\0";
564 }
565 }
566
567 for (auto &&c : temp_buffer)
568 {
569 free(c);
570 }
571
572 return err;
573 }
574 else
575 {
576
577 this->_log->debug(
578 "... nested type of fixed length type ...");
579
580 // get size of the type, set up intermediate string
581 // buffer, adjust its size
582 auto s = _type.size() / sizeof(char);
583 std::string temp_buffer;
584
585 temp_buffer.resize(buffer.size() * s);
586
587 // actual read
588 herr_t err =
592
593 // content of dataset is now one consecutive line of
594 // stuff in temp_buffer. Use read size s to cut out the
595 // strings we want. Definitely not elegant or fast, but
596 // strings are ugly to work with in general, and this is
597 // the simplest solution currently available here
598
599 std::size_t i = 0;
600 std::size_t buffidx = 0;
601 while (i < temp_buffer.size())
602 {
603 buffer[buffidx] = temp_buffer.substr(i, s);
604 i += s;
605 buffidx += 1;
606 }
607
608 // return
609 return err;
610 }
611 }
612 }
613 // variable length arrays
614 else if (_type.type_category() == H5T_VLEN)
615 {
616 this->_log->debug(
617 "... nested type of variable length array type ... ");
618
619 std::vector<hvl_t> temp_buffer(buffer.size());
620
624
625 // turn the varlen buffer into the desired type
626 // Cumbersome, but necessary...
627
628 this->_log->debug("... transforming the read data to the "
629 "actually desired type ... ");
630
631 for (std::size_t i = 0; i < buffer.size(); ++i)
632 {
633 if constexpr (!Utils::is_array_like_v<value_type_1>)
634 {
635 buffer[i].resize(temp_buffer[i].len);
636 }
637
638 // I consider this more elegant than using std::for_each
639 // and defining the 'j' index outside of the predicate
640 for (auto [it, j] =
641 std::make_tuple(std::begin(buffer[i]), 0ul);
642 it != std::end(buffer[i]); ++it, ++j)
643 {
644 *it = static_cast<value_type_2 *>(temp_buffer[i].p)[j];
645 }
646 }
647
649
650 // free stuff allocated by hdf5 within the hvl_t objects
651 #if H5_VERSION_GE(1, 12, 0)
653 tempspace,
655 temp_buffer.data());
656 #else
658 tempspace,
660 temp_buffer.data());
661 #endif
662
664
665 if (status < 0)
666 {
667 throw std::runtime_error(
668 "Error when reclaiming memory in " + _path +
669 " for variable_length datatype");
670 }
671
672 return err;
673 }
674 else
675 {
676 throw std::runtime_error(
677 "Dataset " + _path +
678 ": Unknown kind of datatype in dataset when requesting to "
679 "read into container");
680 }
681 }
682
683 else // no nested container or container of strings, but one containing
684 // simple types
685 {
686 this->_log->debug("... no nested type to read");
688 _filespace.get_C_id(), H5P_DEFAULT, buffer.data());
689 }
690 }
691
693
696 template <typename Type> auto __read_stringtype__(Type &buffer)
697 {
698 this->_log->debug("Reading string data from dataset {}...", _path);
699
700 buffer.resize(buffer.size() * _type.size());
701 // read data
703 _filespace.get_C_id(), H5P_DEFAULT, buffer.data());
704 }
705
707
710 template <typename Type> auto __read_pointertype__(Type buffer)
711 {
712 this->_log->debug("Reading pointer data from dataset {}...", _path);
713
715 _filespace.get_C_id(), H5P_DEFAULT, buffer);
716 }
717
719 template <typename Type> auto __read_scalartype__(Type &buffer)
720 {
721 this->_log->debug("Reading scalar data from dataset {}...", _path);
722
724 _filespace.get_C_id(), H5P_DEFAULT, &buffer);
725 }
726
729 {
730 auto log = spdlog::get("data_io");
731
732 log->debug("Writing attribute buffer of dataset {}...", _path);
733
734 // do nothing if the buffer is empty;
735 if (_attribute_buffer.size() == 0)
736 {
737 return;
738 }
739
740 // write out the attributes from the attribute buffer.
741 for (auto &[path, variant] : _attribute_buffer)
742 {
743 log->debug("... currently at attribute {}", path);
744
745 HDFAttribute attr(static_cast<Base &>(*this), path);
746
747 // Use visiting syntax on the variant to write the attribute value
748 std::visit(
749 // this is a universal reference and hence perfect
750 // forwarding can be employed via std::forward
751 [&attr](auto &&arg) {
752 attr.write(
753 std::forward<std::remove_reference_t<decltype(arg)>>(
754 arg));
755 },
756 variant);
757 }
758
759 // free up memory.
760 _attribute_buffer.clear();
761 }
762
767
772
776 std::vector<hsize_t> _current_extent;
777
781 std::vector<hsize_t> _capacity;
782
787 std::vector<hsize_t> _chunksizes;
788
793 std::vector<hsize_t> _offset;
794
799 std::vector<hsize_t> _new_extent;
800
804 std::size_t _compress_level;
805
814 std::vector<std::pair<std::string, typename HDFType::Variant>>
816
822
828
834
835 public:
841
847 auto get_type() { return _type; }
848
855
862
867
874
880 std::size_t get_rank() { return _rank; }
881
888
894 auto get_offset() { return _offset; }
900 auto get_capacity() { return _capacity; }
901
907 auto get_chunksizes() { return _chunksizes; }
908
915
921 void set_capacity(std::vector<hsize_t> capacity)
922 {
923 if (is_valid())
924 {
925 throw std::runtime_error(
926 "Dataset " + _path +
927 ": Cannot set capacity after dataset has been created");
928 }
929 else
930 {
931 _rank = capacity.size();
932 _capacity = capacity;
933 }
934 }
935
941 void set_chunksize(std::vector<hsize_t> chunksizes)
942 {
943 if (is_valid())
944 {
945 throw std::runtime_error(
946 "Dataset " + _path +
947 ": Cannot set chunksize after dataset has been created");
948 }
949
950 // if chunksizes = {} then it will be automatically determined
951 if (chunksizes.size() != _rank and chunksizes.size() != 0)
952 {
953 throw std::runtime_error(
954 "Dataset " + _path +
955 ": Chunksizes size has to be equal to dataset rank");
956 }
957
959 }
960
975 template <typename Attrdata>
977 {
978 // Can only write directly, if the dataset is valid
979 if (is_valid())
980 {
981 this->_log->debug("Add attribute {} to valid dataset {}",
982 attribute_path, _path);
983 // make attribute and write
985 attr.write(data);
986 }
987 else
988 {
989
990 this->_log->debug("Add attribute {} to attribute buffer of {} "
991 "because it has not yet been created on disk",
992 attribute_path, _path);
993 // The dataset was not opened yet. Need to write to buffer
994
995 // For non-vector container data, need to convert to vector
996 if constexpr (Utils::is_container_v<Attrdata>)
997 {
998 if constexpr (not std::is_same_v<
999 std::vector<typename Attrdata::value_type>,
1000 Attrdata>)
1001 {
1002 // Make it a vector and write to buffer
1003 _attribute_buffer.push_back(std::make_pair(
1005 std::vector<typename Attrdata::value_type>(
1006 std::begin(data), std::end(data))));
1007 }
1008 else
1009 {
1010 // Can write directly
1011 _attribute_buffer.push_back(
1012 std::make_pair(attribute_path, data));
1013 }
1014 }
1015 else
1016 {
1017 // Can write directly
1018 _attribute_buffer.push_back(
1019 std::make_pair(attribute_path, data));
1020 }
1021 }
1022 }
1023
1031 void close()
1032 {
1033 auto log = spdlog::get("data_io");
1034
1035 // write the attributebuffer out
1036 if (is_valid())
1037 {
1039 }
1040
1041 // employ the object base class' close function to close the dataset,
1042 // then write attributes and close the filespaces
1043 Base::close();
1044
1045 // close dataspaces
1046 _filespace.close();
1047 _memspace.close();
1048 _type.close();
1049 }
1050
1064 template <HDFCategory cat>
1065 void open(const HDFObject<cat> &parent_object, std::string path,
1066 std::vector<hsize_t> capacity = {},
1067 std::vector<hsize_t> chunksizes = {}, hsize_t compress_level = 0)
1068 {
1069
1070 this->_log->debug("Opening dataset {} within {}", path,
1071 parent_object.get_path());
1072
1073 open(parent_object.get_id_object(), path, capacity, chunksizes,
1075 }
1076
1090 void open(const HDFIdentifier &parent_identifier, std::string path,
1091 std::vector<hsize_t> capacity = {},
1092 std::vector<hsize_t> chunksizes = {}, hsize_t compress_level = 0)
1093 {
1094
1095 if (not parent_identifier.is_valid())
1096 {
1097 throw std::runtime_error("parent id not valid for dataset " + path);
1098 }
1099
1101 _path = path;
1102
1103 _filespace.close();
1104 _memspace.close();
1105 // open with H5S_ALL
1106 _filespace.open();
1107 _memspace.open();
1108 // Try to find the dataset in the parent_object
1109 // If it is there, open it.
1111 // Else: postpone the dataset creation to the first write
1112 // the attribute buffer has to be written in both cases,
1113 // as its existence is independent of the existence of the
1113 // dataset in the file. We could use a dataset object repeatedly
1114 // to represent different datasets in the file via calling close
1115 // and open over and over, writing attributes to it while
1116 // it is closed. Therefore, the attribute buffer is written
1117 // out at the end of this function
1119 { // dataset exists
1120 // open it
1121
1122 this->_log->debug("... binding existing dataset to object");
1123
1124 bind_to(H5Dopen(_parent_identifier.get_id(), _path.c_str(),
1125 H5P_DEFAULT),
1126 &H5Dclose);
1127
1128 _type.close();
1129 _type.open(*this);
1130
1131 // get dataspace and read out rank, extent, capacity
1132 _filespace.open(*this);
1133
1134 _rank = _filespace.rank();
1135
1136 _chunksizes.resize(_rank, 0);
1137 // get chunksizes
1140 if (layout == H5D_CHUNKED)
1141 {
1142 herr_t err =
1144 if (err < 0)
1145 {
1146 throw std::runtime_error(
1147 "Dataset " + _path +
1148 ": Error in reading out chunksizes while opening.");
1149 }
1150 }
1152
1153 // temporary workaround for a type inconsistency:
1154 // arma::Row is used by the dataspace, std::vector by the dataset
1155 // and the chunksize algorithm
1156 auto [size, capacity] = _filespace.get_properties();
1157
1158 _current_extent.assign(size.begin(), size.end());
1159 _capacity.assign(capacity.begin(), capacity.end());
1161 }
1162 else
1163 {
1164 this->_log->debug("... dataset not yet existing, have to wait 'til "
1165 "data becomes available");
1166
1167 // it is not expected that the _attribute_buffer will become big
1168 // and reallocate often, hence a reserve is foregone here,
1169 // which one might otherwise consider.
1170 // The size to reserve would be a rather wild guess however.
1171 if (capacity.size() == 0)
1172 {
1173 _rank = 1;
1174 _capacity = std::vector<hsize_t>(_rank, H5S_UNLIMITED);
1175 _offset = std::vector<hsize_t>(_rank, 0);
1176 }
1177 else
1178 {
1179 _capacity = capacity;
1180 _rank = _capacity.size();
1181 _offset = std::vector<hsize_t>(_rank, 0);
1182 }
1183
1184 // if chunksizes is given, everything is fine, if not, it is empty
1185 // here and we will check in write method if calculation of
1186 // chunksize is needed
1188
1190
1191 _id.set_id(-1);
1192 }
1193 }
1194
1201 {
1202 using std::swap;
1204 swap(static_cast<Base &>(*this), static_cast<Base &>(other));
1205 swap(_parent_identifier, other._parent_identifier);
1206 swap(_rank, other._rank);
1207 swap(_current_extent, other._current_extent);
1208 swap(_capacity, other._capacity);
1209 swap(_chunksizes, other._chunksizes);
1210 swap(_offset, other._offset);
1211 swap(_new_extent, other._new_extent);
1212 swap(_compress_level, other._compress_level);
1213 swap(_attribute_buffer, other._attribute_buffer);
1214 swap(_filespace, other._filespace);
1215 swap(_memspace, other._memspace);
1216 swap(_type, other._type);
1217 }
1218
1226 template <typename T>
1227 void write(T &&data, [[maybe_unused]] std::vector<hsize_t> shape = {})
1228 {
1229 this->_log->debug("Writing data to dataset {}", _path);
1230 this->_log->debug("... current extent {}", Utils::str(_current_extent));
1231 this->_log->debug("... current offset {}", Utils::str(_offset));
1232 this->_log->debug("... capacity {}", Utils::str(_capacity));
1233 this->_log->debug("... refcount {}", get_refcount());
1234
1235 // dataset does not yet exist
1236 _memspace.close();
1237 _filespace.close();
1238
1239 _memspace.open();
1240 _filespace.open();
1241
1242 if (not is_valid())
1243 {
1244 // current limitation; may be lifted in the future
1245 if (_rank > 2)
1246 {
1247 throw std::runtime_error("Rank > 2 not supported");
1248 }
1249
1250 /*
1251 if dataset does not yet exist
1252 Get current extent.
1253 If is container:
1254 if 1d:
1255 current_extent = data.size()
1256 else:
1257 current_extent = {1, data.size()}, i.e. one line in
1258 the matrix
1259
1260 if pointer:
1261 current_extent is shape
1262 if string or scalar:
1263 current_extent is 1
1264
1265 then check if chunking is needed but not known and calculate it
1266 or throw error. this is done within the individual __write_X__
1267 methods because detailed type info is needed.
1268 */
1269 _current_extent.resize(_rank);
1270
1271 if constexpr (Utils::is_container_v<std::decay_t<T>>)
1272 {
1273 if (_rank == 1)
1274 {
1275 _current_extent[_rank - 1] = data.size();
1276 }
1277 else
1278 {
1279 _current_extent[0] = 1;
1280 _current_extent[1] = data.size();
1281 }
1282 }
1283
1284 else if constexpr (std::is_pointer_v<std::decay_t<T>> and
1285 !Utils::is_string_v<std::decay_t<T>>)
1286 {
1287 if (shape.size() == 0)
1288 {
1289 throw std::runtime_error(
1290 "Dataset " + _path +
1291 ": shape has to be given explicitly when writing "
1292 "pointer types");
1293 }
1294 _current_extent = shape;
1295 }
1296 else
1297 {
1298 _current_extent[_rank - 1] = 1;
1299 }
1300 }
1301 else
1302 {
1303
1304 /*
1305 if dataset does exist:
1306 - check if the type of the data given to write is compatible
1307 with the one of the dataset
1308
1309 - make a _new_extent array equalling current_extent, leave
1310 current_extent untouched.
1311 If is container: if 1d: _new_extent = current_extent + data.size()
1312 else: _new_extent = {current_extent[0]+1, current_extent[1]},
1313 i.e. one new line in the matrix
1314
1315 if pointer:
1316 _new_extent += shape
1317 if string or scalar:
1318 _new_extent += 1
1319
1320 offset = current_extent
1321 but if 2d and current_extent[1] == capacity[1] (end of line):
1322 offset = {current_extent[0]+1, 0};
1323
1324 count = {1, data.size()} if 2d, {data.size()} if 1d
1325
1326 then extend the dataset,
1327 select the newly added line,
1328 update current_extent,
1329 write
1330 */
1331
1332 // make a temporary for new extent
1333 std::vector<hsize_t> _new_extent = _current_extent;
1334
1336 {
1337 throw std::runtime_error("Dataset " + _path +
1338 ": Error, dataset cannot be extended "
1339 "because it reached its capacity");
1340 }
1341 else
1342 {
1343 // set offset array
1344 // this is needed because multiple writes one after the other
1345 // could occur without intermediate close and reopen (which
1346 // would set _offset correctly)
1348
1349 if (_rank > 1)
1350 {
1351 if (_current_extent[1] == _capacity[1])
1352 {
1353 _offset[1] = 0;
1354 }
1355 }
1356
1357 // if data is a container, then we have to add its size to the
1358 // extent; if it is a pointer, we have to add the pointer's
1359 // shape; else we have to add 1 because we either write
1360 // a single scalar or string
1361 if constexpr (Utils::is_container_v<std::decay_t<T>>)
1362 {
1363 if (_rank == 1)
1364 {
1365 _new_extent[0] += data.size();
1366 }
1367 else
1368 {
1369 _new_extent[0] += 1;
1370 }
1371 }
1372 else if constexpr (std::is_pointer_v<std::decay_t<T>> and
1373 !Utils::is_string_v<std::decay_t<T>>)
1374 {
1375 if (shape.size() == 0)
1376 {
1377 throw std::runtime_error(
1378 "Dataset " + _path +
1379 ": shape has to be given explicitly when writing "
1380 "pointer types");
1381 }
1382
1383 for (std::size_t i = 0; i < _rank; ++i)
1384 {
1385 _new_extent[i] += shape[i];
1386 }
1387 }
1388 else
1389 {
1390 if (_rank == 1)
1391 {
1392 // if rank is one we can only extend into one direction
1393 _new_extent[0] += 1;
1394 }
1395 else
1396 {
1397 // first fill row, then column wise increase
1398 if (_current_extent[0] < _capacity[0])
1399 {
1400 _new_extent[0] += 1;
1401 }
1402 // if row is full, start a new one
1403 else
1404 {
1405 _new_extent[1] += 1;
1406 }
1407 }
1408 }
1409 }
1410 // select counts for dataset
1411 // this has to be generalized and refactored
1412 std::vector<hsize_t> counts(_rank, 0);
1413 if constexpr (Utils::is_container_v<std::decay_t<T>>)
1414 {
1415 if (_rank == 1)
1416 {
1417 counts = {data.size()};
1418 }
1419 else
1420 {
1421 counts = {1, data.size()};
1422 }
1423 }
1424 // when is pointer, the counts are given by shape
1425 else if constexpr (std::is_pointer_v<std::decay_t<T>> and
1426 !Utils::is_string_v<std::decay_t<T>>)
1427 {
1428 counts = shape;
1429 }
1430 else
1431 {
1432 counts = {1};
1433 }
1434
1435 // check that the new extent does not exceed the capacity
1436 for (std::size_t i = 0; i < _rank; ++i)
1437 {
1438 if (_new_extent[i] > _capacity[i])
1439 {
1440 throw std::runtime_error("Dataset " + _path +
1441 ": Cannot append data, "
1442 "_new_extent larger than capacity "
1443 "in dimension " +
1444 std::to_string(i));
1445 }
1446 }
1447
1448 // extend the dataset to the new size
1450
1451 if (err < 0)
1452 {
1453 throw std::runtime_error(
1454 "Dataset " + _path +
1455 ": Error when trying to increase extent");
1456 }
1457
1458 // get file and memory spaces which represent the selection to write
1459 // at
1460 _filespace.open(*this);
1461
1462 _memspace.open(_path + "memory dataspace", _rank, counts, {});
1463
1465 _offset, // start
1466 arma::Row<hsize_t>(_offset) + arma::Row<hsize_t>(counts), // end
1467 {} // stride
1468 );
1469
1471 }
1472
1473 this->_log->debug("New extent {}", Utils::str(_new_extent));
1474 this->_log->debug("New offset {}", Utils::str(_offset));
1475 this->_log->debug(" Refcount before write {}", get_refcount());
1476
1477 // everything is prepared, we can write the data
1478 if constexpr (Utils::is_container_v<std::decay_t<T>>)
1479 {
1480
1481 herr_t err = __write_container__(std::forward<T>(data));
1482
1483 if (err < 0)
1484 {
1485 throw std::runtime_error("Dataset " + _path +
1486 ": Error in appending container");
1487 }
1488 }
1489 else if constexpr (Utils::is_string_v<std::decay_t<T>>)
1490 {
1491 herr_t err = __write_stringtype__(std::forward<T>(data));
1492 if (err < 0)
1493 {
1494 throw std::runtime_error("Dataset " + _path +
1495 ": Error in appending string");
1496 }
1497 }
1498 else if constexpr (std::is_pointer_v<std::decay_t<T>> and
1499 !Utils::is_string_v<std::decay_t<T>>)
1500 {
1501 herr_t err = __write_pointertype__(std::forward<T>(data));
1502 if (err < 0)
1503 {
1504 throw std::runtime_error("Dataset " + _path +
1505 ": Error in appending pointer");
1506 }
1507 }
1508 else
1509 {
1510 herr_t err = __write_scalartype__(std::forward<T>(data));
1511 if (err < 0)
1512 {
1513 throw std::runtime_error("Dataset " + _path +
1514 ": Error in appending scalar");
1515 }
1516 }
1517 }
1518
1532 template <typename Iter, typename Adaptor>
1533 void write(Iter begin, Iter end, Adaptor &&adaptor)
1534 {
1535 this->_log->debug("Writing iterator range to dataset {}", _path);
1536 // this->_log->debug("... current offset {}", Utils::str(_offset));
1537 // this->_log->debug("... capacity {}", Utils::str(_capacity));
1538
1539 using Type = Utils::remove_qualifier_t<decltype(adaptor(*begin))>;
1540
1541 write([&]() {
1542 std::vector<Type> buff(std::distance(begin, end));
1543
1544 std::generate(buff.begin(), buff.end(),
1545 [&begin, &adaptor]() { return adaptor(*(begin++)); });
1546 return buff;
1547 }());
1548 }
1549
1573 template <typename T, std::size_t d>
1574 void write_nd(const boost::multi_array<T, d> &data,
1575 std::vector<hsize_t> offset = {})
1576 {
1577 this->_log->debug("Writing N-dimensional dataset to dataset {}", _path);
1578 this->_log->debug("... current extent {}", Utils::str(_current_extent));
1579 this->_log->debug("... current offset {}", Utils::str(_offset));
1580 this->_log->debug("... capacity {}", Utils::str(_capacity));
1581
1582 _filespace.close();
1583 _memspace.close();
1584
1585 // create dataspaces
1586 _memspace.open();
1587 _filespace.open();
1588
1589 // dataset does not yet exist
1590 if (not is_valid())
1591 {
1592
1593 // two possibilities: capacity given or not:
1594 // if not given:
1595 // use data to determine extent and capacity, correcting the
1596 // assumed ones from 'open'
1597 // else use given values
1598
1599 if (_capacity == std::vector<hsize_t>{H5S_UNLIMITED} and _rank == 1)
1600 {
1601 _rank = d;
1602 _current_extent.resize(_rank, 0);
1603 _offset.resize(_rank, 0);
1604 for (std::size_t i = 0; i < _rank; ++i)
1605 {
1606 _current_extent[i] = data.shape()[i];
1607 }
1608 _capacity.resize(d, H5S_UNLIMITED);
1609 }
1610 else
1611 {
1612 _current_extent.resize(_rank, 1);
1613 _offset.resize(_rank, 0);
1614
1615 for (auto [i, j] = std::make_tuple(_rank - d, 0); i < _rank;
1616 ++i, ++j)
1617 {
1618 _current_extent[i] = data.shape()[j];
1619 }
1620 }
1621
1622 _log->debug("Dataset {} does not exist yet, properties were "
1623 "determined to be",
1624 _path);
1625 _log->debug(" rank: {}", Utils::str(_rank));
1626 _log->debug(" datarank: {}", Utils::str(d));
1627 _log->debug(" datashape: {}", Utils::str(std::vector<std::size_t>(
1628 data.shape(), data.shape() + d)));
1629 _log->debug(" capacity: {}", Utils::str(_capacity));
1630 _log->debug(" offset: {}", Utils::str(_offset));
1631 _log->debug(" current_extent: {}", Utils::str(_current_extent));
1632 }
1633 else
1634 {
1635
1636 if (_rank < d)
1637 {
1638 throw std::invalid_argument(
1639 "Error, the dimensionality of the dataset, which is " +
1640 std::to_string(_current_extent.size()) +
1641 ", must be >= the dimensionality of the data to be "
1642 "written, which is " +
1643 std::to_string(d));
1644 }
1645 else
1646 {
1647
1648 _log->debug("Dataset {} does exist", _path);
1649 _log->debug("Properties of data to be written");
1650 _log->debug(" datarank: {}", Utils::str(d));
1651 _log->debug(" datashape: {}",
1652 Utils::str(std::vector<std::size_t>(
1653 data.shape(), data.shape() + d)));
1654
1655 _log->debug(
1656 "Properties before change for accommodating new data");
1657 _log->debug(" rank: {}", Utils::str(_rank));
1658 _log->debug(" capacity: {}", Utils::str(_capacity));
1659 _log->debug(" offset: {}", Utils::str(_offset));
1660 _log->debug(" current_extent: {}", Utils::str(_current_extent));
1661
1662 std::vector<hsize_t> _new_extent = _current_extent;
1663
1664 // two cases: when offset is given, and when not. When it is
1665 // given, it is assumed that the data always has the same
1666 // shape except in the first dimension
1667
1668 if (offset.size() != 0)
1669 {
1670
1671 // when offset is given we use it to
1672 // determine how to extend the dataset. Note that the
1673 // requirement that all data written have the same shape in
1674 // all dimensions but the first is not enforced here, hence
1675 // the algorithm works a little differently
1676 _offset = offset;
1677 for (std::size_t i = 0; i < _rank - d; ++i)
1678 {
1679 if (_offset[i] == _current_extent[i])
1680 {
1681 _new_extent[i] += 1;
1682 }
1683 }
1684
1685 for (auto [i, j] = std::make_tuple(_rank - d, 0ul); i < d;
1686 ++i)
1687 {
1688 if (_current_extent[i] < (_offset[i] + data.shape()[j]))
1689 {
1690 _new_extent[i] = _offset[i] + data.shape()[j];
1691 }
1692 if (_new_extent[i] > _capacity[i])
1693 {
1694 throw std::runtime_error(
1695 "Dataset " + _path + ": Capacity[" +
1696 std::to_string(i) +
1697 "] = " + std::to_string(_capacity[i]) +
1698 ", which is too small for a desired new "
1699 "extent[" +
1700 std::to_string(i) +
1701 "] = " + std::to_string(_new_extent[i]));
1702 }
1703 }
1704
1705 // extend the dataset to the new size
1707
1708 if (err < 0)
1709 {
1710 throw std::runtime_error(
1711 "Dataset " + _path +
1712 ": Error when trying to increase extent");
1713 }
1714 }
1715
1716 else
1717 {
1718 // zeroth index is treated separately because it is used to
1719 // increase the total available space in the dataset
1720
1721 _new_extent[0] += (d == _rank) ? data.shape()[0]
1722 : 1; // add all needed slices
1723
1724 if (_new_extent[0] > _capacity[0])
1725 {
1726 throw std::runtime_error(
1727 "Error in " + _path + ", capacity " +
1728 std::to_string(_capacity[0]) + " at index " +
1729 std::to_string(0) + " of " + std::to_string(d) +
1730 " is too small for new extent " +
1731 std::to_string(_new_extent[0]));
1732 }
1733
1734 for (auto [i, j] =
1735 std::make_tuple(1ul, (d == _rank) ? 1ul : 0ul);
1736 i < _rank && j < d; ++i, ++j)
1737 {
1738 if (data.shape()[j] > _current_extent[i])
1739 {
1740 _new_extent[i] +=
1741 data.shape()[j] - _current_extent[i];
1742 if (_new_extent[i] > _capacity[i])
1743 {
1744 throw std::runtime_error(
1745 "Error in " + _path +
1746 ", capacity at index " + std::to_string(i) +
1747 " of " + std::to_string(d) +
1748 " is too small");
1749 }
1750 }
1751 }
1752
1753 // extend the dataset to the new size
1755
1756 if (err < 0)
1757 {
1758 throw std::runtime_error(
1759 "Dataset " + _path +
1760 ": Error when trying to increase extent");
1761 }
1762
1763 // if the algorithm gets this far, it is safe to do this
1764
1765 _offset.resize(_rank);
1766 std::fill(_offset.begin(), _offset.end(), 0);
1767 _offset[0] = _current_extent[0];
1768 }
1769 /*
1770 * README: The count vector is needed for determining the slice
1771 * to write to in the datafile. hdf5 determines slices in the
1772 * dataset via a [start, step, count] pattern, where the 'count'
1773 * gives the number of steps in each dimension. Hence, the
1774 * counts have to be computed/assigned from the data to be
1775 * written
1776 */
1777 std::vector<hsize_t> counts(_rank, 1);
1778
1779 for (auto [i, j] = std::make_tuple(_rank - d, 0); i < _rank;
1780 ++i, ++j)
1781 {
1782 counts[i] = data.shape()[j];
1783 }
1784
1785 // get file and memory spaces which represent the selection to
1786 // write at
1787 _filespace.close();
1788 _memspace.close();
1789 _filespace.open(*this);
1790
1791 _memspace.open(_path + "memory dataspace", _rank, counts, {});
1792
1794 arma::Row<hsize_t>(_offset) +
1795 arma::Row<hsize_t>(counts),
1796 {});
1797
1798 // update the current extent
1800
1801 _log->debug(
1802 "Properties after change for accommodating new data");
1803 _log->debug(" rank: {}", Utils::str(_rank));
1804 _log->debug(" capacity: {}", Utils::str(_capacity));
1805 _log->debug(" offset: {}", Utils::str(_offset));
1806 _log->debug(" current_extent: {}", Utils::str(_current_extent));
1807 _log->debug("new extent {}", Utils::str(_new_extent));
1808 }
1809 }
1810
1811 // dataset extension is done, now we can check if we have to buffer data
1812 // FIXME: this has to be put into the bufferfactory class later, ideally
1813 // using a plain char buffer for it to avoid templating and enabling
1814 // memory reuse by making the bufferfactory a member.
1815
1816 if constexpr (std::is_scalar_v<std::decay_t<T>>)
1817 {
1818 if (not is_valid())
1819 {
1821 }
1822 else
1823 {
1824 HDFType temp_type;
1825 temp_type.open<std::decay_t<T>>("testtype", 0);
1826 if (_type != temp_type)
1827 {
1828 throw std::runtime_error("Error, cannot write data of a "
1829 "different type into dataset " +
1830 _path);
1831 }
1832 }
1833
1834 herr_t err =
1836 _filespace.get_C_id(), H5P_DEFAULT, data.data());
1837
1838 if (err < 0)
1839 {
1840 throw std::runtime_error(
1841 "Dataset " + _path +
1842 ": Error in writing nd-array holding scalar values");
1843 }
1844 }
1845 else if constexpr (Utils::is_string_v<std::decay_t<T>>)
1846 {
1847 if (not is_valid())
1848 {
1850 }
1851 else
1852 {
1853 HDFType temp_type;
1854 temp_type.open<std::decay_t<T>>("testtype", 0);
1855
1856 if (_type != temp_type)
1857 {
1858 throw std::runtime_error("Error, cannot write data of a "
1859 "different type into dataset " +
1860 _path);
1861 }
1862 }
1863 // make a buffer that mirrors the shape of the data
1864 boost::multi_array<const char *, d> buffer(
1865 reinterpret_cast<boost::array<size_t, d> const &>(
1866 *data.shape()));
1867
1868 // fill the buffer
1869 std::transform(data.data(), data.data() + data.num_elements(),
1870 buffer.data(),
1871 [](auto &&str) { return str.c_str(); });
1872
1873 // write the buffer
1874 herr_t err =
1876 _filespace.get_C_id(), H5P_DEFAULT, buffer.data());
1877
1878 if (err < 0)
1879 {
1880 throw std::runtime_error(
1881 "Dataset " + _path +
1882 ": Error in writing nd-array holding string values");
1883 }
1884 }
1885 else if constexpr (Utils::is_container_v<std::decay_t<T>>)
1886 {
1887 if constexpr (Utils::is_array_like_v<std::decay_t<T>>)
1888 {
1889 hsize_t typesize = Utils::get_size<std::decay_t<T>>::value;
1890
1891 // create dataset with given typesize
1892 if (not is_valid())
1893 {
1895 }
1896 else
1897 {
1898 HDFType temp_type;
1899 temp_type.open<std::decay_t<T>>("testtype", typesize);
1900 if (_type != temp_type)
1901 {
1902 throw std::runtime_error(
1903 "Error, cannot write data of a "
1904 "different type into dataset " +
1905 _path);
1906 }
1907 }
1908
1909 // write the data directly; a buffer is not needed here
1910 herr_t err =
1912 _filespace.get_C_id(), H5P_DEFAULT, data.data());
1913 if (err < 0)
1914 {
1915 throw std::runtime_error(
1916 "Dataset " + _path +
1917 ": Error in writing nd-array holding array values");
1918 }
1919 }
1920 else
1921 {
1922 // create dataset with given typesize
1923 if (not is_valid())
1924 {
1926 }
1927 else
1928 {
1929 HDFType temp_type;
1930 temp_type.open<std::decay_t<T>>("temp_type", 0);
1931 if (_type != temp_type)
1932 {
1933 throw std::runtime_error(
1934 "Error, cannot write data of a "
1935 "different type into dataset " +
1936 _path);
1937 }
1938 }
1939 // vector is stored
1940 if constexpr (std::is_same_v<
1941 std::vector<typename T::value_type>,
1942 std::decay_t<T>>)
1943 {
1944 // make buffer
1945 boost::multi_array<hvl_t, d> buffer(
1946 reinterpret_cast<boost::array<size_t, d> const &>(
1947 *data.shape()));
1948
1950 data.data(), data.data() + data.num_elements(),
1951 buffer.data(), [](auto &&v) {
1952 return hvl_t{
1953 v.size(),
1954 // cumbersome const cast needed because I want
1955 // to keep const Reference argument 'cause it
1956 // can bind to lvalues and rvalues alike, i.e.
1957 // you can construct a multi_array in the arg
1958 // list, but also pass an existing one as
1959 // reference.
1960 const_cast<Utils::remove_qualifier_t<decltype(
1961 v.data())> *>(v.data())};
1962 });
1963
1964 // write the buffer
1967 _filespace.get_C_id(), H5P_DEFAULT, buffer.data());
1968
1969 if (err < 0)
1970 {
1971 throw std::runtime_error("Dataset " + _path +
1972 ": Error in writing nd-array "
1973 "holding vector values");
1974 }
1975 }
1976 // no vector is stored
1977 else
1978 {
1979 // make buffers; when the stored type is not a vector we need
1980 // two of them: one to transform the data into vectors (which
1981 // provide the contiguous storage hdf5 needs), the other
1982 // for turning the new vectors into hvl_t
1983 boost::multi_array<std::vector<typename T::value_type>, d>
1985 reinterpret_cast<boost::array<size_t, d> const &>(
1986 *data.shape()));
1987
1988 boost::multi_array<hvl_t, d> buffer(
1989 reinterpret_cast<boost::array<size_t, d> const &>(
1990 *data.shape()));
1991
1993 data.data(), data.data() + data.num_elements(),
1994 vector_buffer.data(), [](auto &&v) {
1995 return std::vector<typename T::value_type>(
1996 v.begin(), v.end());
1997 });
1998
2000 vector_buffer.data() +
2001 vector_buffer.num_elements(),
2002 buffer.data(), [](auto &&v) {
2003 return hvl_t{v.size(), v.data()};
2004 });
2005
2006 // write the buffer
2009 _filespace.get_C_id(), H5P_DEFAULT, buffer.data());
2010
2011 if (err < 0)
2012 {
2013 throw std::runtime_error(
2014 "Dataset " + _path +
2015 ": Error in writing nd-array holding non-vector "
2016 "container values");
2017 }
2018 }
2019 }
2020 }
2021 }
2022
2036 template <typename Type>
2037 auto read([[maybe_unused]] std::vector<hsize_t> start = {},
2038 [[maybe_unused]] std::vector<hsize_t> end = {},
2039 [[maybe_unused]] std::vector<hsize_t> stride = {})
2040 {
2041 this->_log->debug(
2042 "Reading dataset {}, starting at {}, ending at {}, using stride {}",
2043 _path, Utils::str(start), Utils::str(end), Utils::str(stride));
2044
2045 if (not is_valid())
2046 {
2047 throw std::runtime_error("Dataset " + _path +
2048 ": Dataset id is invalid");
2049 }
2050
2051 _filespace.close();
2052 _memspace.close();
2053
2054 // variables needed for reading
2055 std::vector<hsize_t> readshape; // shape vector for read, either
2056 // _current_extent or another shape
2057
2058 std::size_t size = 1;
2059
2060 // read entire dataset
2061 if (start.size() == 0)
2062 {
2063 readshape = _current_extent;
2064 _filespace.open();
2065 _memspace.open();
2066
2067 // make flattened size of data to read
2068 for (auto &s : readshape)
2069 {
2070 size *= s;
2071 }
2072 }
2073 // read [start, end) with steps given by stride in each dimension
2074 else
2075 {
2076 // throw error if ranks and shape sizes do not match
2077 if (start.size() != _rank or end.size() != _rank or
2078 stride.size() != _rank)
2079 {
2080 throw std::invalid_argument(
2081 "Dataset " + _path +
2082 ": start, end, stride have to be "
2083 "same size as dataset rank, which is " +
2084 std::to_string(_rank));
2085 }
2086
2087 // set offset of current array to start
2088 _offset = start;
2089
2090 // make count vector
2091 // exploit that hsize_t((end-start)/stride) cuts off decimal
2092 // places and thus results in floor((end-start)/stride) always.
2093 std::vector<hsize_t> count(start.size());
2094
2095 // build the count array -> how many elements to read in each
2096 // dimension
2097 for (std::size_t i = 0; i < _rank; ++i)
2098 {
2099 count[i] = (end[i] - start[i]) / stride[i];
2100 }
2101
2102 for (auto &s : count)
2103 {
2104 size *= s;
2105 }
2106
2107 readshape = count;
2108
2109 _filespace.close();
2110 _memspace.close();
2111
2112 _filespace.open(*this);
2113 _memspace.open(_path + " memory dataspace", _rank, count, {});
2114
2115 this->_log->debug("... selecting slice in filespace for dataset {}",
2116 _path);
2117 _filespace.select_slice(start, end, stride);
2118 }
2119
2120 // Below the actual reading happens
2121
2122 // type to read in is a container type, which can hold containers
2123 // themselves or just plain types.
2124 if constexpr (Utils::is_container_v<Type>)
2125 {
2126 Type buffer(size);
2127 herr_t err = __read_container__(buffer);
2128 if (err < 0)
2129 {
2130 throw std::runtime_error("Dataset " + _path +
2131 ": Error reading container type ");
2132 }
2133 return std::make_tuple(readshape, buffer);
2134 }
2135 else if constexpr (Utils::is_string_v<Type>) // we can have string
2136 // types too, i.e. char*,
2137 // const char*,
2138 // std::string
2139 {
2140 std::string buffer; // resized in __read_stringtype__ because this
2141 // is read as a scalar
2142 buffer.resize(size);
2143 herr_t err = __read_stringtype__(buffer);
2144 if (err < 0)
2145 {
2146 throw std::runtime_error("Dataset " + _path +
2147 ": Error reading string type ");
2148 }
2149
2150 return std::make_tuple(readshape, buffer);
2151 }
2152 else if constexpr (std::is_pointer_v<Type> && !Utils::is_string_v<Type>)
2153 {
2154 std::shared_ptr<Utils::remove_qualifier_t<Type>> buffer(
2155 new Utils::remove_qualifier_t<Type>[size],
2156 std::default_delete<Utils::remove_qualifier_t<Type>[]>());
2157
2158 herr_t err = __read_pointertype__(buffer.get());
2159
2160 if (err < 0)
2161 {
2162 throw std::runtime_error("Dataset " + _path +
2163 ": Error reading pointer type ");
2164 }
2165 return std::make_tuple(readshape, buffer);
2166 }
2167 else // reading scalar types is simple enough
2168 {
2169 Type buffer(0);
2170 herr_t err = __read_scalartype__(buffer);
2171 if (err < 0)
2172 {
2173 throw std::runtime_error("Dataset " + _path +
2174 ": Error reading scalar type ");
2175 }
2176 return std::make_tuple(readshape, buffer);
2177 }
2178 }
2179
2183 HDFDataset() = default;
2184
2190 HDFDataset(const HDFDataset &other) = default;
2191
2206
2214
2230 template <HDFCategory cat>
2232 std::vector<hsize_t> capacity = {},
2233 std::vector<hsize_t> chunksizes = {}, hsize_t compress_level = 0)
2234
2235 {
2236 open(parent_object, path, capacity, chunksizes, compress_level);
2237 }
2238
2242 virtual ~HDFDataset() { close(); }
2243}; // end of HDFDataset class
2244
2253void swap(HDFDataset &lhs, HDFDataset &rhs) { lhs.swap(rhs); }
2254
// end of group HDF5
// end of group DataIO
2257
2258} // namespace DataIO
2259} // namespace Utopia
2260#endif // UTOPIA_DATAIO_HDFDATASET_HH
Class for hdf5 attribute, which can be attached to groups and datasets.
Definition hdfattribute.hh:46
static auto buffer(Iter begin, Iter end, Adaptor &&adaptor)
static function for turning an iterator range with arbitrary datatypes into a vector of data as retu...
Definition hdfbufferfactory.hh:96
Class representing an HDFDataset, which reads and writes data and attributes.
Definition hdfdataset.hh:53
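A minimal lifecycle sketch (open, append, read back, close). The parent group, paths and values are illustrative assumptions; only the HDFDataset calls themselves come from this file, and hdfgroup.hh is assumed to provide the HDFGroup wrapper used as the parent:

    #include <vector>
    #include "hdfdataset.hh"
    #include "hdfgroup.hh" // assumed parent-group wrapper; include paths may differ

    void lifecycle_sketch(Utopia::DataIO::HDFGroup& grp)
    {
        using namespace Utopia::DataIO;

        HDFDataset dset;
        dset.open(grp, "measurements"); // creation is deferred to the first write

        dset.write(std::vector<double>{1.0, 2.0, 3.0}); // creates and fills

        // read returns the shape of the data and a flattened buffer
        auto [shape, values] = dset.read<std::vector<double>>();

        dset.close(); // also flushes buffered attributes
    }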
std::vector< std::pair< std::string, typename HDFType::Variant > > _attribute_buffer
A buffer for storing attributes before the dataset exists.
Definition hdfdataset.hh:815
auto get_capacity()
get the maximum extent of the dataset
Definition hdfdataset.hh:900
std::vector< hsize_t > _current_extent
the currently occupied size of the dataset in number of elements
Definition hdfdataset.hh:776
HDFDataset(const HDFDataset &other)=default
Copy constructor.
auto get_current_extent()
get the current extent of the dataset
Definition hdfdataset.hh:887
HDFDataspace get_filespace()
Get the file dataspace id.
Definition hdfdataset.hh:861
void write(Iter begin, Iter end, Adaptor &&adaptor)
Write function for writing iterator ranges [start, end), in accordance with respective stl pattern.
Definition hdfdataset.hh:1533
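For example, the adaptor can project one member out of every element of a range; the results are buffered into a std::vector and appended via the container write overload. A hypothetical sketch (the Agent struct, group and path are assumptions, not part of this file):

    struct Agent { double energy; };

    void write_energies(Utopia::DataIO::HDFGroup& grp,
                        const std::vector<Agent>& agents)
    {
        Utopia::DataIO::HDFDataset dset;
        dset.open(grp, "agent_energy");
        // the adaptor is invoked once per element; its return type
        // determines the stored datatype (double here)
        dset.write(agents.begin(), agents.end(),
                   [](const Agent& a) { return a.energy; });
    }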
std::vector< hsize_t > _chunksizes
the chunksizes per dimensions if dataset is extendible or compressed
Definition hdfdataset.hh:787
herr_t __read_container__(Type &buffer)
Read a container.
Definition hdfdataset.hh:426
HDFDataspace _filespace
file dataspace identifier
Definition hdfdataset.hh:827
HDFDataspace get_memspace()
Get the memory dataspace id.
Definition hdfdataset.hh:854
HDFDataset & operator=(HDFDataset &&other)=default
Move assignment operator.
auto get_attribute_buffer()
Returns the attribute buffer of this dataset.
Definition hdfdataset.hh:866
auto get_chunksizes()
Get the chunksizes vector.
Definition hdfdataset.hh:907
herr_t __write_scalartype__(T data)
Writes simple scalars, which are not pointers, containers or strings.
Definition hdfdataset.hh:387
void set_chunksize(std::vector< hsize_t > chunksizes)
Set the chunksize object.
Definition hdfdataset.hh:941
void __write_attribute_buffer__()
write out the attribute buffer
Definition hdfdataset.hh:728
HDFDataset & operator=(const HDFDataset &other)=default
Assignment operator.
auto read(std::vector< hsize_t > start={}, std::vector< hsize_t > end={}, std::vector< hsize_t > stride={})
Read (a subset of) a dataset into a buffer of type 'Type'. Type gives the type of the buffer to read...
Definition hdfdataset.hh:2037
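A brief sketch of reading a 2D dataset of doubles, first as a whole and then as a strided slice (dset is assumed to be open and valid; start, end and stride need one entry per dataset dimension, with end being exclusive):

    void read_sketch(Utopia::DataIO::HDFDataset& dset)
    {
        // whole dataset: returns its shape and a flattened buffer
        auto [shape, flat] = dset.read<std::vector<double>>();

        // every second row of the first column
        auto [slice_shape, column] = dset.read<std::vector<double>>(
            {0, 0},        // start
            {shape[0], 1}, // end (exclusive)
            {2, 1});       // stride
    }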
void open(const HDFObject< cat > &parent_object, std::string path, std::vector< hsize_t > capacity={}, std::vector< hsize_t > chunksizes={}, hsize_t compress_level=0)
Open the dataset in parent_object with relative path 'path'.
Definition hdfdataset.hh:1065
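A sketch of opening a dataset below an already-opened group with explicit capacity, chunk sizes and compression level (all values illustrative); the HDF5 object itself is only created once data is written:

    void open_sketch(Utopia::DataIO::HDFGroup& grp)
    {
        Utopia::DataIO::HDFDataset dset;
        // capacity 100 x 1000, chunks of one row, deflate level 5
        dset.open(grp, "state", {100, 1000}, {1, 1000}, 5);
    }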
herr_t __write_pointertype__(T data)
Writes pointers, shape is like numpy shape arg.
Definition hdfdataset.hh:341
void add_attribute(std::string attribute_path, Attrdata data)
add attribute to the dataset
Definition hdfdataset.hh:976
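Attributes may be added before the dataset exists in the file; they are then kept in the attribute buffer and written once the dataset is there, at the latest when close() is called. A short sketch (names and values illustrative):

    void attribute_sketch(Utopia::DataIO::HDFGroup& grp)
    {
        Utopia::DataIO::HDFDataset dset;
        dset.open(grp, "temperature");

        // dataset not yet created on disk: the attribute is buffered
        dset.add_attribute("initial_time", 0.0);

        // the first write creates the dataset itself ...
        dset.write(std::vector<double>{273.15, 293.15});

        // ... and an existing dataset receives attributes directly
        dset.add_attribute("unit", std::string("Kelvin"));

        dset.close(); // flushes anything still buffered
    }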
virtual ~HDFDataset()
Destructor.
Definition hdfdataset.hh:2242
auto __read_stringtype__(Type &buffer)
read string data consisting of a single string.
Definition hdfdataset.hh:696
HDFDataset(HDFDataset &&other)=default
Move constructor.
auto __read_pointertype__(Type buffer)
read pointertype.
Definition hdfdataset.hh:710
HDFDataset(HDFObject< cat > &parent_object, std::string path, std::vector< hsize_t > capacity={}, std::vector< hsize_t > chunksizes={}, hsize_t compress_level=0)
Construct a new HDFDataset object.
Definition hdfdataset.hh:2231
auto get_type()
Get the type object.
Definition hdfdataset.hh:847
HDFIdentifier _parent_identifier
Identifier of the parent object.
Definition hdfdataset.hh:766
std::size_t get_rank()
get the rank of the dataset, i.e. the dimensionality
Definition hdfdataset.hh:880
HDFType _type
Type of the data the dataset holds.
Definition hdfdataset.hh:821
std::vector< hsize_t > _new_extent
buffer for extent update
Definition hdfdataset.hh:799
void swap(HDFDataset &other)
swap the state of the objects
Definition hdfdataset.hh:1200
void write(T &&data, std::vector< hsize_t > shape={})
Writes data of arbitrary type.
Definition hdfdataset.hh:1227
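For a 2D dataset, every container passed to write appends one row until the capacity is exhausted; a sketch with illustrative sizes:

    void append_sketch(Utopia::DataIO::HDFGroup& grp)
    {
        Utopia::DataIO::HDFDataset dset;
        dset.open(grp, "state", {100, 1000}, {1, 1000});

        for (std::size_t t = 0; t < 100; ++t)
        {
            std::vector<double> state(1000, static_cast<double>(t));
            dset.write(state); // appends one line of the 100 x 1000 matrix
        }
    }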
auto get_offset()
Get the offset object.
Definition hdfdataset.hh:894
hsize_t _rank
number of dimensions of the dataset
Definition hdfdataset.hh:771
HDFIdentifier get_parent_id()
get a shared_ptr to the parent_object
Definition hdfdataset.hh:873
HDFDataspace _memspace
memory dataspace identifier
Definition hdfdataset.hh:833
std::vector< hsize_t > _offset
offset of the data
Definition hdfdataset.hh:793
auto __read_scalartype__(Type &buffer)
read scalar type, trivial
Definition hdfdataset.hh:719
std::size_t _compress_level
the level of compression, 0 to 10
Definition hdfdataset.hh:804
void write_nd(const boost::multi_array< T, d > &data, std::vector< hsize_t > offset={})
Write a boost::multi_array of arbitrary type and dimension to the dataset. The dataset needs to be of...
Definition hdfdataset.hh:1574
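A sketch of writing boost::multi_array slabs: without an offset the data is appended along the first dimension, with an offset it is placed at that position (shapes and names illustrative):

    #include <algorithm>
    #include <boost/multi_array.hpp>

    void write_nd_sketch(Utopia::DataIO::HDFGroup& grp)
    {
        boost::multi_array<double, 2> frame(boost::extents[10][10]);
        std::fill_n(frame.data(), frame.num_elements(), 1.0);

        Utopia::DataIO::HDFDataset dset;
        dset.open(grp, "frames", {100, 10, 10}); // capacity: 100 frames of 10 x 10

        dset.write_nd(frame);            // first slab, appended
        dset.write_nd(frame, {1, 0, 0}); // second slab, at an explicit offset
    }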
void close()
Close the dataset.
Definition hdfdataset.hh:1031
auto get_compresslevel()
Get the compress level object.
Definition hdfdataset.hh:914
void __create_dataset__(std::size_t typesize)
helper function for creating the actual dataset
Definition hdfdataset.hh:64
std::vector< hsize_t > _capacity
the maximum number of elements which can be stored in the dataset
Definition hdfdataset.hh:781
herr_t __write_container__(T &&data)
Writes containers to the dataset.
Definition hdfdataset.hh:163
void set_capacity(std::vector< hsize_t > capacity)
Set the capacity of the dataset; this also sets its rank to capacity.size().
Definition hdfdataset.hh:921
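Capacity and chunk sizes can only be changed while the dataset has not yet been created on disk, i.e. before the first write; afterwards both setters throw. A brief sketch:

    void shape_sketch(Utopia::DataIO::HDFDataset& dset)
    {
        // only valid before the first write to this dataset
        dset.set_capacity({500, 1000}); // also sets the rank to 2
        dset.set_chunksize({1, 1000});  // passing {} selects automatic chunking
    }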
void open(const HDFIdentifier &parent_identifier, std::string path, std::vector< hsize_t > capacity={}, std::vector< hsize_t > chunksizes={}, hsize_t compress_level=0)
Open the dataset in parent_object with relative path 'path'.
Definition hdfdataset.hh:1090
herr_t __write_stringtype__(T data)
writes stringtypes
Definition hdfdataset.hh:276
HDFDataset()=default
default constructor
Class that wraps an HDF5 dataspace and takes care of managing its resources.
Definition hdfdataspace.hh:37
Wrapper class around an hdf5 identifier, used to manage reference counts of the object this identifie...
Definition hdfidentifier.hh:29
hid_t get_id() const
Get the HDF5 id held by this object.
Definition hdfidentifier.hh:53
void set_id(hid_t id)
Set id to the given argument. Only to be used to invalidate objects upon move or similar.
Definition hdfidentifier.hh:65
Common base class for all HDF5 classes in the DATAIO Module i.e., for all classes that wrap HDF5-C-Li...
Definition hdfobject.hh:37
std::string _path
Name of the object.
Definition hdfobject.hh:50
auto get_refcount()
Get the reference count of object.
Definition hdfobject.hh:131
void close()
Close function which takes care of correctly closing the object and managing the reference counter.
Definition hdfobject.hh:161
virtual bool is_valid() const
Check if the object is still valid.
Definition hdfobject.hh:143
std::shared_ptr< spdlog::logger > _log
pointer to the logger for dataio
Definition hdfobject.hh:56
HDFIdentifier _id
Identifier object that binds an instance of this class to an HDF5 object.
Definition hdfobject.hh:44
hid_t get_C_id() const
Get the C id object.
Definition hdfobject.hh:120
void bind_to(hid_t id, std::function< herr_t(hid_t) > closing_func, std::string path={})
Open the object and bind it to a HDF5 object identified by 'id' with name 'path'. Object should be cr...
Definition hdfobject.hh:186
Class which handles the conversion of C-types into hdf5types.
Definition hdftype.hh:136
void open(T &&object)
Open the HDF5 type associated with an HDFObject, i.e., a dataset or an attribute.
Definition hdftype.hh:224
void close()
Construct close from the given arguments.
Definition hdftype.hh:322
auto type_category() const
Get the type category of the held type, i.e., scalar, string, varlen,...
Definition hdftype.hh:199
std::size_t size() const
Size of the type held in bytes.
Definition hdftype.hh:210
OutputIt transform(const Utopia::ExecPolicy policy, InputIt first1, InputIt last1, OutputIt d_first, UnaryOperation unary_op)
Apply a unary operator to a range and store the result in a new range.
Definition parallel.hh:368
const Cont calc_chunksize(const hsize_t typesize, const Cont io_extend, Cont max_extend={}, const bool opt_inf_dims=true, const bool larger_high_dims=true, const unsigned int CHUNKSIZE_MAX=1048576, const unsigned int CHUNKSIZE_MIN=8192, const unsigned int CHUNKSIZE_BASE=262144)
Try to guess a good chunksize for a dataset.
Definition hdfchunking.hh:604
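A sketch of calling the heuristic directly; HDFDataset normally invokes it itself when no chunk sizes were supplied. The slab and capacity values are illustrative, and it is assumed that the function lives in the Utopia::DataIO namespace like the rest of this module:

    #include <hdf5.h>
    #include <vector>
    #include "hdfchunking.hh" // include path may differ

    std::vector<hsize_t> chunk_sketch()
    {
        // suggest chunk sizes for doubles written in 1 x 1000 slabs into a
        // dataset that can grow to at most 100 x 1000 entries
        return Utopia::DataIO::calc_chunksize(
            sizeof(double),                   // typesize in bytes
            std::vector<hsize_t>{1, 1000},    // io_extend: shape of one write
            std::vector<hsize_t>{100, 1000}); // max_extend: dataset capacity
    }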
void swap(WriteTask< BGB, DW, DB, AWG, AWD > &lhs, WriteTask< BGB, DW, DB, AWG, AWD > &rhs)
Swaps the state of lhs and rhs.
Definition write_task.hh:240
Container select_entities(const Manager &mngr, const DataIO::Config &sel_cfg)
Select entities according to parameters specified in a configuration.
Definition select.hh:213
void select_slice(arma::Row< hsize_t > start, arma::Row< hsize_t > end, arma::Row< hsize_t > stride)
Select a slice in the dataspace defined by [start, end, stride] in the manner of numpy....
Definition hdfdataspace.hh:225
auto path_is_valid(hid_t id, std::string path)
Check if the path given relative to the object identified by 'id' exists and points to a valid hdf5 o...
Definition hdfutilities.hh:150
hsize_t rank()
Get the dataspace's rank, i.e., number of dimensions.
Definition hdfdataspace.hh:46
void open()
Open the dataspace - set it to be equivalent to any data that later will be used to write or read.
Definition hdfdataspace.hh:117
std::pair< arma::Row< hsize_t >, arma::Row< hsize_t > > get_properties()
Get the properties object: size and capacity.
Definition hdfdataspace.hh:67
This file implements a C++ class which wraps a C HDF5 attribute to a HDF5-object (group or dataset),...
In this file, a class for automatically creating intermediate buffer data structures between the user...
This file provides a class which is responsible for the automatic conversion between C/C++ types and ...
This file provides metafunctions for automatically determining the nature of a C/C++ types at compile...
auto end(zip< Containers... > &zipper)
end function like std::end
Definition zip.hh:550
std::string str(T &&t)
Turn any object for which operator<< exists into a string. Mostly useful for logging data via spdlog ...
Definition ostream.hh:164
typename remove_qualifier< T >::type remove_qualifier_t
Shorthand for 'typename remove_qualifier::value'.
Definition type_traits.hh:97
Definition agent.hh:11
Return the size of a Type T containing other types at compile time. If no object for which an overloa...
Definition type_traits.hh:438