1#ifndef UTOPIA_DATAIO_HDFCHUNKING_HH
2#define UTOPIA_DATAIO_HDFCHUNKING_HH
9#include "../core/logging.hh"
28namespace _chunk_helpers
41template <
typename Cont,
typename Predicate >
42std::vector< unsigned short >
46 std::vector< unsigned short >
idcs;
63template <
typename Cont = std::vector< h
size_t > >
111template <
typename Cont,
typename Logger >
124 std::accumulate(
c.begin(),
c.end(), 1, std::multiplies<>());
131 throw std::invalid_argument(
"Cannot use opt_chunks_target with a "
132 "typesize larger than CHUNKSIZE_MAX!");
135 log->debug(
"Starting optimization towards target size:"
136 " {:7.0f}B ({:.1f} kiB)",
146 log->debug(
"Target size too large! New target size:"
147 " {:7.0f}B ({:.1f} kiB)",
155 log->debug(
"Target size too small! New target size:"
156 " {:7.0f}B ({:.1f} kiB)",
166 auto rank =
chunks.size();
180 for (
unsigned short i = 0;
i < 42 * rank;
i++)
185 log->debug(
"Chunks: {} -> {:7d} B ({:.1f} kiB)",
194 log->debug(
"Close enough to target size now.");
211 dim = (rank - 1) - dim;
215 log->debug(
"Doubling extend of chunk dimension {} ...", dim);
238 log->debug(
"Skipping reduction of chunk dimension {}, "
239 "because it is the highest ...",
248 log->debug(
"Extend of chunk dimension {} is already 1.", dim);
259 log->debug(
"Halving extend of chunk dimension {} ...", dim);
303template <
typename Cont,
typename Logger >
316 std::accumulate(
c.begin(),
c.end(), 1, std::multiplies<>());
323 throw std::invalid_argument(
324 "Cannot use opt_chunks_with_max_extend "
325 "with a typesize larger than CHUNKSIZE_MAX!");
350 std::iota(
dims.begin(),
dims.end(), 0);
379 log->debug(
"No finite dimensions available to optimize.");
383 log->debug(
"Optimizing {} finite dimension(s) where max_extend is not "
394 log->debug(
"Reached maximum chunksize.");
408 log->debug(
"Dimension {} can be filled completely. "
440 "Scaling dimension {} with factor {} ...", dim,
factor);
454 log->debug(
"Dimension {} can be filled completely. "
455 "(difference: {}, factor: {})",
465 log->debug(
"Dimension {} cannot be extended to fill "
466 "max_extend without exceeding maximum "
468 "(difference: {}, factor: {})",
482 log->debug(
"Optimization of unlimited dimensions is disabled.");
486 log->debug(
"No unlimited dimensions available to optimize.");
490 log->debug(
"Cannot further optimize using unlimited dimensions.");
494 log->debug(
"Optimizing {} unlimited dimension(s) to fill the maximum "
510 "Scaling dimension {} with factor {} ...", dim,
factor);
521 throw std::runtime_error(
"Calculated chunks exceed CHUNKSIZE_MAX! "
522 "This should not have happened!");
602template <
typename Cont = std::vector< h
size_t > >
614 using namespace _chunk_helpers;
619 std::accumulate(
c.begin(),
c.end(), 1, std::multiplies<>());
624 const auto log = spdlog::get(
"data_io");
633 throw std::invalid_argument(
"Cannot guess chunksize for a scalar "
642 throw std::invalid_argument(
643 "Argument 'io_extend' contained "
644 "illegal (zero or negative) value(s)! io_extend: " +
659 throw std::invalid_argument(
660 "Argument 'max_extend' does not have the same dimensionality "
661 "as the rank of this dataset (as extracted from the "
662 "'io_extend' argument).");
666 for (
unsigned short i = 0;
i < rank;
i++)
670 throw std::invalid_argument(
671 "Index " + std::to_string(
i) +
673 ") was smaller than the corresponding 'io_extend' (" +
712 log->info(
"Calculating optimal chunk size for io_extend {} and "
716 log->debug(
"rank: {}", rank);
721 log->debug(
"typesize: {}",
typesize);
722 log->debug(
"max. chunksize: {:7d} ({:.1f} kiB)",
725 log->debug(
"min. chunksize: {:7d} ({:.1f} kiB)",
728 log->debug(
"base chunksize: {:7d} ({:.1f} kiB)",
739 log->debug(
"Type size >= 1/2 max. chunksize -> Each cell needs to be "
741 return Cont(rank, 1);
748 log->debug(
"Maximally extended dataset will fit into single chunk.");
753 log->debug(
"Cannot apply simple optimizations. Try to fit single I/O "
754 "operation into a chunk ...");
771 log->debug(
"Single I/O operation does not fit into chunk.");
772 log->debug(
"Trying to use the fewest possible chunks for a single "
773 "I/O operation ...");
792 log->debug(
"Single I/O operation does fit into chunk.");
793 log->debug(
"Optimizing chunks in unlimited dimensions to be closer "
794 "to base chunksize ...");
808 log->debug(
"Single I/O operation does fit into a chunk.");
812 for (
unsigned short i = 0;
i < rank;
i++)
816 log->warn(
"Optimization led to chunks larger than max_extend. "
817 "This should not have happened!");
831 log->debug(
"Have max_extend information and can (potentially) use it "
832 "to optimize chunk extensions.");
848 throw std::runtime_error(
850 " is larger than CHUNKSIZE_MAX! This should not have happened!");
const Cont calc_chunksize(const hsize_t typesize, const Cont io_extend, Cont max_extend={}, const bool opt_inf_dims=true, const bool larger_high_dims=true, const unsigned int CHUNKSIZE_MAX=1048576, const unsigned int CHUNKSIZE_MIN=8192, const unsigned int CHUNKSIZE_BASE=262144)
Try to guess a good chunksize for a dataset.
Definition hdfchunking.hh:604
Container select_entities(const Manager &mngr, const DataIO::Config &sel_cfg)
Select entities according to parameters specified in a configuration.
Definition select.hh:213
This file provides metafunctions for automatically determining the nature of C/C++ types at compile time...
void opt_chunks_with_max_extend(Cont &chunks, const Cont &max_extend, const hsize_t typesize, const unsigned int CHUNKSIZE_MAX, const bool opt_inf_dims, const bool larger_high_dims, const Logger &log)
Optimize chunk sizes using max_extend information.
Definition hdfchunking.hh:305
std::vector< unsigned short > find_all_idcs(Cont &vec, Predicate pred)
Finds the indices of all elements in a vector that match the given predicate.
Definition hdfchunking.hh:43
void opt_chunks_target(Cont &chunks, double bytes_target, const hsize_t typesize, const unsigned int CHUNKSIZE_MAX, const unsigned int CHUNKSIZE_MIN, const bool larger_high_dims, const Logger &log)
Optimizes the chunks along all axes to find a good default.
Definition hdfchunking.hh:113
std::string to_str(const Cont &vec)
Helper function to create a string representation of containers.
Definition hdfchunking.hh:65