Optimize chunk sizes using max_extend information.
This algorithm is aware of the maximum extent of a dataset (max_extend) and uses that information during optimization, aiming to increase the size of the chunks towards CHUNKSIZE_MAX as far as possible without going beyond max_extend. The paradigm here is that the number of chunks needed for read/write operations should be minimized while keeping a chunk's byte size at or below a certain value. The algorithm distinguishes between dimensions that have a finite extent and those that can grow to H5S_UNLIMITED, i.e. have an "infinite" extent. First, the aim is to cover max_extend in the finite dimensions. For each of these dimensions, it checks whether an integer multiple of the current chunk size reaches the maximum extent. If, after that, the target CHUNKSIZE_MAX is not yet reached and the opt_inf_dims flag is set, the chunk sizes in the unlimited dimensions are extended as far as possible, assuming that they were chosen to be unlimited because they will be filled at some point and that larger chunk sizes will reduce the number of chunks needed during read/write operations.
312{
313
314 auto bytes = [&typesize](Cont c) {
315 return typesize *
316 std::accumulate(c.begin(), c.end(), 1, std::multiplies<>());
317 };
318
319
320
321 if (typesize > CHUNKSIZE_MAX)
322 {
323 throw std::invalid_argument(
324 "Cannot use opt_chunks_with_max_extend "
325 "with a typesize larger than CHUNKSIZE_MAX!");
326 }
327
328
329
330
331 auto dims_fin =
332 find_all_idcs(max_extend, [](auto l) { return l != H5S_UNLIMITED; });
333
334
335
336
337 auto dims_inf =
338 find_all_idcs(max_extend, [](auto l) { return l == H5S_UNLIMITED; });
339
340
341
342
343
344
345
346 using IdxCont = decltype(dims_fin);
347
348
349 IdxCont dims(chunks.size());
350 std::iota(dims.begin(), dims.end(), 0);
351
352
353
354 IdxCont dims_fillable;
355 for (auto dim : dims_fin)
356 {
357 if (max_extend[dim] > chunks[dim])
358 {
359 dims_fillable.push_back(dim);
360 }
361 }
362
363
364 if (larger_high_dims)
365 {
366
367 std::reverse(dims_fillable.begin(), dims_fillable.end());
368 std::reverse(dims_fin.begin(), dims_fin.end());
369 std::reverse(dims_inf.begin(), dims_inf.end());
370
371
372
373 }
374
375
376
377 if (!dims_fillable.size())
378 {
379 log->debug("No finite dimensions available to optimize.");
380 }
381 else
382 {
383 log->debug("Optimizing {} finite dimension(s) where max_extend is not "
384 "yet reached ...",
385 dims_fillable.size());
386
387
388 for (auto dim : dims_fillable)
389 {
390
391
392 if (bytes(chunks) == CHUNKSIZE_MAX)
393 {
394 log->debug("Reached maximum chunksize.");
395 break;
396 }
397
398
399 if (max_extend[dim] % chunks[dim] == 0)
400 {
401
402 std::size_t factor = max_extend[dim] / chunks[dim];
403
404
405 if (factor * bytes(chunks) <= CHUNKSIZE_MAX)
406 {
407
408 log->debug("Dimension {} can be filled completely. "
409 "Factor: {}",
410 dim,
411 factor);
412 chunks[dim] = chunks[dim] * factor;
413 continue;
414 }
415
416
417
418
419
420 for (std::size_t div = (CHUNKSIZE_MAX / bytes(chunks));
421 div >= 1;
422 div--)
423 {
424
425 if (factor % div == 0)
426 {
427
428 factor = div;
429 break;
430 }
431 }
432
433
434
435
436
437 if (factor > 1)
438 {
439 log->debug(
440 "Scaling dimension {} with factor {} ...", dim, factor);
441
442 chunks[dim] = chunks[dim] * factor;
443 }
444 }
445 else
446 {
447
448
449 const double factor = double(max_extend[dim]) / chunks[dim];
450
451 if (factor * bytes(chunks) <= CHUNKSIZE_MAX)
452 {
453
454 log->debug("Dimension {} can be filled completely. "
455 "(difference: {}, factor: {})",
456 dim,
457 max_extend[dim] - chunks[dim],
458 factor);
459
460 chunks[dim] = max_extend[dim];
461 }
462 else
463 {
464
465 log->debug("Dimension {} cannot be extended to fill "
466 "max_extend without exceeding maximum "
467 "chunksize! "
468 "(difference: {}, factor: {})",
469 dim,
470 max_extend[dim] - chunks[dim],
471 factor);
472 }
473 }
474
475 }
476 }
477
478
479
480 if (!opt_inf_dims)
481 {
482 log->debug("Optimization of unlimited dimensions is disabled.");
483 }
484 else if (!dims_inf.size())
485 {
486 log->debug("No unlimited dimensions available to optimize.");
487 }
488 else if (bytes(chunks) == CHUNKSIZE_MAX)
489 {
490 log->debug("Cannot further optimize using unlimited dimensions.");
491 }
492 else
493 {
494 log->debug("Optimizing {} unlimited dimension(s) to fill the maximum "
495 "chunk size ...",
496 dims_inf.size());
497
498
499
500
501 for (auto dim : dims_inf)
502 {
503
504 const std::size_t factor = CHUNKSIZE_MAX / bytes(chunks);
505
506
507 if (factor > 1)
508 {
509 log->debug(
510 "Scaling dimension {} with factor {} ...", dim, factor);
511
512 chunks[dim] = chunks[dim] * factor;
513 }
514 }
515 }
516
517
518
519 if (bytes(chunks) > CHUNKSIZE_MAX)
520 {
521 throw std::runtime_error("Calculated chunks exceed CHUNKSIZE_MAX! "
522 "This should not have happened!");
523 }
524
525 return;
526}