bes Updated for version 3.20.13
HDF5BaseArray.cc
Go to the documentation of this file.
1// This file is part of hdf5_handler an HDF5 file handler for the OPeNDAP
2// data server.
3
4// Author: Muqun Yang <myang6@hdfgroup.org>
5
6// Copyright (c) 2011-2016 The HDF Group, Inc. and OPeNDAP, Inc.
7//
8// This is free software; you can redistribute it and/or modify it under the
9// terms of the GNU Lesser General Public License as published by the Free
10// Software Foundation; either version 2.1 of the License, or (at your
11// option) any later version.
12//
13// This software is distributed in the hope that it will be useful, but
14// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16// License for more details.
17//
18// You should have received a copy of the GNU Lesser General Public
19// License along with this library; if not, write to the Free Software
20// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21//
22// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
23// You can contact The HDF Group, Inc. at 1800 South Oak Street,
24// Suite 203, Champaign, IL 61820
41
#include <algorithm>
#include <cassert>
#include <iostream>
#include <limits>
#include <sstream>

#include <BESDebug.h>
#include <libdap/InternalErr.h>

#include "HDF5BaseArray.h"
#include "HDF5RequestHandler.h"
#include "ObjMemCache.h"
53using namespace std;
54using namespace libdap;
#if 0
// NOTE: everything up to the matching #endif is excluded from compilation.

// Return a newly allocated copy of this array.
BaseType *HDF5BaseArray::ptr_duplicate()
{
    auto *copy = new HDF5BaseArray(*this);
    return copy;
}

// Reads nothing and always reports success.
// Per the original note, the data is read by the missing coordinate variable
// classes (HDF5GMCFMissNonLLCVArray etc.).
bool HDF5BaseArray::read()
{
    BESDEBUG("h5","Coming to HDF5BaseArray read "<<endl);
    return true;
}

#endif
70
71// parse constraint expr. and make hdf5 coordinate point location.
72// return number of elements to read.
73int
74HDF5BaseArray::format_constraint (int *offset, int *step, int *count)
75{
76 long nels = 1;
77 int id = 0;
78
79 Dim_iter p = dim_begin ();
80
81 while (p != dim_end ()) {
82
83 int start = dimension_start (p, true);
84 int stride = dimension_stride (p, true);
85 int stop = dimension_stop (p, true);
86
87 // Check for illegal constraint
88 if (start > stop) {
89 ostringstream oss;
90 oss << "Array/Grid hyperslab start point "<< start <<
91 " is greater than stop point " << stop <<".";
92 throw Error(malformed_expr, oss.str());
93 }
94
95 offset[id] = start;
96 step[id] = stride;
97 count[id] = ((stop - start) / stride) + 1; // count of elements
98 nels *= count[id]; // total number of values for variable
99
100 BESDEBUG ("h5",
101 "=format_constraint():"
102 << "id=" << id << " offset=" << offset[id]
103 << " step=" << step[id]
104 << " count=" << count[id]
105 << endl);
106
107 id++;
108 p++;
109 }// "while (p != dim_end ())"
110
111 return (int)nels;
112}
113
114void HDF5BaseArray::write_nature_number_buffer(int rank, int tnumelm) {
115
116 if (rank != 1)
117 throw InternalErr(__FILE__, __LINE__, "Currently the rank of the missing field should be 1");
118
119 vector<int>offset;
120 vector<int>count;
121 vector<int>step;
122 offset.resize(rank);
123 count.resize(rank);
124 step.resize(rank);
125
126
127 int nelms = format_constraint(offset.data(), step.data(), count.data());
128
129 // Since we always assign the the missing Z dimension as 32-bit
130 // integer, so no need to check the type. The missing Z-dim is always
131 // 1-D with natural number 1,2,3,....
132 vector<int>val;
133 val.resize(nelms);
134
135 if (nelms == tnumelm) {
136 for (int i = 0; i < nelms; i++)
137 val[i] = i;
138 set_value(val.data(), nelms);
139 }
140 else {
141 for (int i = 0; i < count[0]; i++)
142 val[i] = offset[0] + step[0] * i;
143 set_value(val.data(), nelms);
144 }
145}
146
147//#if 0
148void HDF5BaseArray::read_data_from_mem_cache(H5DataType h5type, const vector<size_t> &h5_dimsizes,void* buf,const bool is_dap4){
149
150 BESDEBUG("h5", "Coming to read_data_from_mem_cache"<<endl);
151 vector<int>offset;
152 vector<int>count;
153 vector<int>step;
154
155 auto ndims = (int)(h5_dimsizes.size());
156 if(ndims == 0)
157 throw InternalErr(__FILE__, __LINE__, "Currently we only support array numeric data in the cache, the number of dimension for this file is 0");
158
159
160 offset.resize(ndims);
161 count.resize(ndims);
162 step.resize(ndims);
163 int nelms = format_constraint (offset.data(), step.data(), count.data());
164
165 // set the original position to the starting point
166 vector<size_t>pos(ndims,0);
167 for (int i = 0; i< ndims; i++)
168 pos[i] = offset[i];
169
170
171 switch (h5type) {
172
173 case H5UCHAR:
174
175 {
176 vector<unsigned char> val;
177 subset<unsigned char>(
178 buf,
179 ndims,
180 h5_dimsizes,
181 offset.data(),
182 step.data(),
183 count.data(),
184 &val,
185 pos,
186 0
187 );
188
189 set_value ((dods_byte *) val.data(), nelms);
190 } // case H5UCHAR
191 break;
192
193 case H5CHAR:
194 {
195
196 vector<char>val;
197 subset<char>(
198 buf,
199 ndims,
200 h5_dimsizes,
201 offset.data(),
202 step.data(),
203 count.data(),
204 &val,
205 pos,
206 0
207 );
208
209 if(false == is_dap4) {
210
211 vector<short>newval;
212 newval.resize(nelms);
213
214 for (int counter = 0; counter < nelms; counter++)
215 newval[counter] = (short) (val[counter]);
216 set_value ((dods_int16 *) val.data(), nelms);
217 }
218 else
219 set_value ((dods_int8 *) val.data(), nelms);
220
221
222 } // case H5CHAR
223 break;
224
225 case H5INT16:
226 {
227 vector<short> val;
228 subset<short>(
229 buf,
230 ndims,
231 h5_dimsizes,
232 offset.data(),
233 step.data(),
234 count.data(),
235 &val,
236 pos,
237 0
238 );
239
240
241 set_value ((dods_int16 *) val.data(), nelms);
242 }// H5INT16
243 break;
244
245
246 case H5UINT16:
247 {
248 vector<unsigned short> val;
249 subset<unsigned short>(
250 buf,
251 ndims,
252 h5_dimsizes,
253 offset.data(),
254 step.data(),
255 count.data(),
256 &val,
257 pos,
258 0
259 );
260
261
262 set_value ((dods_uint16 *) val.data(), nelms);
263 } // H5UINT16
264 break;
265
266 case H5INT32:
267 {
268 vector<int>val;
269 subset<int>(
270 buf,
271 ndims,
272 h5_dimsizes,
273 offset.data(),
274 step.data(),
275 count.data(),
276 &val,
277 pos,
278 0
279 );
280
281 set_value ((dods_int32 *) val.data(), nelms);
282 } // case H5INT32
283 break;
284
285 case H5UINT32:
286 {
287 vector<unsigned int>val;
288 subset<unsigned int>(
289 buf,
290 ndims,
291 h5_dimsizes,
292 offset.data(),
293 step.data(),
294 count.data(),
295 &val,
296 pos,
297 0
298 );
299
300 set_value ((dods_uint32 *) val.data(), nelms);
301 }
302 break;
303 // Add the code for the CF option DAP4 support
304 // For the CF option DAP2 support, the code will
305 // not come here since 64-integer will be ignored
306 // in DAP2.
307 case H5INT64:
308 {
309 vector<long long>val;
310 subset<long long>(
311 buf,
312 ndims,
313 h5_dimsizes,
314 offset.data(),
315 step.data(),
316 count.data(),
317 &val,
318 pos,
319 0
320 );
321
322 set_value ((dods_int64 *) val.data(), nelms);
323 } // case H5INT64
324 break;
325
326 case H5UINT64:
327 {
328 vector<unsigned long long>val;
329 subset<unsigned long long>(
330 buf,
331 ndims,
332 h5_dimsizes,
333 offset.data(),
334 step.data(),
335 count.data(),
336 &val,
337 pos,
338 0
339 );
340
341 set_value ((dods_uint64 *) val.data(), nelms);
342 }
343 break;
344
345
346 case H5FLOAT32:
347 {
348 vector<float>val;
349 subset<float>(
350 buf,
351 ndims,
352 h5_dimsizes,
353 offset.data(),
354 step.data(),
355 count.data(),
356 &val,
357 pos,
358 0
359 );
360 set_value ((dods_float32 *) val.data(), nelms);
361 }
362 break;
363
364
365 case H5FLOAT64:
366 {
367
368 vector<double>val;
369 subset<double>(
370 buf,
371 ndims,
372 h5_dimsizes,
373 offset.data(),
374 step.data(),
375 count.data(),
376 &val,
377 pos,
378 0
379 );
380 set_value ((dods_float64 *) val.data(), nelms);
381 } // case H5FLOAT64
382 break;
383
384 default:
385 throw InternalErr(__FILE__,__LINE__,"Non-supported datatype");
386
387 }
388}
389
391//
392// \param input Input variable
393// \param dim dimension info of the input
394// \param start start indexes of each dim
395// \param stride stride of each dim
396// \param edge count of each dim
397// \param poutput output variable
398// \parrm index dimension index
399// \return 0 if successful. -1 otherwise.
400//
401template<typename T>
403 void* input,
404 int rank,
405 const vector<size_t> & dim,
406 int start[],
407 int stride[],
408 int edge[],
409 vector<T> *poutput,
410 vector<size_t>& pos,
411 int index)
412{
413 for(int k=0; k<edge[index]; k++)
414 {
415 pos[index] = start[index] + k*stride[index];
416 if(index+1<rank)
417 subset(input, rank, dim, start, stride, edge, poutput,pos,index+1);
418 if(index==rank-1)
419 {
420 size_t cur_pos = INDEX_nD_TO_1D( dim, pos);
421 void* tempbuf = (void*)((char*)input+cur_pos*sizeof(T));
422 poutput->push_back(*(static_cast<T*>(tempbuf)));
423 //"poutput->push_back(input[HDF5CFUtil::INDEX_nD_TO_1D( dim, pos)]);"
424 }
425 } // end of for
426 return 0;
427} // end of template<typename T> static int subset
428
429size_t HDF5BaseArray::INDEX_nD_TO_1D (const std::vector < size_t > &dims,
430 const std::vector < size_t > &pos) const {
431 //
432 // "int a[10][20][30] // & a[1][2][3] == a + (20*30+1 + 30*2 + 1 *3)"
433 // "int b[10][2] // &b[1][1] == b + (2*1 + 1)"
434 //
435 if(dims.size () != pos.size ())
436 throw InternalErr(__FILE__,__LINE__,"dimension error in INDEX_nD_TO_1D routine.");
437 size_t sum = 0;
438 size_t start = 1;
439
440 for (size_t p = 0; p < pos.size (); p++) {
441 size_t m = 1;
442
443 for (size_t j = start; j < dims.size (); j++)
444 m *= dims[j];
445 sum += m * pos[p];
446 start++;
447 }
448 return sum;
449}
450
451// This routine will check if any section(separated by sep) of string cur_str is inside the vector str_list.
452// The first found string will be returned or empty string will return if not found in the whole cur_str.
453string HDF5BaseArray::
454check_str_sect_in_list(const vector<string>&str_list, const string &cur_str,const char sep) const {
455
456 string ret_str;
457 string::size_type start = 0;
458 string::size_type end = 0;
459 // Obtain the ret_str value
460 // The cur_str will be chopped into tokens separated by sep.
461 while ((end = cur_str.find(sep, start)) != string::npos) {
462 if(std::find(str_list.begin(),str_list.end(),cur_str.substr(start,end-start))!=
463 str_list.end()) {
464 ret_str = cur_str.substr(start,end-start);
465 break;
466 }
467 start = end + 1;
468 }
469
470 // We will not include the last sect (rightmost sect) of cur_str.
471#if 0
472 //if(ret_str != "") {
473 // if(ret_str == cur_str.substr(cur_str.find_last_of(sep)+1))
474 // ret_str ="";
475 //}
476 //
477#endif
478
479 return ret_str;
480
481}
482
483// This routine will check if there is any sub-string of the fullpath(fname+varname) that is exactly the subset of the fullpath with the same ending
484// of the fullpath is contained in the slist.
485// Examples: slist contains { /foo1/foovar foovar2 } fname is /temp/myfile/foo1/ varname is foovar. The rotuine will return true.
486// fname is /myfile/foo2/ varname is foovar. The routine will return false.
487bool HDF5BaseArray::
488check_var_cache_files(const vector<string>&slist, const string &fname,const string &varname) const {
489
490 bool ret_value = false;
491 if(fname=="" || varname=="")
492 return ret_value;
493
494 string fullpath;
495
496 if(fname[fname.size()-1] == '/') {
497 if(varname[0]!='/')
498 fullpath = fname+varname;
499 else
500 fullpath = fname.substr(0,fname.size()-1)+varname;
501 }
502 else {
503 if(varname[0]!='/')
504 fullpath = fname+'/'+varname;
505 else
506 fullpath = fname+varname;
507 }
508
509
510 for(unsigned int i = 0; i<slist.size();i++) {
511#if 0
512//cerr<<"fullpath is "<<fullpath <<endl;
513//cerr<<"slist[i] is "<<slist[i] <<endl;
514//cerr<<"fullpath - slist size"<<fullpath.size() -slist[i].size()<<endl;
515//cerr<<"fullpath.rfind(slist[i] is "<<fullpath.rfind(slist[i]) <<endl;
516#endif
517 if(fullpath.rfind(slist[i])==(fullpath.size()-slist[i].size())){
518 ret_value = true;
519 break;
520 }
521 }
522 return ret_value;
523}
524
525// Handle data when memory cache is turned on.
526void HDF5BaseArray::
527handle_data_with_mem_cache(H5DataType h5_dtype, size_t total_elems,const short cache_flag, const string & cache_key, const bool is_dap4) {
528
529 //
530 ObjMemCache * mem_data_cache= nullptr;
531 if(1 == cache_flag)
532 mem_data_cache = HDF5RequestHandler::get_srdata_mem_cache();
533 else if(cache_flag > 1) {
534 mem_data_cache = HDF5RequestHandler::get_lrdata_mem_cache();
535
536#if 0
537//cerr<<"coming to the large metadata cache "<<endl;
538//cerr<<"The cache key is "<<cache_key <<endl;
539
540// dump the values in the cache,keep this line to check if memory cache works.
541//mem_data_cache->dump(cerr);
542#endif
543
544 }
545
546
547 if(mem_data_cache == nullptr)
548 throw InternalErr(__FILE__,__LINE__,"The memory data cache should NOT be nullptr.");
549
550 auto mem_cache_ptr = static_cast<HDF5DataMemCache*>(mem_data_cache->get(cache_key));
551 if(mem_cache_ptr) {
552
553 BESDEBUG("h5","Cache flag: 1 small data cache, 2 large data cache genenral"
554 <<" 3 large data cache common dir, 4 large data cache real var" <<endl);
555
556 BESDEBUG("h5","Data Memory Cache hit, the variable name is "<<name() <<". The cache flag is "<< cache_flag<<endl);
557
558#if 0
559 //const string var_name = mem_cache_ptr->get_varname();
560#endif
561
562 // Obtain the buffer and do subsetting
563 const size_t var_size = mem_cache_ptr->get_var_buf_size();
564 if(!var_size)
565 throw InternalErr(__FILE__,__LINE__,"The cached data buffer size is 0.");
566 else {
567
568 void *buf = mem_cache_ptr->get_var_buf();
569
570 // Obtain dimension size info.
571 vector<size_t> dim_sizes;
572 Dim_iter i_dim = dim_begin();
573 Dim_iter i_enddim = dim_end();
574 while (i_dim != i_enddim) {
575 dim_sizes.push_back(dimension_size(i_dim));
576 ++i_dim;
577 }
578 // read data from the memory cache
579 read_data_from_mem_cache(h5_dtype,dim_sizes,buf,is_dap4);
580 }
581 }
582 else{
583
584 BESDEBUG("h5","Cache flag: 1 small data cache, 2 large data cache genenral"
585 <<" 3 large data cache common dir, 4 large data cache real var" <<endl);
586
587 BESDEBUG("h5","Data Memory added to the cache, the variable name is "<<name() <<". The cache flag is "<< cache_flag<<endl);
588
589 vector <char> buf;
590 if(total_elems == 0)
591 throw InternalErr(__FILE__,__LINE__,"The total number of elements is 0.");
592
593 buf.resize(total_elems*HDF5CFUtil::H5_numeric_atomic_type_size(h5_dtype));
594
595 // This routine will read the data, send it to the DAP and save the buf to the cache.
596 read_data_NOT_from_mem_cache(true,buf.data());
597
598 // Create a new cache element.
599#if 0
600 //HDF5DataMemCache* new_mem_cache = new HDF5DataMemCache(varname);
601#endif
602 auto new_mem_cache_ele = new HDF5DataMemCache();
603 new_mem_cache_ele->set_databuf(buf);
604
605 // Add this entry to the cache list
606 mem_data_cache->add(new_mem_cache_ele, cache_key);
607 }
608
609 return;
610}
611
612BaseType* HDF5BaseArray::h5cfdims_transform_to_dap4(D4Group *grp) {
613
614 if(grp == nullptr)
615 return nullptr;
616 Array *dest = static_cast<HDF5BaseArray*>(ptr_duplicate());
617
618 // If there is just a size, don't make
619 // a D4Dimension (In DAP4 you cannot share a dimension unless it has
620 // a name). jhrg 3/18/14
621
622 D4Dimensions *grp_dims = grp->dims();
623 for (Array::Dim_iter dap2_dim = dest->dim_begin(), e = dest->dim_end(); dap2_dim != e; ++dap2_dim) {
624 if (!(*dap2_dim).name.empty()) {
625
626 // If a D4Dimension with the name already exists, use it.
627 D4Dimension *d4_dim = grp_dims->find_dim((*dap2_dim).name);
628 if (!d4_dim) {
629 d4_dim = new D4Dimension((*dap2_dim).name, (*dap2_dim).size);
630 grp_dims->add_dim_nocopy(d4_dim);
631 }
632 // At this point d4_dim's name and size == those of (*d) so just set
633 // the D4Dimension pointer so it matches the one in the D4Group.
634 (*dap2_dim).dim = d4_dim;
635 }
636 }
637
638 return dest;
639
640}
641
642
643
A helper class that aims to reduce code redundancy for different special CF derived array classes. For e...
include the entry functions to execute the handlers
int subset(void *input, int rank, const std::vector< size_t > &dim, int start[], int stride[], int edge[], std::vector< T > *poutput, std::vector< size_t > &pos, int index)
Getting a subset of a variable.
An in-memory cache for DapObj (DAS, DDS, ...) objects.
Definition: ObjMemCache.h:84
virtual void add(libdap::DapObj *obj, const std::string &key)
Add an object to the cache and associate it with a key.
Definition: ObjMemCache.cc:63
virtual libdap::DapObj * get(const std::string &key)
Get the cached pointer.
Definition: ObjMemCache.cc:105