// Forward declarations for the helpers defined later in this file.
// NOTE(review): each declaration starts with a number fused to the return type
// (e.g. "12bool"); this looks like a listing line number that leaked into the
// text -- confirm against the original file before building.

// --- Parsing of the supplemental DMR++ text ---
// Collects, for each variable element found in miss_dmrpp_info, its type, its
// name and its chunk text into the three output vectors.
12bool obtain_var_info(
const string & miss_dmrpp_info,vector<string> & var_types, vector<string>&var_names,vector<string>&chunk_info_list,
bool & is_chunk_mark1);
// Finds the double quote that ends the name starting at str_pos.
13bool find_var_name(
const string &str,
size_t &str_pos,
size_t &var_name_pos_start,
size_t &var_name_pos_end);
// Finds the closing tag "</var_type>" at or after str_pos.
14bool find_end_var_block(
const string&str,
const string&var_type,
const size_t &str_pos,
size_t &var_end_pos);
// Locates the "<dmrpp:chunks" text (or, failing that, "<dmrpp:chunk ") after
// str_pos and checks that its end lies before var_end_pos.
15bool find_chunk_info(
const string &str,
const size_t&str_pos,
size_t &chunk_info_pos_start,
size_t &chunk_info_pos_end,
const size_t&var_end_pos,
bool & is_mark1);
// --- Adding the file address to the chunk text ---
// Adds a file-address attribute to every entry of chunk_info_list.
// faddr_source defaults to an empty string.
18bool add_faddr_chunk_info(
const string& miss_dmrpp_info,vector<string>&chunk_info_list,
bool is_dmrpp_mark1,
const string faddr_source =
"");
// Inserts file_addr in front of the first "/>" of chunk_info.
19bool add_faddr_contig_line(
string &chunk_info,
const string &file_addr);
// Inserts file_addr in front of the "/>" of each "<dmrpp:chunk offset=" line.
20bool add_faddr_chunk_comp_lines(
string & chunk_info,
const string &file_addr);
// --- Merging into the DMR++ file that is being modified ---
// Reads fname2, inserts the chunk text into the matching variable blocks and
// writes the result back to the same file.
23bool add_missing_info_to_file(
const string &fname2,
const vector<string> &var_types,
const vector<string> &var_names,
const vector<string> &chunk_info_list);
// Builds the opening and closing tag strings for every variable.
24void gen_block(
const vector<string>&var_type_list,
const vector<string>&var_name_list,vector<string>&block_begin,vector<string>&block_end);
// Returns true when the even-indexed entries of sort_block_pos hold the same
// set of values as block_pos_start.
25bool check_overlap_intervals(
const vector<size_t> &sort_block_pos,
const vector<size_t> & block_pos_start);
// Appends to block_index the indices of block_pos_start ordered by ascending
// position.
26void obtain_bindex_in_modified_string(
const vector<size_t>& block_pos_start, vector<int>& block_index);
// Cuts str at the begin and end position of every block.
27bool split_string(
const string & str, vector<string> &str_vec,
const vector<string> &block_begin,
const vector<string> &block_end,vector<int> &block_index);
// Builds the block tags with gen_block() and splits dmrpp_str with
// split_string().
28bool convert_dmrppstr_to_vec(
const string &dmrpp_str,vector<string> &dmrpp_str_vec,
const vector<string> &var_types,
const vector<string> &var_names,vector<int> & block_index);
// Inserts a newline plus the chunk text into the odd-indexed entries of
// dmrpp_str_vec.
29void add_missing_info_to_vec(vector<string> &dmrpp_str_vec,
const vector<string> &chunk_info_list,
const vector<int>&block_index);
// Concatenates the vector entries and writes them to fname.
30void write_vec_to_file(
const string &fname,
const vector<string> &dmrpp_str_vec);
// --- Small utilities ---
// Reads the whole content of filename into out.
33void file_to_string(
const string &filename,
string & out);
// Splits in_str on delim and appends the pieces to out_vec.
34void string_tokenize(
const string &in_str,
const char delim,vector<string>&out_vec);
// Program entry point.
// Uses argv[1] (supplemental DMR++ file), argv[2] (DMR++ file to modify),
// argv[3] (href of the data file) and argv[4] (text file listing the variable
// names to merge), as the usage text below describes.
// NOTE(review): braces, return statements and the test that triggers the usage
// text are not visible in this listing; the leading numbers look like listing
// line numbers fused into the code -- confirm against the original file.
37int main (
int argc,
char**argv)
// Parallel lists filled by obtain_var_info(): entry i of each list describes
// the same variable.
40 vector<string>var_types;
41 vector<string>var_names;
42 vector<string>chunk_info_list;
44 bool add_dmrpp_info =
false;
45 bool is_chunk_mark1 =
true;
47 string missing_dmrpp_str;
// Usage text for the four expected arguments.
50 cout<<
"Please provide four arguments: "<< endl;
51 cout<<
" The first is the dmrpp file that contains the missing variable value information. "<<endl;
52 cout<<
" The second is the original dmrpp file. "<<endl;
53 cout<<
" An third one is the href to the missing variables HDF5 file. "<<endl;
54 cout<<
" The fourth one is the text file that includes the missing variable information. "<<endl;
// Load the supplemental DMR++ file completely into memory.
59 string fname(argv[1]);
62 file_to_string(fname,missing_dmrpp_str);
// Extract type, name and chunk text of every variable in the supplemental file.
65 add_dmrpp_info = obtain_var_info(missing_dmrpp_str,var_types,var_names,chunk_info_list,is_chunk_mark1);
68 if(
false == add_dmrpp_info) {
69 cout<<
"Cannot find corresponding chunk info. from the supplemental dmrpp file."<<endl;
70 cout<<
"You may need to check if there is any variable in the dmrpp file. "<<endl;
71 cout<<
"The dmrpp file is "<<fname <<endl;
// The three lists are indexed together below, so their sizes must agree.
74 if(var_types.size() !=var_names.size() || var_names.size() != chunk_info_list.size()) {
75 cout <<
"Var type, var name and chunk_info must have the same number of sizes. "<<endl;
76 cout <<
"The dmrpp file is "<<fname <<endl;
// NOTE(review): the body of this loop is not visible in this listing.
82 for (
size_t i =0; i<var_names.size();i++) {
// Read the list of wanted variable names from argv[4] and split it on delim.
// NOTE(review): the declaration of delim is not visible in this listing.
91 string mvar_fname(argv[4]);
92 string missing_vname_str;
95 file_to_string(mvar_fname,missing_vname_str);
97 vector<string> missing_vname_list;
99 string_tokenize(missing_vname_str,delim,missing_vname_list);
// Prints every name that was read.
102 for(
size_t i = 0;i<missing_vname_list.size();i++)
103 cout <<
"missing_vname_list["<<i<<
"]"<<missing_vname_list[i]<<endl;
// Keep only the variables whose name appears in the wanted list; the three
// new_* lists stay parallel because they are filled together.
107 vector<string>new_var_types;
108 vector<string>new_var_names;
109 vector<string>new_chunk_info_list;
111 for (
size_t i =0; i<var_names.size();i++) {
112 for(
size_t j = 0; j<missing_vname_list.size();j++) {
113 if(var_names[i] == missing_vname_list[j]) {
114 new_var_names.push_back(var_names[i]);
115 new_var_types.push_back(var_types[i]);
116 new_chunk_info_list.push_back(chunk_info_list[i]);
// Add the file address taken from argv[3] to the selected chunk text.
// The boolean result of add_faddr_chunk_info() is not used here.
123 string fadd_source(argv[3]);
124 add_faddr_chunk_info(missing_dmrpp_str,new_chunk_info_list,is_chunk_mark1,fadd_source);
// Prints the chunk text of every selected variable.
127for (
size_t i =0; i<new_var_types.size();i++) {
128cout<<
"new chunk_info_list["<<i<<
"]"<< endl;
129cout<<new_chunk_info_list[i]<<endl;
// Merge the selected chunk text into the file named by argv[2].
134 string fname2(argv[2]);
137 bool well_formed = add_missing_info_to_file(fname2,new_var_types,new_var_names,new_chunk_info_list);
139 if(
false == well_formed) {
140 cout <<
"The dmrpp file to be modified is either not well-formed or contains nested variable blocks that cannot be supported by this routine" <<endl;
141 cout <<
"The dmrpp file is "<<fname2<<endl;
// Scans miss_dmrpp_info for variable elements of the types listed below and,
// for each one, appends its type to var_types, its name to var_names and its
// chunk text to chunk_info_list. is_chunk_mark1 is passed on to
// find_chunk_info().
// NOTE(review): several lines (declarations of str_pos, var_sign and i, the
// else branches, loop increments, braces and the return) are not visible in
// this listing; the leading numbers look like fused listing line numbers.
150bool obtain_var_info(
const string & miss_dmrpp_info,vector<string> & var_types, vector<string>&var_names,vector<string>&chunk_info_list,
bool & is_chunk_mark1) {
// Element names that are treated as variables.
153 vector<string> var_type_list;
154 var_type_list.push_back(
"Float32");
155 var_type_list.push_back(
"Int32");
156 var_type_list.push_back(
"Float64");
157 var_type_list.push_back(
"Byte");
158 var_type_list.push_back(
"Int16");
159 var_type_list.push_back(
"UInt16");
160 var_type_list.push_back(
"String");
161 var_type_list.push_back(
"UInt32");
162 var_type_list.push_back(
"Int8");
163 var_type_list.push_back(
"Int64");
164 var_type_list.push_back(
"UInt64");
165 var_type_list.push_back(
"UInt8");
166 var_type_list.push_back(
"Char");
// Positions (offsets into miss_dmrpp_info) filled in by the find_* helpers.
168 size_t var_type_pos_start =0;
169 size_t var_name_pos_start = 0;
170 size_t var_name_pos_end = 0;
171 size_t chunk_pos_start = 0;
172 size_t chunk_pos_end = 0;
173 size_t var_end_pos= 0;
// NOTE(review): the statement guarded by this empty-input test is not visible
// here; size()-1 below wraps around for an empty string.
177 if(miss_dmrpp_info.empty())
180 size_t str_last_char_pos = miss_dmrpp_info.size()-1;
181 bool well_formed =
true;
// Walk through the text until its end is reached or a lookup fails.
184 while (str_pos <=str_last_char_pos && well_formed) {
188 string temp_var_sign;
189 size_t temp_var_type_pos_start=string::npos;
190 int var_type_index = -1;
// Search for `<Type name="` for every known type and remember the match with
// the smallest position at or after str_pos.
195 while(i <var_type_list.size()) {
196 var_sign =
"<"+var_type_list[i]+
" name=\"";
197 var_type_pos_start = miss_dmrpp_info.find(var_sign,str_pos);
198 if(var_type_pos_start ==string::npos) {
204 if(temp_var_type_pos_start>var_type_pos_start){
205 temp_var_type_pos_start = var_type_pos_start;
207 temp_var_sign = var_sign;
// Adopt the earliest match, if there was one.
215 if(temp_var_type_pos_start !=string::npos) {
216 var_type_pos_start = temp_var_type_pos_start;
217 var_sign = temp_var_sign;
222 if(var_type_pos_start == string::npos) {
223 str_pos = string::npos;
// Step over the marker so that str_pos is on the first character of the name.
227 str_pos = var_type_pos_start+var_sign.size();
// Resolve, in this order, the name, the end of the variable block and the
// chunk text.
231 if(
false == find_var_name(miss_dmrpp_info,str_pos,var_name_pos_start,var_name_pos_end))
233 else if(
false == find_end_var_block(miss_dmrpp_info,var_type_list[var_type_index],str_pos,var_end_pos))
235 else if(
false == find_chunk_info(miss_dmrpp_info,str_pos,chunk_pos_start,chunk_pos_end,var_end_pos,is_chunk_mark1))
// Resume one character after the position reported for the closing tag.
239 str_pos = var_end_pos+1;
// Record this variable.
241 var_types.push_back(var_type_list[var_type_index]);
242 var_names.push_back(miss_dmrpp_info.substr(var_name_pos_start,var_name_pos_end-var_name_pos_start));
243 string temp_chunk_info = miss_dmrpp_info.substr(chunk_pos_start,chunk_pos_end-chunk_pos_start);
// The substring stops in front of the end marker, so the marker is appended
// again. NOTE(review): the `else` between the two appends is not visible here.
244 if(
true == is_chunk_mark1)
245 temp_chunk_info +=
"</dmrpp:chunks>";
247 temp_chunk_info +=
"/>";
248 chunk_info_list.push_back(temp_chunk_info);
// Locate the end of a variable name inside a `name="..."` attribute.
//
// On entry str_pos must be the offset of the first character of the name,
// i.e. the character right after the opening double quote.
//
// str                 text being scanned
// str_pos             in: start of the name; out: offset of the closing
//                     double quote (left unchanged when the lookup fails)
// var_name_pos_start  out: offset of the first character of the name
// var_name_pos_end    out: offset of the closing double quote, or
//                     string::npos when there is none
//
// Returns true when the closing double quote was found.
bool find_var_name(const std::string &str, size_t &str_pos, size_t &var_name_pos_start, size_t &var_name_pos_end) {

    var_name_pos_start = str_pos;
    var_name_pos_end = str.find('"', str_pos);

    // Without a closing quote the name is unterminated; keep str_pos where it
    // was so the caller does not continue from string::npos.
    if (var_name_pos_end == std::string::npos)
        return false;

    str_pos = var_name_pos_end;
    return true;
}
// Locate the closing tag of a variable element.
//
// str          text being scanned
// var_type     element name, e.g. "Float32"; the tag searched for is
//              "</" + var_type + ">"
// str_pos      offset at which the search starts
// var_end_pos  out: offset of the first character of the closing tag, or
//              string::npos when it is not found
//
// Returns true when the closing tag was found. Nothing is printed: the result
// is reported through the return value only.
bool find_end_var_block(const std::string &str, const std::string &var_type, const size_t &str_pos, size_t &var_end_pos) {

    const std::string end_var = "</" + var_type + '>';
    var_end_pos = str.find(end_var, str_pos);
    return var_end_pos != std::string::npos;
}
// Locate the chunk text of the variable whose block ends at var_end_pos.
//
// Two layouts are recognised:
//   1. a "<dmrpp:chunks ...> ... </dmrpp:chunks>" element (is_mark1 = true)
//   2. a single "<dmrpp:chunk ... />" element             (is_mark1 = false)
// Layout 1 is tried first.
//
// str                   text being scanned
// str_pos               offset at which the search starts
// chunk_info_pos_start  out: start of the chunk text, moved back over the
//                       spaces/tabs that precede the start mark so that the
//                       indentation is part of the text
// chunk_info_pos_end    out: offset of the end mark ("</dmrpp:chunks>" or
//                       "/>"); the mark itself is NOT part of the range
// var_end_pos           offset of the variable's closing tag
// is_mark1              out: which layout was found (false when none)
//
// Returns true when a start mark and its end mark were found and the end mark
// lies before var_end_pos. When no start mark exists both positions are set
// to string::npos.
//
// NOTE(review): the set of characters treated as indentation is not visible
// in the listing this was rebuilt from; space and tab are assumed -- confirm.
bool find_chunk_info(const std::string &str, const size_t &str_pos, size_t &chunk_info_pos_start,
                     size_t &chunk_info_pos_end, const size_t &var_end_pos, bool &is_mark1) {

    const std::string chunk_start_mark1 = "<dmrpp:chunks";
    const std::string chunk_end_mark1 = "</dmrpp:chunks>";
    const std::string chunk_start_mark2 = "<dmrpp:chunk ";
    const std::string chunk_end_mark2 = "/>";
    const char wspace[] = " \t";

    size_t mark_pos = str.find(chunk_start_mark1, str_pos);
    const std::string *end_mark = &chunk_end_mark1;
    is_mark1 = true;

    if (mark_pos == std::string::npos) {
        // No chunked layout; fall back to the single chunk element.
        is_mark1 = false;
        mark_pos = str.find(chunk_start_mark2, str_pos);
        end_mark = &chunk_end_mark2;
    }

    if (mark_pos == std::string::npos) {
        // Report the failure explicitly instead of doing arithmetic on npos.
        chunk_info_pos_start = std::string::npos;
        chunk_info_pos_end = std::string::npos;
        return false;
    }

    // Search for the end mark from the start mark, not from str_pos: an
    // earlier self-closing element would otherwise be taken as the end.
    chunk_info_pos_end = str.find(*end_mark, mark_pos);

    // Move the start back over the indentation in front of the start mark.
    chunk_info_pos_start = 0;
    if (mark_pos > 0) {
        const size_t last_non_ws = str.find_last_not_of(wspace, mark_pos - 1);
        if (last_non_ws != std::string::npos)
            chunk_info_pos_start = last_non_ws + 1;
    }

    if (chunk_info_pos_end == std::string::npos)
        return false;

    // The chunk text must end inside the variable block.
    return chunk_info_pos_end < var_end_pos;
}
// Adds a file-address attribute to every entry of chunk_info.
//
// str             text of the supplemental DMR++ file; searched for the first
//                 ` name="` attribute and for an existing `dmrpp:href="` value
// chunk_info      chunk text of the selected variables, modified in place
// is_dmrpp_mark1  selects the helper used on each entry (see the loop below)
// faddr_source    text placed between the quotes of the ` href="..."`
//                 attribute built below
//
// NOTE(review): the declarations of hdf5_fname and hdf5_faddr, the statements
// guarded by the `if` tests, the else branches and the return are not visible
// in this listing; the leading numbers look like fused listing line numbers.
336bool add_faddr_chunk_info(
const string &str,vector<string>& chunk_info,
bool is_dmrpp_mark1,
const string faddr_source) {
338 bool well_formed=
true;
// Test for an empty list (the guarded statement is not visible here).
339 if(chunk_info.size()==0)
341 string addr_mark =
"dmrpp:href=\"";
// Test whether the first entry already contains a dmrpp:href attribute.
346 if(chunk_info[0].find(addr_mark)!=string::npos)
352 string name_mark =
" name=\"";
353 string end_delim1 =
"\"";
// Take the quoted value of the first ` name="` attribute in str as hdf5_fname.
// NOTE(review): hdf5_fname_start_pos is used in the next find() and in
// substr() even when it is string::npos -- confirm this cannot happen.
356 size_t hdf5_fname_start_pos = str.find(name_mark);
357 if(hdf5_fname_start_pos == string::npos)
359 size_t hdf5_fname_end_pos = str.find(end_delim1,hdf5_fname_start_pos+name_mark.size());
360 if(hdf5_fname_end_pos == string::npos)
362 hdf5_fname = str.substr(hdf5_fname_start_pos+name_mark.size(),hdf5_fname_end_pos-hdf5_fname_start_pos-name_mark.size());
// If str carries a dmrpp:href attribute, take its quoted value as hdf5_faddr.
367 size_t hdf5_faddr_start_pos = str.find(addr_mark);
368 if(hdf5_faddr_start_pos != string::npos) {
369 size_t hdf5_faddr_end_pos = str.find(end_delim1,hdf5_faddr_start_pos+addr_mark.size());
370 if(hdf5_faddr_end_pos == string::npos)
372 hdf5_faddr = str.substr(hdf5_faddr_start_pos+addr_mark.size(),hdf5_faddr_end_pos-hdf5_faddr_start_pos-addr_mark.size());
// Build the attribute text ` href="<faddr_source>"`.
376 hdf5_faddr =
" href=\"" + faddr_source + end_delim1;
// Apply the attribute to every entry. The boolean results of the two helpers
// are not used.
388 for (
size_t i = 0;i<chunk_info.size();i++) {
395 if(
true == is_dmrpp_mark1)
396 add_faddr_chunk_comp_lines(chunk_info[i],hdf5_faddr);
398 add_faddr_contig_line(chunk_info[i],hdf5_faddr);
// Insert file_addr into every chunk line of a "<dmrpp:chunks>" element.
//
// Each "<dmrpp:chunk offset=... />" line gets file_addr inserted directly in
// front of its "/>". The "/>" of the last chunk line is additionally preceded
// by one space; this matches the text the function has always produced.
//
// chunk_info  in/out: text of one "<dmrpp:chunks> ... </dmrpp:chunks>" element
// file_addr   text to insert, e.g. ` href="file.h5"`
//
// Returns true on success. Returns false and leaves chunk_info untouched when
// no chunk line exists, a chunk line has no "/>", or "</dmrpp:chunks>" is
// missing after the last chunk line, so malformed input is never replaced by
// a truncated copy.
bool add_faddr_chunk_comp_lines(std::string &chunk_info, const std::string &file_addr) {

    const std::string chunk_line_mark = "<dmrpp:chunk offset=";
    const std::string chunk_line_end_mark = "/>";
    const std::string chunk_stop_mark = "</dmrpp:chunks>";

    std::string rebuilt;
    size_t copied_upto = 0;      // everything before this offset is already in `rebuilt`
    bool found_chunk_line = false;

    for (;;) {
        const size_t line_pos = chunk_info.find(chunk_line_mark, copied_upto);
        if (line_pos == std::string::npos)
            break;

        const size_t line_end = chunk_info.find(chunk_line_end_mark, line_pos);
        if (line_end == std::string::npos)
            return false;        // unterminated chunk line

        // Copy up to (not including) the "/>" and add the address there.
        rebuilt.append(chunk_info, copied_upto, line_end - copied_upto);
        rebuilt += file_addr;
        copied_upto = line_end;
        found_chunk_line = true;
    }

    if (!found_chunk_line)
        return false;

    if (chunk_info.find(chunk_stop_mark, copied_upto) == std::string::npos)
        return false;

    // Remainder: the last "/>" and everything after it.
    rebuilt += ' ';
    rebuilt.append(chunk_info, copied_upto, std::string::npos);
    chunk_info.swap(rebuilt);
    return true;
}
// Insert file_addr into a single "<dmrpp:chunk ... />" element.
//
// file_addr followed by one space is placed directly in front of the first
// "/>" found in chunk_info.
//
// chunk_info  in/out: text of the chunk element
// file_addr   text to insert, e.g. ` href="file.h5"`
//
// Returns false and leaves chunk_info unchanged when it contains no "/>".
bool add_faddr_contig_line(std::string &chunk_info, const std::string &file_addr) {

    const std::string chunk_line_end_mark = "/>";

    const size_t chunk_line_end_pos = chunk_info.find(chunk_line_end_mark);
    if (chunk_line_end_pos == std::string::npos)
        return false;

    chunk_info.insert(chunk_line_end_pos, file_addr + ' ');
    return true;
}
// Reads the DMR++ file fname, inserts the chunk text of the given variables
// into their blocks and writes the result back to the same file.
//
// fname            file that is read and, on success, overwritten
// var_types        element type of each variable
// var_names        name of each variable
// chunk_info_list  chunk text of each variable
//
// The file is only rewritten when convert_dmrppstr_to_vec() reports success.
// NOTE(review): the declaration of dmrpp_str, the closing braces and the
// return are not visible in this listing; the leading numbers look like fused
// listing line numbers.
477bool add_missing_info_to_file(
const string &fname,
const vector<string> &var_types,
const vector<string> &var_names,
const vector<string> &chunk_info_list) {
479 bool well_formed =
true;
483 file_to_string(fname,dmrpp_str);
484 vector<string>dmrpp_str_vec;
485 vector <int> block_index;
// Split the document into fragments around the variable blocks.
491 well_formed = convert_dmrppstr_to_vec(dmrpp_str,dmrpp_str_vec,var_types,var_names,block_index);
// Swap with an empty temporary so the document text is released now; the
// fragments in dmrpp_str_vec hold the content from here on.
494 string().swap(dmrpp_str);
497 if(
true == well_formed) {
498 add_missing_info_to_vec(dmrpp_str_vec,chunk_info_list,block_index);
499 write_vec_to_file(fname,dmrpp_str_vec);
// Splits dmrpp_str into fragments around the blocks of the given variables.
//
// dmrpp_str      text of the DMR++ document
// dmrpp_str_vec  out: fragments produced by split_string()
// var_types      element type of each variable
// var_names      name of each variable
// block_index    out: filled by split_string()
//
// NOTE(review): braces and the return are not visible in this listing; the
// leading numbers look like fused listing line numbers.
508bool convert_dmrppstr_to_vec(
const string &dmrpp_str,vector<string> &dmrpp_str_vec,
const vector<string> &var_types,
const vector<string> &var_names,vector<int>&block_index){
// gen_block() assigns by index, so both vectors are sized before the call.
510 vector<string>block_begin;
511 block_begin.resize(var_types.size());
512 vector<string>block_end;
513 block_end.resize(var_types.size());
514 gen_block(var_types,var_names,block_begin,block_end);
// Prints the generated opening and closing tags.
517for(
size_t i =0; i<block_begin.size();i++)
519cout<<
"block_begin["<<i<<
"]= "<<block_begin[i]<<endl;
520cout<<
"block_end["<<i<<
"]= "<<block_end[i]<<endl;
525 bool well_formed = split_string(dmrpp_str,dmrpp_str_vec,block_begin,block_end,block_index);
// Insert the chunk text into the variable blocks of a split document.
//
// dmrpp_str_vec holds the fragments of the document; the variable blocks are
// the entries with an odd index (1, 3, 5, ...). For the i-th block the text
// '\n' + chunk_info_list[block_index[i]] is inserted right after the
// second-to-last '>' of the block, i.e. in front of the line that holds the
// block's closing tag.
//
// dmrpp_str_vec    in/out: document fragments
// chunk_info_list  chunk text of each variable
// block_index      block_index[i] selects the chunk_info_list entry that
//                  belongs to the i-th block
//
// Entries whose block slot or chunk index is out of range are skipped instead
// of being accessed.
void add_missing_info_to_vec(std::vector<std::string> &dmrpp_str_vec, const std::vector<std::string> &chunk_info_list,
                             const std::vector<int> &block_index) {

    const char insert_mark = '>';

    for (size_t i = 0; i < block_index.size(); i++) {

        const size_t block_slot = 2 * i + 1;
        const int info_idx = block_index[i];
        if (block_slot >= dmrpp_str_vec.size() || info_idx < 0
            || static_cast<size_t>(info_idx) >= chunk_info_list.size())
            continue;

        // Work on the fragment in place; no copy of the block is needed.
        std::string &block = dmrpp_str_vec[block_slot];

        // The last '>' ends the closing tag; the one before it ends the
        // element that precedes the closing tag.
        size_t insert_pos = block.find_last_of(insert_mark);
        insert_pos = block.find_last_of(insert_mark, insert_pos - 1);

        block.insert(insert_pos + 1, '\n' + chunk_info_list[static_cast<size_t>(info_idx)]);
    }
}
// Write all fragments, in order and without separators, to the file fname.
//
// fname          path of the file; an existing file is overwritten
// dmrpp_str_vec  fragments to write
//
// The fragments are streamed one by one, so no second copy of the whole
// document is built in memory. The stream is closed when it goes out of
// scope. If the file cannot be opened nothing is written.
void write_vec_to_file(const std::string &fname, const std::vector<std::string> &dmrpp_str_vec) {

    std::ofstream outFile(fname.c_str());

    for (size_t i = 0; i < dmrpp_str_vec.size(); i++)
        outFile << dmrpp_str_vec[i];
}
// Build the opening and closing tag text of every variable.
//
// For variable i:
//   block_begin[i] = <TYPE name="NAME">
//   block_end[i]   = </TYPE>
//
// var_type_list  element type of each variable
// var_name_list  name of each variable
// block_begin    out: opening tags
// block_end      out: closing tags
//
// The output vectors are enlarged when they are too small, so callers need not
// size them first; vectors that are already large enough keep their size. Only
// variables that have both a type and a name are processed.
void gen_block(const std::vector<std::string> &var_type_list, const std::vector<std::string> &var_name_list,
               std::vector<std::string> &block_begin, std::vector<std::string> &block_end) {

    const size_t num_vars = std::min(var_type_list.size(), var_name_list.size());

    if (block_begin.size() < num_vars)
        block_begin.resize(num_vars);
    if (block_end.size() < num_vars)
        block_end.resize(num_vars);

    for (size_t i = 0; i < num_vars; i++) {
        block_begin[i] = '<' + var_type_list[i] + ' ' + "name=\"" + var_name_list[i] + "\">";
        block_end[i] = "</" + var_type_list[i] + '>';
    }
}
587bool split_string(
const string & str, vector<string> &str_vec,
const vector<string> &block_begin,
const vector<string>&block_end,vector<int>&block_index) {
589 bool well_formed =
true;
590 vector<size_t> block_begin_pos;
591 vector<size_t> block_end_pos;
592 block_begin_pos.resize(block_begin.size());
593 block_end_pos.resize(block_end.size());
601 for(
size_t i = 0; i<block_begin.size();i++) {
602 block_begin_pos[i] = str.find(block_begin[i]);
603 block_end_pos[i] = str.find(block_end[i],block_begin_pos[i])+(block_end[i].size());
606 obtain_bindex_in_modified_string(block_begin_pos,block_index);
609for(
size_t i = 0; i<block_index.size();i++)
610cout<<
"block_index["<<i<<
"] is: "<<block_index[i] <<endl;
612 vector<size_t>block_pos;
613 block_pos.resize(2*block_begin_pos.size());
614 for (
size_t i = 0; i<block_begin.size();i++) {
615 block_pos[2*i] = block_begin_pos[i];
616 block_pos[2*i+1] = block_end_pos[i];
620 sort(block_pos.begin(),block_pos.end());
625 well_formed = check_overlap_intervals(block_pos,block_begin_pos);
631 if(
true == well_formed) {
632 size_t str_block_pos = 0;
633 str_vec.resize(block_pos.size()+1);
634 for (
size_t i =0; i<block_pos.size(); i++) {
635 str_vec[i] = str.substr(str_block_pos,block_pos[i]-str_block_pos);
636 str_block_pos = block_pos[i];
638 str_vec[block_pos.size()] = str.substr(str_block_pos);
641for(
size_t i = 0; i <str_vec.size();i++)
642 cout<<
"str_vec["<<i<<
"] is: "<<str_vec[i] <<endl;
652bool check_overlap_intervals(
const vector<size_t> &sort_block_pos,
const vector<size_t>&block_pos_start){
655 set<size_t>sort_start_pos;
656 set<size_t>start_pos;
657 for (
size_t i = 0; i<block_pos_start.size();i++) {
658 sort_start_pos.insert(sort_block_pos[2*i]);
659 start_pos.insert(block_pos_start[i]);
661 return (sort_start_pos == start_pos);
// Compute the order in which the blocks occur in the document.
//
// block_pos_start  begin offset of each block, indexed like the variable lists
// block_index      out: for k = 0, 1, ... the index (into block_pos_start) of
//                  the block with the k-th smallest begin offset. Entries are
//                  appended; existing content is kept.
//
// Example: offsets {30, 10, 20} append {1, 2, 0}.
void obtain_bindex_in_modified_string(const std::vector<size_t> &block_pos_start, std::vector<int> &block_index) {

    const size_t num_blocks = block_pos_start.size();

    // Pair every offset with its original index, then sort by offset.
    std::vector<std::pair<size_t, int> > pos_index;
    pos_index.reserve(num_blocks);
    for (size_t i = 0; i < num_blocks; i++)
        pos_index.push_back(std::make_pair(block_pos_start[i], static_cast<int>(i)));

    std::sort(pos_index.begin(), pos_index.end());

    block_index.reserve(block_index.size() + num_blocks);
    for (size_t i = 0; i < num_blocks; i++)
        block_index.push_back(pos_index[i].second);
}
// Read the complete content of a file into a string.
//
// filename  path of the file to read
// out_str   out: the file content; set to an empty string when the file
//           cannot be opened or is empty
//
// The input stream is closed when it goes out of scope.
void file_to_string(const std::string &filename, std::string &out_str) {

    std::ifstream inFile(filename.c_str());

    std::stringstream strStream;
    strStream << inFile.rdbuf();

    out_str = strStream.str();
}
// Split a string on a delimiter character.
//
// in_str   text to split
// delim    delimiter character
// out_vec  out: the pieces, appended in order; existing content is kept
//
// Carriage-return and line-feed characters at either end of a piece are
// removed. The input usually comes from a text file read as a whole, so the
// last piece would otherwise carry the file's trailing newline and never
// compare equal to a variable name. An empty in_str appends nothing.
void string_tokenize(const std::string &in_str, const char delim, std::vector<std::string> &out_vec) {

    const char line_breaks[] = "\r\n";

    std::stringstream ss_str(in_str);
    std::string token;

    while (std::getline(ss_str, token, delim)) {
        const size_t first_kept = token.find_first_not_of(line_breaks);
        if (first_kept == std::string::npos) {
            // The piece consists of line breaks only (or is empty).
            token.clear();
        }
        else {
            const size_t last_kept = token.find_last_not_of(line_breaks);
            token = token.substr(first_kept, last_kept - first_kept + 1);
        }
        out_vec.push_back(token);
    }
}