bes Updated for version 3.20.10
check_dmrpp.cc
1#include <iostream>
2#include<fstream>
3#include <string>
4#include <vector>
5using namespace std;
6
// Recognise the opening line of a variable block such as <Int16 name="foo">.
// On success the matched type and the variable name are appended to
// var_type and var_name and true is returned.
bool find_var(const std::string &str, const std::vector<std::string> var_type_list,
              std::vector<std::string> &var_type, std::vector<std::string> &var_name);

// Tell whether str ends with the closing tag of a vtype block, e.g. </Int32>.
bool find_endvar(const std::string &str, const std::string vtype);

// Tell whether str carries chunk information ("<dmrpp:chunk " followed by "offset").
bool find_chunk(const std::string &str);
11
12int main (int argc, char** argv)
13{
14 // Provide the dmrpp file name and the file name to store the variables that miss values
15 if(argc !=3) {
16 cout<<"Please provide the dmrpp file name to be checked and the output name."<<endl;
17 return -1;
18 }
19
20 string fname(argv[1]);
21 ifstream dmrpp_fstream;
22 dmrpp_fstream.open(fname.c_str(),ifstream::in);
23 string dmrpp_line;
24
25 // DAP4 supported atomic datatype
26 vector<string> var_type_list;
27 var_type_list.push_back("Float32");
28 var_type_list.push_back("Int32");
29 var_type_list.push_back("Float64");
30 var_type_list.push_back("Byte");
31 var_type_list.push_back("Int16");
32 var_type_list.push_back("UInt16");
33 var_type_list.push_back("String");
34 var_type_list.push_back("UInt32");
35 var_type_list.push_back("Int8");
36 var_type_list.push_back("Int64");
37 var_type_list.push_back("UInt64");
38 var_type_list.push_back("UInt8");
39 var_type_list.push_back("Char");
40
41 // var_type and var_name should be var data type and var name in the dmrpp file
42 vector<string>var_type;
43 vector<string>var_name;
44
45 //The vector to check if chunk block inside this var block(<var ..> </var>)
46 vector<bool>chunk_exist;
47
48 // The following flags are used to check the variables that miss the values.
49 // In a dmrpp file, an example of variable block may start from
50 // <Float32 name="temperature"> and end with </Float32>
51 // fin_vb_start: flag to find the start of the var block
52 // fin_vb_end: flag to find the end of the var block
53 // chunk_found: flag to find is chunking information is inside the var block
54 bool fin_vb_start = false;
55 bool fin_vb_end = false;
56 bool chunk_found = false;
57
58 // Check every line of the dmrpp file. This will use less memory.
59 while(getline(dmrpp_fstream,dmrpp_line)) {
60
61 // If we find the start of the var block(<var..>)
62 if(true == fin_vb_start) {
63
64 // var data type must exist.
65 if(var_type.empty()) {
66 cout<<"Doesn't have the variable datatype, abort for dmrpp file "<<fname << endl;
67 return -1;
68 }
69 // Not find the end of var block. try to find it.
70 if(false == fin_vb_end)
71 fin_vb_end = find_endvar(dmrpp_line, var_type[var_type.size()-1]);
72
73 // If find the end of var block, check if the chunk is already found in the var block.
74 if(true == fin_vb_end) {
75 if(false == chunk_found)
76 chunk_exist.push_back(false);
77
78 // If we find the end of this var block,
79 // reset all bools for the next variable.
80 fin_vb_start = false;
81 fin_vb_end = false;
82 chunk_found = false;
83 }
84 else {// Check if having chunks within this var block.
85 if(false == chunk_found) {
86 chunk_found = find_chunk(dmrpp_line);
87 // When finding the chunk info, update the chunk_exist vector.
88 if(true == chunk_found)
89 chunk_exist.push_back(true);
90 }
91 }
92 }
93 else // Continue finding the var block
94 fin_vb_start = find_var(dmrpp_line,var_type_list,var_type,var_name);
95
96 }
97
98 //Sanity check to make sure the chunk_exist vector is the same as var_type vector.
99 //If not, something is wrong with this dmrpp file.
100 if(chunk_exist.size()!=var_type.size()) {
101 cout<<"Number of chunk check is not consistent with the number of var check."<<endl;
102 cout<< "The dmrpp file is "<<fname<<endl;
103 return -1;
104 }
105
106#if 0
107for(size_t i = 0; i<var_type.size(); i++)
108cout<<"var_type["<<i<<"]= "<<var_type[i]<<endl;
109for(size_t i = 0; i<var_name.size(); i++) {
110cout<<"var_name["<<i<<"]= "<<var_name[i]<<endl;
111cout<<"chunk_exist["<<i<<"]= "<<chunk_exist[i]<<endl;
112}
113#endif
114
115 bool has_missing_info = false;
116 size_t last_missing_chunk_index = 0;
117
118 // Check if there are any missing variable information.
119 if (!var_type.empty()) {
120 auto ritr = var_type.rbegin();
121 size_t i = var_type.size() - 1;
122 while(ritr != var_type.rend()) {
123 if (!chunk_exist[i]) {
124 has_missing_info = true;
125 last_missing_chunk_index = i;
126 break;
127 }
128 ritr++;
129 i--;
130 }
131 }
132
133#if 0
134 size_t j = 0;
135 for (size_t i =0;i<var_type.size();i++) {
136 if(false == chunk_exist[i]){
137 j++;
138 if(j == 1)
139 cout<<"The following variables don't have data value information(datatype + data name): "<<endl;
140 cout<< var_type[i] <<" "<<var_name[i] <<endl;
141 }
142 }
143#endif
144
145 // Report the final output.
146 if(true == has_missing_info) {
147
148 ofstream dmrpp_ofstream;
149 string fname2(argv[2]);
150 dmrpp_ofstream.open(fname2.c_str(),ofstream::out);
151
152 size_t i = 0;
153 for (auto vt:var_type) {
154 if(!chunk_exist[i]) {
155 if (i!=last_missing_chunk_index)
156 dmrpp_ofstream<<var_name[i] <<",";
157 else
158 dmrpp_ofstream<<var_name[i];
159 }
160 i++;
161 }
162
163 dmrpp_ofstream.close();
164 }
165
166
167 return 0;
168
169}
170
// Recognise the opening line of a variable block, e.g. `    <Int16 name="foo">`.
//
// A line is accepted only when all of the following hold:
//   - it starts with a space and its first non-space character is '<',
//   - its last character is '>',
//   - the text between '<' and the first ` name="` is exactly one of the
//     entries of var_type_list,
//   - a closing '"' follows the name.
//
// str            one line of the dmrpp file
// var_type_list  the data types that count as variables
// var_type       receives the matched data type (appended on success)
// var_name       receives the variable name (appended on success)
//
// Returns true when a variable was found; the output vectors are left
// untouched otherwise.
bool find_var(const std::string &str, const std::vector<std::string> var_type_list,
              std::vector<std::string> &var_type, std::vector<std::string> &var_name)
{
    typedef std::string::size_type pos_t;

    // Every var block will have spaces before <
    if (str.empty() || str[0] != ' ')
        return false;

    // Lines made of spaces only, or whose first visible character is not
    // '<', cannot open a var block.
    const pos_t tag_pos = str.find_first_not_of(' ');
    if (tag_pos == std::string::npos || str[tag_pos] != '<')
        return false;

    // Something has to follow '<', and the line has to end with '>'.
    // The latter may be too strict, we will see.
    if (tag_pos + 1 >= str.size() || str[str.size() - 1] != '>')
        return false;

    // Cheap pre-filter: the character right after '<' must be the first
    // letter of one of the possible variable types.
    switch (str[tag_pos + 1]) {
    case 'F':
    case 'I':
    case 'U':
    case 'B':
    case 'S':
    case 'C':
        break;
    default:
        return false;
    }

    // Look for ` name="` behind the first type letter, like <Int16 name="d16_1">
    const std::string name_attr = " name=\"";
    const pos_t attr_pos = str.find(name_attr, tag_pos + 2);
    if (attr_pos == std::string::npos)
        return false;

    // Everything between '<' and ` name="` has to be a supported type.
    const std::string type_token = str.substr(tag_pos + 1, attr_pos - tag_pos - 1);
    const std::string *matched_type = nullptr;
    for (const std::string &candidate : var_type_list) {
        if (candidate == type_token) {
            matched_type = &candidate;
            break;
        }
    }
    if (matched_type == nullptr)
        return false;

    // Find the end quote of the variable name. The search starts one
    // character behind the start of the name, so a quote sitting directly
    // at the start of the name is not taken as the end quote.
    const pos_t name_begin = attr_pos + name_attr.size();
    const pos_t name_end = str.find('"', name_begin + 1);
    if (name_end == std::string::npos)
        return false;

    // Both var type and var name are known. Store them in the vectors.
    const std::string found_name = str.substr(name_begin, name_end - name_begin);
    var_type.push_back(*matched_type);
    var_name.push_back(found_name);
    return true;
}
255
// Tell whether a line carries chunk information.
// Any chunk info (chunk or contiguous) is expected to contain
// "<dmrpp:chunk " with "offset" somewhere behind it on the same line.
//
// str  one line of the dmrpp file
// Returns true when both markers are present in that order.
bool find_chunk(const std::string &str)
{
    const std::string chunk_mark = "<dmrpp:chunk ";
    const std::string offset_mark = "offset";

    const std::string::size_type mark_pos = str.find(chunk_mark);
    if (mark_pos == std::string::npos)
        return false;

    // "offset" only counts when it comes after the chunk marker.
    const std::string::size_type search_from = mark_pos + chunk_mark.size();
    return str.find(offset_mark, search_from) != std::string::npos;
}
270
// Tell whether a line closes a variable block, such as </Int32>.
// Anything (typically spaces) may precede the closing tag, but the first
// occurrence of the tag has to finish exactly at the end of the line.
//
// str    one line of the dmrpp file
// vtype  data type of the variable block that is currently open
// Returns true when str ends the block of type vtype.
bool find_endvar(const std::string &str, const std::string vtype)
{
    const std::string closing_tag = "</" + vtype + '>';
    const std::string::size_type tag_pos = str.find(closing_tag);

    return tag_pos != std::string::npos
        && tag_pos + closing_tag.size() == str.size();
}
283
284
285
286
287
288
289
290
291
292
293
294