bes Updated for version 3.20.13
check_dmrpp.cc
1#include <iostream>
2#include<fstream>
3#include <string>
4#include <vector>
5using namespace std;
6
7bool find_var(const string &str, const vector<string>var_type_list,
8 vector<string>&var_type,vector<string>&var_name);
9bool find_endvar(const string &str,const string vtype);
10bool find_chunk(const string &str);
11
12int main (int argc, char** argv)
13{
14 // Provide the dmrpp file name and the file name to store the variables that miss values
15 if(argc !=3) {
16 cout<<"Please provide the dmrpp file name to be checked and the output name."<<endl;
17 return -1;
18 }
19
20 string fname(argv[1]);
21 ifstream dmrpp_fstream;
22 dmrpp_fstream.open(fname.c_str(),ifstream::in);
23 string dmrpp_line;
24
25 // DAP4 supported atomic datatype
26 vector<string> var_type_list;
27 var_type_list.push_back("Float32");
28 var_type_list.push_back("Int32");
29 var_type_list.push_back("Float64");
30 var_type_list.push_back("Byte");
31 var_type_list.push_back("Int16");
32 var_type_list.push_back("UInt16");
33 var_type_list.push_back("String");
34 var_type_list.push_back("UInt32");
35 var_type_list.push_back("Int8");
36 var_type_list.push_back("Int64");
37 var_type_list.push_back("UInt64");
38 var_type_list.push_back("UInt8");
39 var_type_list.push_back("Char");
40
41 // var_type and var_name should be var data type and var name in the dmrpp file
42 vector<string>var_type;
43 vector<string>var_name;
44
45 //The vector to check if chunk block inside this var block(<var ..> </var>)
46 vector<bool>chunk_exist;
47
48 // The following flags are used to check the variables that miss the values.
49 // In a dmrpp file, an example of variable block may start from
50 // <Float32 name="temperature"> and end with </Float32>
51 // fin_vb_start: flag to find the start of the var block
52 // fin_vb_end: flag to find the end of the var block
53 // chunk_found: flag to find is chunking information is inside the var block
54 bool fin_vb_start = false;
55 bool fin_vb_end = false;
56 bool chunk_found = false;
57
58 // Check every line of the dmrpp file. This will use less memory.
59 while(getline(dmrpp_fstream,dmrpp_line)) {
60
61 // If we find the start of the var block(<var..>)
62 if(true == fin_vb_start) {
63
64 // var data type must exist.
65 if(var_type.empty()) {
66 cout<<"Doesn't have the variable datatype, abort for dmrpp file "<<fname << endl;
67 return -1;
68 }
69 // Not find the end of var block. try to find it.
70 if(false == fin_vb_end)
71 fin_vb_end = find_endvar(dmrpp_line, var_type[var_type.size()-1]);
72
73 // If find the end of var block, check if the chunk is already found in the var block.
74 if(true == fin_vb_end) {
75 if(false == chunk_found)
76 chunk_exist.push_back(false);
77
78 // If we find the end of this var block,
79 // reset all bools for the next variable.
80 fin_vb_start = false;
81 fin_vb_end = false;
82 chunk_found = false;
83 }
84 else {// Check if having chunks within this var block.
85 if(false == chunk_found) {
86 chunk_found = find_chunk(dmrpp_line);
87 // When finding the chunk info, update the chunk_exist vector.
88 if(true == chunk_found)
89 chunk_exist.push_back(true);
90 }
91 }
92 }
93 else // Continue finding the var block
94 fin_vb_start = find_var(dmrpp_line,var_type_list,var_type,var_name);
95
96 }
97
98 //Sanity check to make sure the chunk_exist vector is the same as var_type vector.
99 //If not, something is wrong with this dmrpp file.
100 if(chunk_exist.size()!=var_type.size()) {
101 cout<<"Number of chunk check is not consistent with the number of var check."<<endl;
102 cout<< "The dmrpp file is "<<fname<<endl;
103 return -1;
104 }
105
106 bool has_missing_info = false;
107 size_t last_missing_chunk_index = 0;
108
109 // Check if there are any missing variable information.
110 if (!var_type.empty()) {
111 auto ritr = var_type.rbegin();
112 size_t i = var_type.size() - 1;
113 while(ritr != var_type.rend()) {
114 if (!chunk_exist[i]) {
115 has_missing_info = true;
116 last_missing_chunk_index = i;
117 break;
118 }
119 ritr++;
120 i--;
121 }
122 }
123
124 // Report the final output.
125 if(true == has_missing_info) {
126
127 ofstream dmrpp_ofstream;
128 string fname2(argv[2]);
129 dmrpp_ofstream.open(fname2.c_str(),ofstream::out);
130
131 size_t i = 0;
132 for (auto vt:var_type) {
133 if(!chunk_exist[i]) {
134 if (i!=last_missing_chunk_index)
135 dmrpp_ofstream<<var_name[i] <<",";
136 else
137 dmrpp_ofstream<<var_name[i];
138 }
139 i++;
140 }
141
142 dmrpp_ofstream.close();
143 }
144
145
146 return 0;
147
148}
149
// Find the var type and var name on a line like <Int16 name="foo">.
//
// str:           one line of the dmrpp file.
// var_type_list: the supported variable type names, e.g. "Int16".
// var_type:      on success, the matched type name is appended.
// var_name:      on success, the variable name is appended.
//
// Returns true, and appends to both vectors, only when the line
//   - starts with at least one space and its first non-space character is '<',
//   - ends with '>',
//   - has a type name, between '<' and ' name="', that is in var_type_list,
//   - has a non-empty, quote-terminated name.
// Any other line is ignored: false is returned and the vectors are untouched.
//
// NOTE(review): var_type_list is taken by value, so it is copied on every
// call. Switching to a const reference needs the forward declaration at the
// top of the file to be changed together with this definition.
bool find_var(const string &str, const vector<string>var_type_list,
              vector<string>&var_type,vector<string>&var_name) {

    // Every var block will have spaces before '<'.
    if (str.empty() || str[0] != ' ') {
        return false;
    }

    // Ignore the line with all spaces; the first non-space character should be '<'.
    const size_t open_pos = str.find_first_not_of(' ');
    if (open_pos == string::npos || str[open_pos] != '<') {
        return false;
    }

    // The last character must be '>', maybe this is too strict.
    // Since str[open_pos] is '<', passing this check also guarantees that
    // there is at least one character after '<'.
    if (str[str.size()-1] != '>') {
        return false;
    }

    // The character right after '<' must be the first character of one of
    // the possible variable types; this cheaply skips most other elements.
    const size_t type_pos = open_pos + 1;
    const string first_type_chars = "FIUBSC";
    if (first_type_chars.find(str[type_pos]) == string::npos) {
        return false;
    }

    // Find ' name="' after the first character of the type, like <Int16 name="d16_1">
    const string sep = " name=\"";
    const size_t sep_pos = str.find(sep, type_pos + 1);

    // Cannot find "name=..", ignore this line.
    if (sep_pos == string::npos) {
        return false;
    }

    // Try to figure out the variable type: the text between '<' and ' name="'
    // must be exactly one of the supported type names.
    const size_t type_len = sep_pos - type_pos;
    bool found = false;
    size_t var_index = 0;
    for (size_t i = 0; i < var_type_list.size(); i++) {
        if (str.compare(type_pos, type_len, var_type_list[i]) == 0) {
            var_index = i;
            found = true;
            break;
        }
    }

    // If cannot find the supported type, ignore this line.
    if (!found) {
        return false;
    }

    // Find the end quote position of the variable name. The search has to
    // start at the first character of the name: starting one character later
    // steps over the closing quote of an empty name and picks up text from a
    // following attribute as the "name".
    const size_t name_pos = sep_pos + sep.size();
    const size_t end_name_pos = str.find('"', name_pos);

    // No closing quote or an empty name, ignore this line.
    if (end_name_pos == string::npos || end_name_pos == name_pos) {
        return false;
    }

    // Find both var type and var name. Store them in the vectors.
    var_type.push_back(var_type_list[var_index]);
    var_name.push_back(str.substr(name_pos, end_name_pos - name_pos));
    return true;
}
234
// Tell whether a line carries chunk information.
// Any chunk info (chunk or contiguous) should include "<dmrpp:chunk "
// and, somewhere after it on the same line, "offset".
bool find_chunk(const string &str) {
    const string chunk_tag = "<dmrpp:chunk ";

    // No chunk element on this line at all.
    const size_t tag_pos = str.find(chunk_tag);
    if (tag_pos == string::npos)
        return false;

    // Only an "offset" that follows the chunk tag counts.
    const size_t after_tag = tag_pos + chunk_tag.size();
    return str.find("offset", after_tag) != string::npos;
}
249
// Find the end of a var block such as </Int32>.
//
// str:   one line of the dmrpp file.
// vtype: the type name of the var block that is currently open, e.g. "Int32".
//
// Returns true when the line ends with "</" + vtype + ">". Anything may come
// before the closing tag (typically spaces), nothing may follow it.
// The end of the line is compared directly instead of searching for the first
// occurrence of the tag, so a line that contains the tag more than once is
// still recognized as long as it ends with it.
bool find_endvar(const string &str, const string vtype) {
    const string end_var = "</" + vtype + '>';

    // A line shorter than the closing tag cannot end with it.
    if (str.size() < end_var.size())
        return false;

    return str.compare(str.size() - end_var.size(), end_var.size(), end_var) == 0;
}
262
263
264
265
266
267
268
269
270
271
272
273