libwreport  3.40
input.h
1 #ifndef WREPORT_BUFR_INPUT_H
2 #define WREPORT_BUFR_INPUT_H
3 
4 #include <functional>
5 #include <string>
#include <utility>
6 #include <wreport/bulletin.h>
7 #include <wreport/error.h>
8 #include <wreport/var.h>
9 
10 namespace wreport {
// Forward declaration of the bulletin type; DispatchToSubsets below keeps a
// reference to one and stores variables into its subsets.
11 struct Bulletin;
12 
// Forward declaration of bulletin::AssociatedField, which this header only
// uses by const reference in a member function parameter list.
13 namespace bulletin {
14 struct AssociatedField;
15 }
16 
17 namespace bufr {
18 
20 {
21  Bulletin& out;
22  unsigned subset_count;
23  DispatchToSubsets(Bulletin& out, unsigned subset_count)
24  : out(out), subset_count(subset_count)
25  {
26  }
27 
28  void add_missing(Varinfo info)
29  {
30  for (unsigned i = 0; i < subset_count; ++i)
31  out.subsets[i].store_variable_undef(info);
32  }
33  void add_same(const Var& var)
34  {
35  for (unsigned i = 0; i < subset_count; ++i)
36  out.subsets[i].store_variable(Var(var));
37  }
38  void add_var(unsigned subset, Var&& var)
39  {
40  out.subsets[subset].store_variable(var);
41  }
42 };
43 
47 class Input
48 {
49 protected:
54  void scan_section_length(unsigned sec_no);
55 
56 public:
58  const uint8_t* data;
59 
61  size_t data_len;
62 
70  const char* fname = nullptr;
71 
79  size_t start_offset = 0;
80 
82  unsigned s4_cursor = 0;
83 
85  uint8_t pbyte = 0;
86 
88  int pbyte_len = 0;
89 
91  unsigned sec[6];
92 
99  explicit Input(const std::string& in);
100 
108  void scan_lead_sections();
109 
121  void scan_other_sections(bool has_optional);
122 
124  unsigned offset() const { return s4_cursor; }
125 
127  unsigned bits_left() const
128  {
129  return static_cast<unsigned>((data_len - s4_cursor) * 8 + pbyte_len);
130  }
131 
133  inline unsigned read_byte(unsigned pos) const
134  {
135  return (unsigned)data[pos];
136  }
137 
139  inline unsigned read_byte(unsigned section, unsigned pos) const
140  {
141  return (unsigned)data[sec[section] + pos];
142  }
143 
145  unsigned read_number(unsigned pos, unsigned byte_len) const
146  {
147  unsigned res = 0;
148  for (unsigned i = 0; i < byte_len; ++i)
149  {
150  res <<= 8;
151  res |= data[pos + i];
152  }
153  return res;
154  }
155 
160  inline unsigned read_number(unsigned section, unsigned pos,
161  unsigned byte_len) const
162  {
163  return read_number(sec[section] + pos, byte_len);
164  }
165 
170  uint32_t get_bits(unsigned n)
171  {
172  uint32_t result = 0;
173 
174  if (s4_cursor == data_len)
175  parse_error(
176  "end of buffer while looking for %u bits of bit-packed data",
177  n);
178 
179  // TODO: review and benchmark and possibly simplify
180  // (a possible alternative approach is to keep a current bitmask that
181  // starts at 0x80 and is shifted right by 1 at each read until it
182  // reaches 0, and get rid of pbyte_len)
183  for (unsigned i = 0; i < n; i++)
184  {
185  if (pbyte_len == 0)
186  {
187  pbyte_len = 8;
188  pbyte = data[s4_cursor++];
189  }
190  result <<= 1;
191  if (pbyte & 0x80)
192  result |= 1;
193  pbyte <<= 1;
194  pbyte_len--;
195  }
196 
197  return result;
198  }
199 
203  void skip_bits(unsigned n)
204  {
205  if (s4_cursor == data_len)
206  parse_error(
207  "end of buffer while looking for %u bits of bit-packed data",
208  n);
209 
210  for (unsigned i = 0; i < n; i++)
211  {
212  if (pbyte_len == 0)
213  {
214  pbyte_len = 8;
215  pbyte = data[s4_cursor++];
216  }
217  pbyte <<= 1;
218  pbyte_len--;
219  }
220  }
221 
223  void debug_dump_next_bits(const char* desc, unsigned count,
224  const std::vector<unsigned>& groups = {}) const;
225 
230  void debug_find_sequence(const char* pattern) const;
231 
233  void parse_error(const char* fmt, ...) const WREPORT_THROWF_ATTRS(2, 3);
234 
236  void parse_error(unsigned pos, const char* fmt, ...) const
237  WREPORT_THROWF_ATTRS(3, 4);
238 
241  void parse_error(unsigned section, unsigned pos, const char* fmt, ...) const
242  WREPORT_THROWF_ATTRS(4, 5);
243 
256  void check_available_data(unsigned pos, size_t datalen,
257  const char* expected);
258 
273  void check_available_message_data(unsigned section, unsigned pos,
274  size_t datalen, const char* expected);
275 
290  void check_available_section_data(unsigned section, unsigned pos,
291  size_t datalen, const char* expected);
292 
305  void decode_compressed_number(Var& dest, uint32_t base, unsigned diffbits);
306 
315  void decode_number(Var& dest);
316 
320  bool decode_compressed_base(Varinfo info, uint32_t& base,
321  uint32_t& diffbits);
322 
327  void decode_compressed_number(Varinfo info, unsigned subsets,
328  std::function<void(unsigned, Var&&)> dest);
329 
330  void decode_string(Varinfo info, unsigned subsets, DispatchToSubsets& dest);
331 
332  void decode_compressed_number(Varinfo info, unsigned subsets,
333  DispatchToSubsets& dest);
334 
340  const bulletin::AssociatedField& afield,
341  unsigned subsets,
342  std::function<void(unsigned, Var&&)> dest);
343 
355  void decode_compressed_semantic_number(Var& dest, unsigned subsets);
356 
373  bool decode_string(unsigned bit_len, char* str, size_t& len);
374 
386  void decode_string(Var& dest);
387 
399  void decode_string(Var& dest, unsigned subsets);
400 
405  void decode_string(Varinfo info, unsigned subsets,
406  std::function<void(unsigned, Var&&)> dest);
407 
419  void decode_binary(Var& dest);
420 
428  std::string decode_uncompressed_bitmap(unsigned size);
429 
443  std::string decode_compressed_bitmap(unsigned size);
444 };
445 
446 } // namespace bufr
447 } // namespace wreport
448 #endif
void check_available_section_data(unsigned section, unsigned pos, size_t datalen, const char *expected)
Check that the given section in the input buffer contains at least datalen characters after offset po...
void decode_number(Var &dest)
Decode a number as described by dest.info(), and set it as value for dest.
const char * fname
Input file name (optional).
Definition: input.h:70
void scan_lead_sections()
Scan the message filling in the sec[] array of start offsets of sections 0 and 1. ...
unsigned s4_cursor
Offset of the byte we are currently decoding.
Definition: input.h:82
unsigned read_number(unsigned pos, unsigned byte_len) const
Read a big endian integer value byte_len bytes long, at offset pos.
Definition: input.h:145
void skip_bits(unsigned n)
Skip the next n bits.
Definition: input.h:203
A physical variable.
Definition: var.h:24
unsigned bits_left() const
Return the number of bits left in the message to be decoded.
Definition: input.h:127
STL namespace.
bool decode_compressed_base(Varinfo info, uint32_t &base, uint32_t &diffbits)
Decode the base value for a variable in a compressed BUFR.
int pbyte_len
Bits left in pbyte to decode.
Definition: input.h:88
void check_available_data(unsigned pos, size_t datalen, const char *expected)
Check that the input buffer contains at least datalen characters after offset pos; throw error_parse ...
unsigned sec[6]
Offsets of the start of BUFR sections.
Definition: input.h:91
uint32_t get_bits(unsigned n)
Get the integer value of the next 'n' bits from the decode input; n must be <= 32. ...
Definition: input.h:170
std::string decode_uncompressed_bitmap(unsigned size)
Decode an uncompressed bitmap of size bits.
Storage for the decoded data of a BUFR or CREX message.
Definition: bulletin.h:29
wreport exceptions.
std::string decode_compressed_bitmap(unsigned size)
Decode a "compressed" bitmap of size bits.
void check_available_message_data(unsigned section, unsigned pos, size_t datalen, const char *expected)
Check that the input buffer contains at least datalen characters after offset pos in section section;...
void scan_other_sections(bool has_optional)
Scan the message filling in the sec[] array of section start offsets of all sections from 2 on...
unsigned offset() const
Return the current decoding byte offset.
Definition: input.h:124
std::vector< Subset > subsets
Decoded variables.
Definition: bulletin.h:124
Definition: input.h:19
size_t data_len
Input buffer size.
Definition: input.h:61
Information about a variable.
Definition: varinfo.h:139
void debug_find_sequence(const char *pattern) const
Match the given pattern as regexp on the still unread input bitstream, with bits converted to a strin...
Binary buffer with bit-level read operations.
Definition: input.h:47
void decode_compressed_semantic_number(Var &dest, unsigned subsets)
Decode a number as described by dest.info(), and set it as value for dest.
unsigned read_byte(unsigned section, unsigned pos) const
Read a byte value at offset pos inside section section.
Definition: input.h:139
void decode_compressed_number_af(Varinfo info, const bulletin::AssociatedField &afield, unsigned subsets, std::function< void(unsigned, Var &&)> dest)
Decode a number as described by info from a compressed bufr with subsets subsets, and send the result...
void scan_section_length(unsigned sec_no)
Scan length of section sec_no, filling in the start of the next section in sec[sec_no + 1]...
unsigned read_byte(unsigned pos) const
Read a byte value at offset pos.
Definition: input.h:133
void decode_compressed_number(Var &dest, uint32_t base, unsigned diffbits)
Decode a compressed number as described by dest.info(), and set it as value for dest.
String functions.
Definition: benchmark.h:13
const uint8_t * data
Input buffer.
Definition: input.h:58
Definition: associated_fields.h:12
size_t start_offset
File offset of the start of the message.
Definition: input.h:79
Input(const std::string &in)
Wrap a string into an Input.
#define WREPORT_THROWF_ATTRS(a, b)
Tell the compiler that a function always throws and expects printf-style arguments.
Definition: error.h:56
void decode_binary(Var &dest)
Decode a generic binary value as-is, as described by dest.info(), and set it as value for dest...
unsigned read_number(unsigned section, unsigned pos, unsigned byte_len) const
Read a big endian integer value byte_len bytes long, at offset pos inside section section...
Definition: input.h:160
void parse_error(const char *fmt,...) const WREPORT_THROWF_ATTRS(2, 3)
Throw an error_parse at the current decoding location.
void debug_dump_next_bits(const char *desc, unsigned count, const std::vector< unsigned > &groups={}) const
Dump to stderr 'count' bits of 'buf', starting at the 'ofs-th' bit.
uint8_t pbyte
Byte we are currently decoding.
Definition: input.h:85