protozero
Minimalistic protocol buffer decoder and encoder in C++.
pbf_reader.hpp
Go to the documentation of this file.
1 #ifndef PROTOZERO_PBF_READER_HPP
2 #define PROTOZERO_PBF_READER_HPP
3 
4 /*****************************************************************************
5 
6 protozero - Minimalistic protocol buffer decoder and encoder in C++.
7 
8 This file is from https://github.com/mapbox/protozero where you can find more
9 documentation.
10 
11 *****************************************************************************/
12 
19 #include <cstddef>
20 #include <cstdint>
21 #include <string>
22 #include <utility>
23 
24 #include <protozero/config.hpp>
25 #include <protozero/exception.hpp>
26 #include <protozero/iterators.hpp>
27 #include <protozero/types.hpp>
28 #include <protozero/varint.hpp>
29 
30 #if PROTOZERO_BYTE_ORDER != PROTOZERO_LITTLE_ENDIAN
31 # include <protozero/byteswap.hpp>
32 #endif
33 
34 namespace protozero {
35 
60 class pbf_reader {
61 
62  // A pointer to the next unread data.
63  const char* m_data = nullptr;
64 
65  // A pointer to one past the end of data.
66  const char* m_end = nullptr;
67 
68  // The wire type of the current field.
69  pbf_wire_type m_wire_type = pbf_wire_type::unknown;
70 
71  // The tag of the current field.
72  pbf_tag_type m_tag = 0;
73 
74  template <typename T>
75  T get_fixed() {
76  T result;
77  skip_bytes(sizeof(T));
78  detail::copy_or_byteswap<sizeof(T)>(m_data - sizeof(T), &result);
79  return result;
80  }
81 
82  template <typename T>
84  protozero_assert(tag() != 0 && "call next() before accessing field value");
85  const auto len = get_len_and_skip();
86  protozero_assert(len % sizeof(T) == 0);
87  return create_fixed_iterator_range<T>(m_data - len, m_data);
88  }
89 
90  template <typename T>
91  T get_varint() {
92  return static_cast<T>(decode_varint(&m_data, m_end));
93  }
94 
95  template <typename T>
96  T get_svarint() {
97  protozero_assert((has_wire_type(pbf_wire_type::varint) || has_wire_type(pbf_wire_type::length_delimited)) && "not a varint");
98  return static_cast<T>(decode_zigzag64(decode_varint(&m_data, m_end)));
99  }
100 
101  pbf_length_type get_length() {
102  return get_varint<pbf_length_type>();
103  }
104 
105  void skip_bytes(pbf_length_type len) {
106  if (m_data + len > m_end) {
107  throw end_of_buffer_exception();
108  }
109  m_data += len;
110 
111  // In debug builds reset the tag to zero so that we can detect (some)
112  // wrong code.
113 #ifndef NDEBUG
114  m_tag = 0;
115 #endif
116  }
117 
118  pbf_length_type get_len_and_skip() {
119  const auto len = get_length();
120  skip_bytes(len);
121  return len;
122  }
123 
124  template <typename T>
125  iterator_range<T> get_packed() {
126  protozero_assert(tag() != 0 && "call next() before accessing field value");
127  const auto len = get_len_and_skip();
128  return iterator_range<T>{T{m_data - len, m_data},
129  T{m_data, m_data}};
130  }
131 
132 public:
133 
144  explicit pbf_reader(const data_view& view) noexcept
145  : m_data(view.data()),
146  m_end(view.data() + view.size()),
147  m_wire_type(pbf_wire_type::unknown),
148  m_tag(0) {
149  }
150 
160  pbf_reader(const char* data, std::size_t length) noexcept
161  : m_data(data),
162  m_end(data + length),
163  m_wire_type(pbf_wire_type::unknown),
164  m_tag(0) {
165  }
166 
176  pbf_reader(std::pair<const char*, std::size_t> data) noexcept
177  : m_data(data.first),
178  m_end(data.first + data.second),
179  m_wire_type(pbf_wire_type::unknown),
180  m_tag(0) {
181  }
182 
193  pbf_reader(const std::string& data) noexcept
194  : m_data(data.data()),
195  m_end(data.data() + data.size()),
196  m_wire_type(pbf_wire_type::unknown),
197  m_tag(0) {
198  }
199 
204  pbf_reader() noexcept = default;
205 
207  pbf_reader(const pbf_reader&) noexcept = default;
208 
210  pbf_reader(pbf_reader&&) noexcept = default;
211 
213  pbf_reader& operator=(const pbf_reader& other) noexcept = default;
214 
216  pbf_reader& operator=(pbf_reader&& other) noexcept = default;
217 
218  ~pbf_reader() = default;
219 
225  void swap(pbf_reader& other) noexcept {
226  using std::swap;
227  swap(m_data, other.m_data);
228  swap(m_end, other.m_end);
229  swap(m_wire_type, other.m_wire_type);
230  swap(m_tag, other.m_tag);
231  }
232 
237  operator bool() const noexcept {
238  return m_data < m_end;
239  }
240 
250  std::size_t length() const noexcept {
251  return std::size_t(m_end - m_data);
252  }
253 
269  bool next() {
270  if (m_data == m_end) {
271  return false;
272  }
273 
274  const auto value = get_varint<uint32_t>();
275  m_tag = pbf_tag_type(value >> 3);
276 
277  // tags 0 and 19000 to 19999 are not allowed as per
278  // https://developers.google.com/protocol-buffers/docs/proto
279  protozero_assert(((m_tag > 0 && m_tag < 19000) || (m_tag > 19999 && m_tag <= ((1 << 29) - 1))) && "tag out of range");
280 
281  m_wire_type = pbf_wire_type(value & 0x07);
282  switch (m_wire_type) {
283  case pbf_wire_type::varint:
284  case pbf_wire_type::fixed64:
285  case pbf_wire_type::length_delimited:
286  case pbf_wire_type::fixed32:
287  break;
288  default:
290  }
291 
292  return true;
293  }
294 
321  while (next()) {
322  if (m_tag == tag) {
323  return true;
324  } else {
325  skip();
326  }
327  }
328  return false;
329  }
330 
340  pbf_tag_type tag() const noexcept {
341  return m_tag;
342  }
343 
359  pbf_wire_type wire_type() const noexcept {
360  return m_wire_type;
361  }
362 
369  bool has_wire_type(pbf_wire_type type) const noexcept {
370  return wire_type() == type;
371  }
372 
379  void skip() {
380  protozero_assert(tag() != 0 && "call next() before calling skip()");
381  switch (wire_type()) {
382  case pbf_wire_type::varint:
383  skip_varint(&m_data, m_end);
384  break;
385  case pbf_wire_type::fixed64:
386  skip_bytes(8);
387  break;
388  case pbf_wire_type::length_delimited:
389  skip_bytes(get_length());
390  break;
391  case pbf_wire_type::fixed32:
392  skip_bytes(4);
393  break;
394  default:
395  protozero_assert(false && "can not be here because next() should have thrown already");
396  }
397  }
398 
400 
411  bool get_bool() {
412  protozero_assert(tag() != 0 && "call next() before accessing field value");
413  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
414  protozero_assert((*m_data & 0x80) == 0 && "not a 1 byte varint");
415  skip_bytes(1);
416  return m_data[-1] != 0; // -1 okay because we incremented m_data the line before
417  }
418 
426  int32_t get_enum() {
427  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
428  return get_varint<int32_t>();
429  }
430 
438  int32_t get_int32() {
439  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
440  return get_varint<int32_t>();
441  }
442 
450  int32_t get_sint32() {
451  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
452  return get_svarint<int32_t>();
453  }
454 
462  uint32_t get_uint32() {
463  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
464  return get_varint<uint32_t>();
465  }
466 
474  int64_t get_int64() {
475  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
476  return get_varint<int64_t>();
477  }
478 
486  int64_t get_sint64() {
487  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
488  return get_svarint<int64_t>();
489  }
490 
498  uint64_t get_uint64() {
499  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
500  return get_varint<uint64_t>();
501  }
502 
510  uint32_t get_fixed32() {
511  protozero_assert(tag() != 0 && "call next() before accessing field value");
512  protozero_assert(has_wire_type(pbf_wire_type::fixed32) && "not a 32-bit fixed");
513  return get_fixed<uint32_t>();
514  }
515 
523  int32_t get_sfixed32() {
524  protozero_assert(tag() != 0 && "call next() before accessing field value");
525  protozero_assert(has_wire_type(pbf_wire_type::fixed32) && "not a 32-bit fixed");
526  return get_fixed<int32_t>();
527  }
528 
536  uint64_t get_fixed64() {
537  protozero_assert(tag() != 0 && "call next() before accessing field value");
538  protozero_assert(has_wire_type(pbf_wire_type::fixed64) && "not a 64-bit fixed");
539  return get_fixed<uint64_t>();
540  }
541 
549  int64_t get_sfixed64() {
550  protozero_assert(tag() != 0 && "call next() before accessing field value");
551  protozero_assert(has_wire_type(pbf_wire_type::fixed64) && "not a 64-bit fixed");
552  return get_fixed<int64_t>();
553  }
554 
562  float get_float() {
563  protozero_assert(tag() != 0 && "call next() before accessing field value");
564  protozero_assert(has_wire_type(pbf_wire_type::fixed32) && "not a 32-bit fixed");
565  return get_fixed<float>();
566  }
567 
575  double get_double() {
576  protozero_assert(tag() != 0 && "call next() before accessing field value");
577  protozero_assert(has_wire_type(pbf_wire_type::fixed64) && "not a 64-bit fixed");
578  return get_fixed<double>();
579  }
580 
591  protozero_assert(tag() != 0 && "call next() before accessing field value");
592  protozero_assert(has_wire_type(pbf_wire_type::length_delimited) && "not of type string, bytes or message");
593  const auto len = get_len_and_skip();
594  return data_view{m_data-len, len};
595  }
596 
597 #ifndef PROTOZERO_STRICT_API
598 
606  std::pair<const char*, pbf_length_type> get_data() {
607  protozero_assert(tag() != 0 && "call next() before accessing field value");
608  protozero_assert(has_wire_type(pbf_wire_type::length_delimited) && "not of type string, bytes or message");
609  const auto len = get_len_and_skip();
610  return std::make_pair(m_data-len, len);
611  }
612 #endif
613 
621  std::string get_bytes() {
622  return std::string(get_view());
623  }
624 
632  std::string get_string() {
633  return std::string(get_view());
634  }
635 
644  return pbf_reader(get_view());
645  }
646 
648 
651 
654 
657 
660 
663 
666 
669 
672 
674 
688  return get_packed<pbf_reader::const_bool_iterator>();
689  }
690 
701  return get_packed<pbf_reader::const_enum_iterator>();
702  }
703 
714  return get_packed<pbf_reader::const_int32_iterator>();
715  }
716 
727  return get_packed<pbf_reader::const_sint32_iterator>();
728  }
729 
740  return get_packed<pbf_reader::const_uint32_iterator>();
741  }
742 
753  return get_packed<pbf_reader::const_int64_iterator>();
754  }
755 
766  return get_packed<pbf_reader::const_sint64_iterator>();
767  }
768 
779  return get_packed<pbf_reader::const_uint64_iterator>();
780  }
781 
791  auto get_packed_fixed32() -> decltype(packed_fixed<uint32_t>()) {
792  return packed_fixed<uint32_t>();
793  }
794 
804  auto get_packed_sfixed32() -> decltype(packed_fixed<int32_t>()) {
805  return packed_fixed<int32_t>();
806  }
807 
817  auto get_packed_fixed64() -> decltype(packed_fixed<uint64_t>()) {
818  return packed_fixed<uint64_t>();
819  }
820 
830  auto get_packed_sfixed64() -> decltype(packed_fixed<int64_t>()) {
831  return packed_fixed<int64_t>();
832  }
833 
843  auto get_packed_float() -> decltype(packed_fixed<float>()) {
844  return packed_fixed<float>();
845  }
846 
856  auto get_packed_double() -> decltype(packed_fixed<double>()) {
857  return packed_fixed<double>();
858  }
859 
861 
862 }; // class pbf_reader
863 
870 inline void swap(pbf_reader& lhs, pbf_reader& rhs) noexcept {
871  lhs.swap(rhs);
872 }
873 
874 } // end namespace protozero
875 
876 #endif // PROTOZERO_PBF_READER_HPP
pbf_reader(const data_view &view) noexcept
Definition: pbf_reader.hpp:144
int64_t get_sfixed64()
Definition: pbf_reader.hpp:549
uint32_t get_uint32()
Definition: pbf_reader.hpp:462
uint64_t get_fixed64()
Definition: pbf_reader.hpp:536
int32_t get_sfixed32()
Definition: pbf_reader.hpp:523
Definition: exception.hpp:48
uint64_t get_uint64()
Definition: pbf_reader.hpp:498
auto get_packed_double() -> decltype(packed_fixed< double >())
Definition: pbf_reader.hpp:856
pbf_reader(const char *data, std::size_t length) noexcept
Definition: pbf_reader.hpp:160
int32_t get_int32()
Definition: pbf_reader.hpp:438
pbf_reader(const std::string &data) noexcept
Definition: pbf_reader.hpp:193
bool next(pbf_tag_type tag)
Definition: pbf_reader.hpp:320
iterator_range< pbf_reader::const_sint64_iterator > get_packed_sint64()
Definition: pbf_reader.hpp:765
auto get_packed_float() -> decltype(packed_fixed< float >())
Definition: pbf_reader.hpp:843
auto get_packed_fixed32() -> decltype(packed_fixed< uint32_t >())
Definition: pbf_reader.hpp:791
void swap(pbf_reader &lhs, pbf_reader &rhs) noexcept
Definition: pbf_reader.hpp:870
Contains macro checks for different configurations.
Contains the declaration of low-level types used in the pbf format.
bool has_wire_type(pbf_wire_type type) const noexcept
Definition: pbf_reader.hpp:369
std::size_t length() const noexcept
Definition: pbf_reader.hpp:250
void skip()
Definition: pbf_reader.hpp:379
iterator_range< pbf_reader::const_int32_iterator > get_packed_int32()
Definition: pbf_reader.hpp:713
pbf_reader get_message()
Definition: pbf_reader.hpp:643
pbf_reader() noexcept=default
iterator_range< pbf_reader::const_int64_iterator > get_packed_int64()
Definition: pbf_reader.hpp:752
void skip_varint(const char **data, const char *end)
Definition: varint.hpp:112
auto get_packed_sfixed64() -> decltype(packed_fixed< int64_t >())
Definition: pbf_reader.hpp:830
Contains the iterators for access to packed repeated fields.
auto get_packed_sfixed32() -> decltype(packed_fixed< int32_t >())
Definition: pbf_reader.hpp:804
pbf_wire_type
Definition: types.hpp:39
iterator_range< pbf_reader::const_uint64_iterator > get_packed_uint64()
Definition: pbf_reader.hpp:778
iterator_range< pbf_reader::const_sint32_iterator > get_packed_sint32()
Definition: pbf_reader.hpp:726
pbf_wire_type wire_type() const noexcept
Definition: pbf_reader.hpp:359
pbf_reader(std::pair< const char *, std::size_t > data) noexcept
Definition: pbf_reader.hpp:176
void swap(pbf_reader &other) noexcept
Definition: pbf_reader.hpp:225
int64_t get_sint64()
Definition: pbf_reader.hpp:486
std::pair< const char *, pbf_length_type > get_data()
Definition: pbf_reader.hpp:606
auto get_packed_fixed64() -> decltype(packed_fixed< uint64_t >())
Definition: pbf_reader.hpp:817
iterator_range< pbf_reader::const_bool_iterator > get_packed_bool()
Definition: pbf_reader.hpp:687
Contains functions to swap bytes in values (for different endianness).
int32_t get_sint32()
Definition: pbf_reader.hpp:450
std::string get_bytes()
Definition: pbf_reader.hpp:621
double get_double()
Definition: pbf_reader.hpp:575
pbf_reader & operator=(const pbf_reader &other) noexcept=default
pbf_reader messages can be copied trivially.
bool get_bool()
Definition: pbf_reader.hpp:411
std::string get_string()
Definition: pbf_reader.hpp:632
uint32_t pbf_length_type
Definition: types.hpp:51
Contains the exceptions used in the protozero library.
data_view get_view()
Definition: pbf_reader.hpp:590
uint32_t pbf_tag_type
Definition: types.hpp:32
uint32_t get_fixed32()
Definition: pbf_reader.hpp:510
iterator_range< pbf_reader::const_enum_iterator > get_packed_enum()
Definition: pbf_reader.hpp:700
Definition: types.hpp:63
pbf_tag_type tag() const noexcept
Definition: pbf_reader.hpp:340
Definition: iterators.hpp:327
Definition: iterators.hpp:260
int32_t get_enum()
Definition: pbf_reader.hpp:426
Definition: pbf_reader.hpp:60
Definition: iterators.hpp:53
float get_float()
Definition: pbf_reader.hpp:562
Definition: exception.hpp:61
Contains low-level varint and zigzag encoding and decoding functions.
uint64_t decode_varint(const char **data, const char *end)
Definition: varint.hpp:89
bool next()
Definition: pbf_reader.hpp:269
int64_t get_int64()
Definition: pbf_reader.hpp:474
iterator_range< pbf_reader::const_uint32_iterator > get_packed_uint32()
Definition: pbf_reader.hpp:739
int64_t decode_zigzag64(uint64_t value) noexcept
Definition: varint.hpp:181
All parts of the protozero header-only library are in this namespace.
Definition: byteswap.hpp:24