libabigail
abg-symtab-reader.h
Go to the documentation of this file.
1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- Mode: C++ -*-
3 //
4 // Copyright (C) 2020-2023 Google, Inc.
5 //
6 // Author: Matthias Maennich
7 
8 /// @file
9 ///
10 /// This contains the declarations for the symtab reader.
11 
12 #ifndef __ABG_SYMTAB_READER_H__
13 #define __ABG_SYMTAB_READER_H__
14 
15 #include <gelf.h>
16 
17 #include <functional>
18 #include <iterator>
19 #include <memory>
20 #include <unordered_map>
21 #include <vector>
22 
23 #include "abg-cxx-compat.h" // for abg_compat::optional
24 #include "abg-ir.h"
25 
26 namespace abigail
27 {
28 namespace symtab_reader
29 {
30 
31 /// The symtab filter is the object passed to the symtab object in order to
32 /// iterate over the symbols in the symtab while applying filters.
33 ///
34 /// The general idea is that it consists of a set of optionally enforced flags,
35 /// such as 'functions' or 'variables'. If not set, those are not filtered for,
36 /// neither inclusive nor exclusive. If set they are all ANDed together.
38 {
39 public:
40  // Default constructor disabling all features.
41  symtab_filter() {}
42 
43  bool
44  matches(const elf_symbol& symbol) const;
45 
46  /// Enable or disable function filtering.
47  ///
48  /// @param new_value whether to filter for functions (stored in functions_)
49  void
50  set_functions(bool new_value = true)
51  {functions_ = new_value;}
52 
53  /// Enable or disable variable filtering.
54  ///
55  /// @param new_value whether to filter for variables (stored in variables_)
56  void
57  set_variables(bool new_value = true)
58  {variables_ = new_value;}
59 
60  /// Enable or disable public symbol filtering.
61  ///
62  /// @param new_value whether to filter for public symbols (stored in public_symbols_)
63  void
64  set_public_symbols(bool new_value = true)
65  {public_symbols_ = new_value;}
66 
67  /// Enable or disable undefined symbol filtering
68  ///
69  /// @param new_value whether to filter for undefined symbols
70  void
71  set_undefined_symbols(bool new_value = true)
72  {undefined_symbols_ = new_value;};
73 
74  /// Enable or disable kernel symbol filtering.
75  ///
76  /// @param new_value whether to filter for kernel symbols (stored in kernel_symbols_)
77  void
78  set_kernel_symbols(bool new_value = true)
79  {kernel_symbols_ = new_value;}
80 
81 private:
82  // The symbol is a function (FUNC)
83  abg_compat::optional<bool> functions_;
84 
85  // The symbol is a variable (OBJECT)
86  abg_compat::optional<bool> variables_;
87 
88  // The symbol is publicly accessible (global/weak with default/protected
89  // visibility)
90  abg_compat::optional<bool> public_symbols_;
91 
92  // The symbol is not defined (declared)
93  abg_compat::optional<bool> undefined_symbols_;
94 
95  // The symbol is listed in the ksymtab (for Linux Kernel binaries).
96  abg_compat::optional<bool> kernel_symbols_;
97 };
98 
99 /// Base iterator for our custom iterator based on whatever the const_iterator
100 /// is for a vector of symbols.
101 /// As of writing this, std::vector<elf_symbol_sptr>::const_iterator.
102 using base_iterator = elf_symbols::const_iterator;
103 
104 /// An iterator to walk a vector of elf_symbols filtered by symtab_filter.
105 ///
106 /// The implementation inherits all properties from the vector's
107 /// const_iterator, but intercepts where necessary to allow effective
108 /// filtering. This makes it a STL compatible iterator for general purpose
109 /// usage.
111 {
112 public:
113  using value_type = base_iterator::value_type;
114  using reference = base_iterator::reference;
115  using pointer = base_iterator::pointer;
116  using difference_type = base_iterator::difference_type;
117  using iterator_category = std::forward_iterator_tag;
118  using iterator_concept = std::forward_iterator_tag;
119 
120  /// Construct the iterator based on a pair of underlying iterators and a
121  /// symtab_filter object. Immediately fast forward to the next element that
122  /// matches the criteria (if any).
123  ///
124  /// @param begin the underlying begin iterator
125  ///
126  /// @param end the underlying end iterator
127  ///
128  /// @param filter the symtab_filter to apply
130  base_iterator end,
131  const symtab_filter& filter = symtab_filter())
132  : base_iterator(begin), end_(end), filter_(filter)
133  {skip_to_next();}
134 
135  /// Pre-increment operator to advance to the next matching element.
136  ///
137  /// @return itself after incrementing
140  {
141  base_iterator::operator++();
142  skip_to_next();
143  return *this;
144  }
145 
146  /// Post-increment operator to advance to the next matching element.
147  ///
148  /// @return a copy of the iterator before incrementing
151  {
152  symtab_iterator result(*this);
153  ++(*this);
154  return result;
155  }
156 
157 private:
158  /// The end of the underlying iterator.
159  const base_iterator end_;
160 
161  /// The symtab_filter used to determine when to advance; held by reference, so it must outlive the iterator.
162  const symtab_filter& filter_;
163 
164  /// Skip to the next element that matches the filter criteria (if any). Hold
165  /// off when reaching the end of the underlying iterator (end_).
166  void
167  skip_to_next()
168  {
169  while (*this != end_ && !filter_.matches(***this)) // ***this: iterator -> elf_symbol_sptr -> elf_symbol
170  base_iterator::operator++(); // plain step: symtab_iterator::operator++() would re-enter skip_to_next()
171  }
172 };
173 
174 /// Convenience declaration of a unique_ptr<symtab>
175 class symtab;
176 using symtab_ptr = std::unique_ptr<symtab>;
177 
178 /// symtab is the actual data container of the symtab_reader implementation.
179 ///
180 /// The symtab is instantiated either via an Elf handle (from binary) or from a
181 /// set of existing symbol maps (usually when instantiated from XML). It will
182 /// then discover the symtab, possibly the ksymtab (for Linux Kernel binaries)
183 /// and setup the data containers and lookup maps for later perusal.
184 ///
185 /// The symtab is supposed to be used in a const context as all information is
186 /// already computed at construction time. Symbols are stored sorted to allow
187 /// deterministic reading of the entries.
188 ///
189 /// An example use of the symtab class is
190 ///
191 /// const auto symtab = symtab::load(elf_handle, env);
192 /// symtab_filter filter = symtab->make_filter();
193 /// filter.set_public_symbols();
194 /// filter.set_functions();
195 ///
196 /// for (const auto& symbol : filtered_symtab(*symtab, filter))
197 /// {
198 /// std::cout << symbol->get_name() << "\n";
199 /// }
200 ///
201 /// This uses the filtered_symtab proxy object to capture the filter.
202 class symtab
203 {
204 public:
205  using symbol_predicate = std::function<bool(const elf_symbol_sptr&)>;
206 
207  /// Tell whether any symbols are available in this symtab.
208  ///
209  /// @return has_ksymtab_entries_ for a kernel binary, else whether symbols_ is non-empty.
210  bool
211  has_symbols() const
212  {if (is_kernel_binary_) return has_ksymtab_entries_; return !symbols_.empty();}
213 
215  make_filter() const;
216 
217  /// The (only) iterator type we offer is a const_iterator implemented by the
218  /// symtab_iterator.
220 
221  /// Obtain an iterator to the beginning of the symtab according to the filter
222  /// criteria. Whenever this iterator advances, it skips elements that do not
223  /// match the filter criteria.
224  ///
225  /// @param filter the symtab_filter to match symbols against
226  ///
227  /// @return a filtering const_iterator of the underlying type
229  begin(const symtab_filter& filter) const
230  {return symtab_iterator(symbols_.begin(), symbols_.end(), filter);}
231 
232  /// Obtain an iterator to the end of the symtab.
233  ///
234  /// @return an end iterator
236  end() const
237  {return symtab_iterator(symbols_.end(), symbols_.end());}
238 
239  const elf_symbols&
240  lookup_symbol(const std::string& name) const;
241 
242  const elf_symbol_sptr&
243  lookup_symbol(GElf_Addr symbol_addr) const;
244 
245  const elf_symbol_sptr
246  lookup_undefined_function_symbol(const std::string& name);
247 
248  const elf_symbol_sptr
249  lookup_undefined_variable_symbol(const std::string& name);
250 
252  function_symbol_is_exported(const string&);
253 
255  function_symbol_is_exported(const GElf_Addr symbol_address);
256 
258  variable_symbol_is_exported(const string&);
259 
261  variable_symbol_is_exported(const GElf_Addr symbol_address);
262 
264  function_symbol_is_undefined(const string&);
265 
267  variable_symbol_is_undefined(const string&);
268 
269  static symtab_ptr
270  load(Elf* elf_handle,
271  const ir::environment& env,
272  symbol_predicate is_suppressed = NULL);
273 
274  static symtab_ptr
275  load(string_elf_symbols_map_sptr function_symbol_map,
276  string_elf_symbols_map_sptr variables_symbol_map);
277 
278  void
279  update_main_symbol(GElf_Addr addr, const std::string& name);
280 
281 private:
282  /// Default constructor. Private to enforce creation by factory methods.
283  symtab();
284 
285  /// The vector of symbols we discovered.
286  elf_symbols symbols_;
287 
288  /// Whether this is a Linux Kernel binary
289  bool is_kernel_binary_;
290 
291  /// Whether this kernel_binary has ksymtab entries
292  ///
293  /// A kernel module might not have a ksymtab if it does not export any
294  /// symbols. In order to quickly decide whether the symbol table is empty, we
295  /// remember whether we ever saw ksymtab entries.
296  bool has_ksymtab_entries_;
297 
298  /// Lookup map name->symbol(s)
299  using name_symbol_map_type =
300  std::unordered_map<std::string, std::vector<elf_symbol_sptr>>;
301  name_symbol_map_type name_symbol_map_;
302 
303  /// Lookup map addr->symbol
304  using addr_symbol_map_type = std::unordered_map<GElf_Addr, elf_symbol_sptr>;
305  addr_symbol_map_type addr_symbol_map_;
306 
307  /// Lookup map function entry address -> symbol
308  addr_symbol_map_type entry_addr_symbol_map_;
309 
310  /// Set of undefined function symbol names
311  std::unordered_set<std::string> undefined_function_linkage_names_;
312 
313  /// Set of undefined variable symbol names
314  std::unordered_set<std::string> undefined_variable_linkage_names_;
315 
316  bool cached_undefined_symbol_names_;
317 
318  bool
319  load_(Elf* elf_handle,
320  const ir::environment& env,
321  symbol_predicate is_suppressed);
322 
323  bool
324  load_(string_elf_symbols_map_sptr function_symbol_map,
325  string_elf_symbols_map_sptr variables_symbol_map);
326 
327  GElf_Addr
328  setup_symbol_lookup_tables(Elf* elf_handle,
329  GElf_Sym* elf_symbol,
330  const elf_symbol_sptr& symbol_sptr);
331 
332  void
333  update_function_entry_address_symbol_map(Elf* elf_handle,
334  GElf_Sym* native_symbol,
335  const elf_symbol_sptr& symbol_sptr);
336 
337  void
338  add_alternative_address_lookups(Elf* elf_handle);
339 
340  void
341  collect_undefined_fns_and_vars_linkage_names();
342 };
343 
344 /// Helper class to allow range-for loops on symtabs for C++11 and later code.
345 /// It serves as a proxy for the symtab iterator and provides a begin() method
346 /// without arguments, as required for range-for loops (and possibly other
347 /// iterator based transformations).
348 ///
349 /// Example usage:
350 ///
351 /// for (const auto& symbol : filtered_symtab(tab, filter))
352 /// {
353 /// std::cout << symbol->get_name() << "\n";
354 /// }
355 ///
357 {
358  const symtab& tab_;
359  const symtab_filter filter_;
360 
361 public:
362  /// Construct the proxy object keeping a reference to the underlying symtab
363  /// and a copy of the filter object; tab must outlive the proxy.
364  filtered_symtab(const symtab& tab, const symtab_filter& filter)
365  : tab_(tab), filter_(filter)
366  {}
367 
368  /// Pass through symtab.begin(), but also pass on the filter.
370  begin() const
371  {return tab_.begin(filter_);}
372 
373  /// Pass through symtab.end().
375  end() const
376  {return tab_.end();}
377 };
378 
379 } // end namespace symtab_reader
380 } // end namespace abigail
381 
382 #endif // __ABG_SYMTAB_READER_H__
symtab_iterator operator++(int)
Post-increment operator to advance to the next matching element.
symtab::const_iterator end() const
Pass through symtab.end().
elf_symbol_sptr variable_symbol_is_exported(const string &)
Test if a given variable symbol has been exported.
void set_functions(bool new_value=true)
Enable or disable function filtering.
Helper class to allow range-for loops on symtabs for C++11 and later code. It serves as a proxy for t...
const elf_symbol_sptr lookup_undefined_variable_symbol(const std::string &name)
Lookup an undefined variable symbol with a given name.
void set_undefined_symbols(bool new_value=true)
Enable or disable undefined symbol filtering.
An iterator to walk a vector of elf_symbols filtered by symtab_filter.
bool has_symbols() const
Indicate whether any (kernel) symbols have been seen at construction.
symtab_iterator(base_iterator begin, base_iterator end, const symtab_filter &filter=symtab_filter())
Construct the iterator based on a pair of underlying iterators and a symtab_filter object...
symtab is the actual data container of the symtab_reader implementation.
filtered_symtab(const symtab &tab, const symtab_filter &filter)
Construct the proxy object keeping a reference to the underlying symtab and a copy of the filter object...
symtab_iterator & operator++()
Pre-increment operator to advance to the next matching element.
std::vector< elf_symbol_sptr > elf_symbols
Convenience typedef for a vector of elf_symbol.
Definition: abg-ir.h:904
elf_symbol_sptr variable_symbol_is_undefined(const string &)
Test if a name is the name of an undefined variable symbol.
symtab_filter make_filter() const
symtab implementations
Toplevel namespace for libabigail.
Types of the main internal representation of libabigail.
Abstraction of an elf symbol.
Definition: abg-ir.h:922
This is an abstraction of the set of resources necessary to manage several aspects of the internal re...
Definition: abg-ir.h:139
shared_ptr< elf_symbol > elf_symbol_sptr
A convenience typedef for a shared pointer to elf_symbol.
Definition: abg-ir.h:886
bool matches(const elf_symbol &symbol) const
symtab_filter implementations
symtab::const_iterator begin() const
Pass through symtab.begin(), but also pass on the filter.
void update_main_symbol(GElf_Addr addr, const std::string &name)
Notify the symtab about the name of the main symbol at a given address.
void set_public_symbols(bool new_value=true)
Enable or disable public symbol filtering.
const elf_symbol_sptr lookup_undefined_function_symbol(const std::string &name)
Lookup an undefined function symbol with a given name.
void set_variables(bool new_value=true)
Enable or disable variable filtering.
const elf_symbols & lookup_symbol(const std::string &name) const
Get a vector of symbols that are associated with a certain name.
elf_symbol_sptr function_symbol_is_exported(const string &)
Test if a given function symbol has been exported.
static symtab_ptr load(Elf *elf_handle, const ir::environment &env, symbol_predicate is_suppressed=NULL)
Construct a symtab object and instantiate it from an ELF handle. Also pass in the ir::environment we ...
symtab_iterator const_iterator
The (only) iterator type we offer is a const_iterator implemented by the symtab_iterator.
void set_kernel_symbols(bool new_value=true)
Enable or disable kernel symbol filtering.
The symtab filter is the object passed to the symtab object in order to iterate over the symbols in t...
shared_ptr< string_elf_symbols_map_type > string_elf_symbols_map_sptr
Convenience typedef for a shared pointer to string_elf_symbols_map_type.
Definition: abg-ir.h:913
const_iterator begin(const symtab_filter &filter) const
Obtain an iterator to the beginning of the symtab according to the filter criteria. Whenever this iterator advances, it skips elements that do not match the filter criteria.
const_iterator end() const
Obtain an iterator to the end of the symtab.
elf_symbol_sptr function_symbol_is_undefined(const string &)
Test if a name is the name of an undefined function symbol.