libabigail
abg-diff-utils.cc
Go to the documentation of this file.
1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- Mode: C++ -*-
3 //
4 // Copyright (C) 2013-2023 Red Hat, Inc.
5 
6 #include <cstring>
7 
8 #include "abg-fwd.h"
9 #include "abg-internal.h"
10 // <headers defining libabigail's API go under here>
11 ABG_BEGIN_EXPORT_DECLARATIONS
12 
13 #include "abg-diff-utils.h"
14 
16 // </headers defining libabigail's API>
17 
18 /// @file
19 ///
20 /// This file defines the declarations found in abg-diff-utils.h
21 namespace abigail
22 {
23 
24 namespace diff_utils
25 {
26 /// @return true iff the end of a furthest forward d_path overlaps the
27 /// end of a furtherst reverse d_path on the same diagonal. This is
28 /// defined by the lemma 3 of the paper.
29 ///
30 /// @param forward_d_path_end
31 bool
32 ends_of_furthest_d_paths_overlap(const point& forward_d_path_end,
33  const point& reverse_d_path_end)
34 {
35  return ((forward_d_path_end.x() - forward_d_path_end.y())
36  == (reverse_d_path_end.x() - reverse_d_path_end.y())
37  && forward_d_path_end.x() >= reverse_d_path_end.x());
38 }
39 //// Test if a point is within the boundaries of the edit graph.
40 ///
41 /// @param p the point to test.
42 ///
43 /// @param a_size the size of abscissas. That is, the size of the
44 /// first input to consider.
45 ///
46 /// @param b_size the of ordinates. That is, the size of the second
47 /// input to consider.
48 ///
49 /// @return true iff a point is within the boundaries of the edit
50 /// graph.
51 bool
53  unsigned a_size,
54  unsigned b_size)
55 {
56  return (p.x() > -1
57  && p.x() < (int) a_size
58  && p.y() > -1
59  && p.y() < (int) b_size);
60 }
61 
62 /// Get the end points of the snake, as intended by the paper.
63 ///
64 /// @param s the snake to consider.
65 ///
66 /// @param x the starting point of the snake.
67 ///
68 /// @param u the end point of the snake.
69 bool
70 snake_end_points(const snake& s, point& x, point& u)
71 {
72  if (s.is_empty())
73  return false;
74 
75  if (s.is_forward())
76  {
77  x = s.intermediate();
78  u = s.end();
79  }
80  else
81  {
82  x = s.end();
83  u = s.intermediate();
84  }
85  return true;
86 }
87 
88 /// Returns the middle snake of two strings, as well as the length of
89 /// their shortest editing script.
90 ///
91 /// @param str1 the first string to consider.
92 ///
93 /// @param str2 the second string to consider.
94 ///
95 /// @param snake_begin the point representing the beginning
96 bool
97 compute_middle_snake(const char* str1, const char* str2,
98  snake& s, int& ses_len)
99 {
100  bool has_snake = false;
101  int str1_size = strlen(str1), str2_size = strlen(str2);
102 
103  if (compute_middle_snake<const char*,
104  default_eq_functor>(str1, str1 + str1_size,
105  str2 , str2 + str2_size,
106  s, ses_len))
107  has_snake = true;
108 
109  return has_snake;
110 }
111 
112 /// Compute the length of the shortest edit script for two strings.
113 /// This is done using the "Greedy LCS/SES" of figure 2 in the paper.
114 /// It can walk the edit graph either foward (when reverse is false)
115 /// or backward starting from the end (when reverse is true).
116 ///
117 /// @param str1 the first string to consider.
118 ///
119 /// @param str2 the second string to consider.
120 ///
121 /// @param reverse when true, walk the edit graph backward, starting
122 /// from the point (M,N) (with M and N being the length of str1 and
123 /// str2). When false, walk the edit graph forward, starting from the
124 /// point (0,0).
125 ///
126 /// @return the length of the shortest edit script.
127 int
128 ses_len(const char* str1,
129  const char* str2,
130  bool reverse)
131 {
132  int str1_size = strlen(str1), str2_size = strlen(str2);
133 
134  d_path_vec v(str1_size, str2_size);
135  return ses_len(str1, str1 + str1_size,
136  str2, str2 + str2_size,
137  v, reverse);
138 }
139 
140 /// Compute the longest common subsequence of two strings and return
141 /// the length of the shortest edit script for transforming the first
142 /// string into the second.
143 ///
144 /// @param str1 the first string to consider.
145 ///
146 /// @param str2 the second string to consider.
147 ///
148 /// @param ses_len the length of the shortest edit script. This is
149 /// set by the function iff it returns true.
150 ///
151 /// @param the longest common subsequence of the two strings. This is
152 /// set the function iff it returns true.
153 ///
154 /// @return true if the function could compute an lcs, false
155 /// otherwise.
156 void
157 compute_lcs(const char* str1, const char* str2, int &ses_len, string& lcs)
158 {
159  vector<point> result;
160  edit_script ses;
161 
162  compute_diff(str1, str1 + strlen(str1),
163  str2, str2 + strlen(str2),
164  result, ses);
165 
166  ses_len = ses.length();
167 
168  for (unsigned i = 0; i < result.size(); ++i)
169  {
170  int x = result[i].x(), y = result[i].y();
171  ABG_ASSERT(str1[x] == str2[y]);
172  lcs.push_back(str1[x]);
173  }
174 }
175 
176 /// Compute the shortest edit script for transforming a string into
177 /// another.
178 ///
179 /// @param str1 the first string to consider.
180 ///
181 /// @param str2 the second string to consider.
182 ///
183 /// @param ses the resulting edit script.
184 ///
185 /// @pram ses_len the length of the edit script.
186 void
187 compute_ses(const char* str1, const char* str2, edit_script& ses)
188 {
189  vector<point> lcs;
190  compute_diff(str1, str1 + strlen(str1),
191  str2, str2 + strlen(str2),
192  lcs, ses);
193 }
194 
195 }//end namespace diff_utils
196 
197 }//end namespace abigail
const point & intermediate() const
Getter for the end point of the non-diagonal edge of the snake.
The abstraction of the Snake concept, from the paper.
A class representing a vertex in an edit graph, as explained in the paper. A vertex is a basically a ...
bool point_is_valid_in_graph(point &p, unsigned a_size, unsigned b_size)
bool compute_middle_snake(const char *str1, const char *str2, snake &s, int &ses_len)
Returns the middle snake of two strings, as well as the length of their shortest editing script...
Toplevel namespace for libabigail.
The default equality functor used by the core diffing algorithms.
int ses_len(const char *str1, const char *str2, bool reverse)
Compute the length of the shortest edit script for two strings. This is done using the "Greedy LCS/SE...
void compute_lcs(const char *str1, const char *str2, int &ses_len, string &lcs)
Compute the longest common subsequence of two strings and return the length of the shortest edit scri...
This file declares types and operations implementing the "O(ND) Difference Algorithm" (aka diff2) fro...
void compute_ses(const char *str1, const char *str2, edit_script &ses)
Compute the shortest edit script for transforming a string into another.
#define ABG_ASSERT(cond)
This is a wrapper around the 'assert' glibc call. It allows for its argument to have side effects...
Definition: abg-fwd.h:1714
The abstraction of an edit script for transforming a sequence A into a sequence B.
const point & end() const
Getter for the end point of the last diagonal edge, aka snake end point. Note that if the snake has n...
bool snake_end_points(const snake &s, point &x, point &u)
Get the end points of the snake, as intended by the paper.
bool ends_of_furthest_d_paths_overlap(const point &forward_d_path_end, const point &reverse_d_path_end)
void compute_diff(RandomAccessOutputIterator a_base, RandomAccessOutputIterator a_begin, RandomAccessOutputIterator a_end, RandomAccessOutputIterator b_base, RandomAccessOutputIterator b_begin, RandomAccessOutputIterator b_end, vector< point > &lcs, edit_script &ses, int &ses_len)
Compute the longest common subsequence of two (sub-regions of) sequences as well as the shortest edit...
The array containing the furthest D-path end-points, for each value of K. MAX_D is the maximum value ...