// -*- mode: C++ -*-

// Copyright (c) 2010, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>

// test-assembler.h: interface to class for building complex binary streams.

// To test the Breakpad symbol dumper and processor thoroughly, for
// all combinations of host system and minidump processor
// architecture, we need to be able to easily generate complex test
// data like debugging information and minidump files.
//
// For example, if we want our unit tests to provide full code
// coverage for stack walking, it may be difficult to persuade the
// compiler to generate every possible sort of stack walking
// information that we want to support; there are probably DWARF CFI
// opcodes that GCC never emits. Similarly, if we want to test our
// error handling, we will need to generate damaged minidumps or
// debugging information that (we hope) the client or compiler will
// never produce on its own.
//
// google_breakpad::TestAssembler provides a predictable and
// (relatively) simple way to generate complex formatted data streams
// like minidumps and CFI. Furthermore, because TestAssembler is
// portable, developers without access to (say) Visual Studio or a
// SPARC assembler can still work on test data for those targets.

56#ifndef PROCESSOR_TEST_ASSEMBLER_H_
57#define PROCESSOR_TEST_ASSEMBLER_H_
58
59#include <list>
60#include <vector>
61#include <string>
62
ivan.penkov@gmail.com6de969a2012-06-28 22:46:01 +000063#include "common/using_std_string.h"
jimblandy92444962010-03-16 16:20:34 +000064#include "google_breakpad/common/breakpad_types.h"
65
66namespace google_breakpad {
67
68using std::list;
jimblandy92444962010-03-16 16:20:34 +000069using std::vector;
70
ted.mielczarek35c41e02010-06-25 16:56:16 +000071namespace test_assembler {
jimblandy92444962010-03-16 16:20:34 +000072
// A Label represents a value not yet known that we need to store in a
// section. As long as all the labels a section refers to are defined
// by the time we retrieve its contents as bytes, we can use undefined
// labels freely in that section's construction.
//
// A label can be in one of three states:
// - undefined,
// - defined as the sum of some other label and a constant, or
// - a constant.
//
// A label's value never changes, but it can accumulate constraints.
// Adding labels and integers is permitted, and yields a label.
// Subtracting a constant from a label is permitted, and also yields a
// label. Subtracting two labels that have some relationship to each
// other is permitted, and yields a constant.
//
// For example:
//
//   Label a;               // a's value is undefined
//   Label b;               // b's value is undefined
//   {
//     Label c = a + 4;     // okay, even though a's value is unknown
//     b = c + 4;           // also okay; b is now a+8
//   }
//   Label d = b - 2;       // okay; d == a+6, even though c is gone
//   d.Value();             // error: d's value is not yet known
//   d - a;                 // is 6, even though their values are not known
//   a = 12;                // now b == 20, and d == 18
//   d.Value();             // 18: no longer an error
//   b.Value();             // 20
//   d = 10;                // error: d is already defined.
//
// Label objects' lifetimes are unconstrained: notice that, in the
// above example, even though a and b are only related through c, and
// c goes out of scope, the assignment to a sets b's value as well. In
// particular, it's not necessary to ensure that a Label lives beyond
// Sections that refer to it.
class Label {
 public:
  Label();                    // An undefined label.
  Label(uint64_t value);      // A label with a fixed value.
  Label(const Label &value);  // A label equal to another.
  ~Label();

  // Return this label's value; it must be known.
  //
  // Providing this as a cast operator is nifty, but the conversions
  // happen in unexpected places. In particular, ISO C++ says that
  // Label + size_t becomes ambiguous, because it can't decide whether
  // to convert the Label to a uint64_t and then to a size_t, or use
  // the overloaded operator that returns a new label, even though the
  // former could fail if the label is not yet defined and the latter won't.
  uint64_t Value() const;

  // Constrain this label to equal VALUE (a constant or another label).
  // As the example in the class comment shows, assigning to a label
  // that is already defined is an error.
  Label &operator=(uint64_t value);
  Label &operator=(const Label &value);

  // Return a label whose value is this label's value plus ADDEND, or
  // minus SUBTRAHEND. This label need not be defined yet.
  Label operator+(uint64_t addend) const;
  Label operator-(uint64_t subtrahend) const;

  // Return the constant difference between this label and SUBTRAHEND.
  // The two labels must be related to each other, but their values
  // need not be known.
  uint64_t operator-(const Label &subtrahend) const;

  // We could also provide == and != that work on undefined, but
  // related, labels.

  // Return true if this label's value is known. If VALUE_P is given,
  // set *VALUE_P to the known value if returning true.
  bool IsKnownConstant(uint64_t *value_p = NULL) const;

  // Return true if the offset from LABEL to this label is known. If
  // OFFSET_P is given, set *OFFSET_P to the offset when returning true.
  //
  // You can think of l.IsKnownOffsetFrom(m, &d) as being like 'd = l-m',
  // except that it also returns a value indicating whether the
  // subtraction is possible given what we currently know of l and m.
  // It can be possible even if we don't know l and m's values. For
  // example:
  //
  //   Label l, m;
  //   m = l + 10;
  //   l.IsKnownConstant();           // false
  //   m.IsKnownConstant();           // false
  //   uint64_t d;
  //   l.IsKnownOffsetFrom(m, &d);    // true, and sets d to -10 (as d is
  //                                  // unsigned, that is -10 modulo 2^64).
  //   l-m                            // -10
  //   m-l                            // 10
  //   m.Value()                      // error: m's value is not known
  bool IsKnownOffsetFrom(const Label &label, uint64_t *offset_p = NULL) const;

 private:
  // A label's value, or if that is not yet known, how the value is
  // related to other labels' values. A binding may be:
  // - a known constant,
  // - constrained to be equal to some other binding plus a constant, or
  // - unconstrained, and free to take on any value.
  //
  // Many labels may point to a single binding, and each binding may
  // refer to another, so bindings and labels form trees whose leaves
  // are labels, whose interior nodes (and roots) are bindings, and
  // where links point from children to parents. Bindings are
  // reference counted, allowing labels to be lightweight, copyable,
  // assignable, placed in containers, and so on.
  class Binding {
   public:
    Binding();
    Binding(uint64_t addend);
    ~Binding();

    // Increment our reference count.
    void Acquire() { reference_count_++; }
    // Decrement our reference count, and return true if it is zero.
    bool Release() { return --reference_count_ == 0; }

    // Set this binding to be equal to BINDING + VALUE. If BINDING is
    // NULL, then set this binding to the known constant VALUE.
    // Update every binding on this binding's chain to point directly
    // to BINDING, or to be a constant, with addends adjusted
    // appropriately.
    void Set(Binding *binding, uint64_t value);

    // Return what we know about the value of this binding.
    // - If this binding's value is a known constant, set BASE to
    //   NULL, and set ADDEND to its value.
    // - If this binding is not a known constant but related to other
    //   bindings, set BASE to the binding at the end of the relation
    //   chain (which will always be unconstrained), and set ADDEND to the
    //   value to add to that binding's value to get this binding's
    //   value.
    // - If this binding is unconstrained, set BASE to this, and leave
    //   ADDEND unchanged.
    void Get(Binding **base, uint64_t *addend);

   private:
    // There are three cases:
    //
    // - A binding representing a known constant value has base_ NULL,
    //   and addend_ equal to the value.
    //
    // - A binding representing a completely unconstrained value has
    //   base_ pointing to this; addend_ is unused.
    //
    // - A binding whose value is related to some other binding's
    //   value has base_ pointing to that other binding, and addend_
    //   set to the amount to add to that binding's value to get this
    //   binding's value. We only represent relationships of the form
    //   x = y+c.
    //
    // Thus, the base_ links form a chain terminating in either a
    // known constant value or a completely unconstrained value. Most
    // operations on bindings do path compression: they change every
    // binding on the chain to point directly to the final value,
    // adjusting addends as appropriate.
    Binding *base_;
    uint64_t addend_;

    // The number of Labels and Bindings pointing to this binding.
    // (When a binding points to itself, indicating a completely
    // unconstrained binding, that doesn't count as a reference.)
    int reference_count_;
  };

  // This label's value.
  Binding *value_;
};
235
ted.mielczarek@gmail.comaeffe102013-03-06 14:04:42 +0000236inline Label operator+(uint64_t a, const Label &l) { return l + a; }
jimblandy92444962010-03-16 16:20:34 +0000237// Note that int-Label isn't defined, as negating a Label is not an
238// operation we support.
239
// Conventions for representing larger numbers as sequences of bytes.
enum Endianness {
  kBigEndian,     // Big-endian: the most significant byte comes first.
  kLittleEndian,  // Little-endian: the least significant byte comes first.
  kUnsetEndian,   // Used internally.
};
246
247// A section is a sequence of bytes, constructed by appending bytes
248// to the end. Sections have a convenient and flexible set of member
249// functions for appending data in various formats: big-endian and
250// little-endian signed and unsigned values of different sizes;
251// LEB128 and ULEB128 values (see below), and raw blocks of bytes.
252//
253// If you need to append a value to a section that is not convenient
254// to compute immediately, you can create a label, append the
255// label's value to the section, and then set the label's value
256// later, when it's convenient to do so. Once a label's value is
257// known, the section class takes care of updating all previously
258// appended references to it.
259//
260// Once all the labels to which a section refers have had their
261// values determined, you can get a copy of the section's contents
262// as a string.
263//
264// Note that there is no specified "start of section" label. This is
265// because there are typically several different meanings for "the
266// start of a section": the offset of the section within an object
267// file, the address in memory at which the section's content appear,
268// and so on. It's up to the code that uses the Section class to
269// keep track of these explicitly, as they depend on the application.
270class Section {
271 public:
272 Section(Endianness endianness = kUnsetEndian)
273 : endianness_(endianness) { };
ivan.penkov@gmail.com5f6e1f02012-07-02 22:55:57 +0000274
275 // A base class destructor should be either public and virtual,
276 // or protected and nonvirtual.
277 virtual ~Section() { };
jimblandy92444962010-03-16 16:20:34 +0000278
279 // Set the default endianness of this section to ENDIANNESS. This
280 // sets the behavior of the D<N> appending functions. If the
281 // assembler's default endianness was set, this is the
282 void set_endianness(Endianness endianness) {
283 endianness_ = endianness;
284 }
285
286 // Return the default endianness of this section.
287 Endianness endianness() const { return endianness_; }
288
289 // Append the SIZE bytes at DATA or the contents of STRING to the
290 // end of this section. Return a reference to this section.
ted.mielczarek@gmail.comaeffe102013-03-06 14:04:42 +0000291 Section &Append(const uint8_t *data, size_t size) {
jimblandy92444962010-03-16 16:20:34 +0000292 contents_.append(reinterpret_cast<const char *>(data), size);
293 return *this;
294 };
295 Section &Append(const string &data) {
296 contents_.append(data);
297 return *this;
298 };
299
300 // Append SIZE copies of BYTE to the end of this section. Return a
301 // reference to this section.
ted.mielczarek@gmail.comaeffe102013-03-06 14:04:42 +0000302 Section &Append(size_t size, uint8_t byte) {
jimblandy92444962010-03-16 16:20:34 +0000303 contents_.append(size, (char) byte);
304 return *this;
305 }
306
307 // Append NUMBER to this section. ENDIANNESS is the endianness to
308 // use to write the number. SIZE is the length of the number in
309 // bytes. Return a reference to this section.
ted.mielczarek@gmail.comaeffe102013-03-06 14:04:42 +0000310 Section &Append(Endianness endianness, size_t size, uint64_t number);
jimblandy92444962010-03-16 16:20:34 +0000311 Section &Append(Endianness endianness, size_t size, const Label &label);
312
313 // Append SECTION to the end of this section. The labels SECTION
314 // refers to need not be defined yet.
315 //
316 // Note that this has no effect on any Labels' values, or on
317 // SECTION. If placing SECTION within 'this' provides new
318 // constraints on existing labels' values, then it's up to the
319 // caller to fiddle with those labels as needed.
320 Section &Append(const Section &section);
321
322 // Append the contents of DATA as a series of bytes terminated by
323 // a NULL character.
324 Section &AppendCString(const string &data) {
325 Append(data);
326 contents_ += '\0';
327 return *this;
328 }
329
jimblandy073a7f62010-05-05 17:37:58 +0000330 // Append at most SIZE bytes from DATA; if DATA is less than SIZE bytes
331 // long, pad with '\0' characters.
332 Section &AppendCString(const string &data, size_t size) {
333 contents_.append(data, 0, size);
334 if (data.size() < size)
335 Append(size - data.size(), 0);
336 return *this;
337 }
338
jimblandy92444962010-03-16 16:20:34 +0000339 // Append VALUE or LABEL to this section, with the given bit width and
340 // endianness. Return a reference to this section.
341 //
342 // The names of these functions have the form <ENDIANNESS><BITWIDTH>:
343 // <ENDIANNESS> is either 'L' (little-endian, least significant byte first),
344 // 'B' (big-endian, most significant byte first), or
345 // 'D' (default, the section's default endianness)
346 // <BITWIDTH> is 8, 16, 32, or 64.
347 //
348 // Since endianness doesn't matter for a single byte, all the
349 // <BITWIDTH>=8 functions are equivalent.
350 //
351 // These can be used to write both signed and unsigned values, as
352 // the compiler will properly sign-extend a signed value before
353 // passing it to the function, at which point the function's
354 // behavior is the same either way.
ted.mielczarek@gmail.comaeffe102013-03-06 14:04:42 +0000355 Section &L8(uint8_t value) { contents_ += value; return *this; }
356 Section &B8(uint8_t value) { contents_ += value; return *this; }
357 Section &D8(uint8_t value) { contents_ += value; return *this; }
358 Section &L16(uint16_t), &L32(uint32_t), &L64(uint64_t),
359 &B16(uint16_t), &B32(uint32_t), &B64(uint64_t),
360 &D16(uint16_t), &D32(uint32_t), &D64(uint64_t);
jimblandy92444962010-03-16 16:20:34 +0000361 Section &L8(const Label &label), &L16(const Label &label),
362 &L32(const Label &label), &L64(const Label &label),
363 &B8(const Label &label), &B16(const Label &label),
364 &B32(const Label &label), &B64(const Label &label),
365 &D8(const Label &label), &D16(const Label &label),
366 &D32(const Label &label), &D64(const Label &label);
367
368 // Append VALUE in a signed LEB128 (Little-Endian Base 128) form.
369 //
370 // The signed LEB128 representation of an integer N is a variable
371 // number of bytes:
372 //
373 // - If N is between -0x40 and 0x3f, then its signed LEB128
374 // representation is a single byte whose value is N.
375 //
376 // - Otherwise, its signed LEB128 representation is (N & 0x7f) |
377 // 0x80, followed by the signed LEB128 representation of N / 128,
378 // rounded towards negative infinity.
379 //
380 // In other words, we break VALUE into groups of seven bits, put
381 // them in little-endian order, and then write them as eight-bit
382 // bytes with the high bit on all but the last.
383 //
384 // Note that VALUE cannot be a Label (we would have to implement
385 // relaxation).
386 Section &LEB128(long long value);
387
388 // Append VALUE in unsigned LEB128 (Little-Endian Base 128) form.
389 //
390 // The unsigned LEB128 representation of an integer N is a variable
391 // number of bytes:
392 //
393 // - If N is between 0 and 0x7f, then its unsigned LEB128
394 // representation is a single byte whose value is N.
395 //
396 // - Otherwise, its unsigned LEB128 representation is (N & 0x7f) |
397 // 0x80, followed by the unsigned LEB128 representation of N /
398 // 128, rounded towards negative infinity.
399 //
400 // Note that VALUE cannot be a Label (we would have to implement
401 // relaxation).
ted.mielczarek@gmail.comaeffe102013-03-06 14:04:42 +0000402 Section &ULEB128(uint64_t value);
jimblandy92444962010-03-16 16:20:34 +0000403
404 // Jump to the next location aligned on an ALIGNMENT-byte boundary,
405 // relative to the start of the section. Fill the gap with PAD_BYTE.
406 // ALIGNMENT must be a power of two. Return a reference to this
407 // section.
ted.mielczarek@gmail.comaeffe102013-03-06 14:04:42 +0000408 Section &Align(size_t alignment, uint8_t pad_byte = 0);
jimblandy92444962010-03-16 16:20:34 +0000409
410 // Clear the contents of this section.
411 void Clear();
412
413 // Return the current size of the section.
414 size_t Size() const { return contents_.size(); }
415
416 // Return a label representing the start of the section.
417 //
418 // It is up to the user whether this label represents the section's
419 // position in an object file, the section's address in memory, or
420 // what have you; some applications may need both, in which case
421 // this simple-minded interface won't be enough. This class only
422 // provides a single start label, for use with the Here and Mark
423 // member functions.
424 //
425 // Ideally, we'd provide this in a subclass that actually knows more
426 // about the application at hand and can provide an appropriate
427 // collection of start labels. But then the appending member
428 // functions like Append and D32 would return a reference to the
429 // base class, not the derived class, and the chaining won't work.
430 // Since the only value here is in pretty notation, that's a fatal
431 // flaw.
432 Label start() const { return start_; }
433
434 // Return a label representing the point at which the next Appended
435 // item will appear in the section, relative to start().
436 Label Here() const { return start_ + Size(); }
437
438 // Set *LABEL to Here, and return a reference to this section.
439 Section &Mark(Label *label) { *label = Here(); return *this; }
440
441 // If there are no undefined label references left in this
442 // section, set CONTENTS to the contents of this section, as a
443 // string, and clear this section. Return true on success, or false
444 // if there were still undefined labels.
445 bool GetContents(string *contents);
446
447 private:
448 // Used internally. A reference to a label's value.
449 struct Reference {
450 Reference(size_t set_offset, Endianness set_endianness, size_t set_size,
451 const Label &set_label)
452 : offset(set_offset), endianness(set_endianness), size(set_size),
453 label(set_label) { }
454
455 // The offset of the reference within the section.
456 size_t offset;
457
458 // The endianness of the reference.
459 Endianness endianness;
460
461 // The size of the reference.
462 size_t size;
463
464 // The label to which this is a reference.
465 Label label;
466 };
467
468 // The default endianness of this section.
469 Endianness endianness_;
470
471 // The contents of the section.
472 string contents_;
473
474 // References to labels within those contents.
475 vector<Reference> references_;
476
477 // A label referring to the beginning of the section.
478 Label start_;
479};
480
ted.mielczarek35c41e02010-06-25 16:56:16 +0000481} // namespace test_assembler
jimblandy92444962010-03-16 16:20:34 +0000482} // namespace google_breakpad
483
484#endif // PROCESSOR_TEST_ASSEMBLER_H_