Compare commits: a79be2f807 ... 12d14b7373 (19 commits)

| Author | SHA1 |
| --- | --- |
| Zbigniew Jędrzejewski-Szmek | 12d14b7373 |
| Lennart Poettering | 5c0102f83f |
| Lennart Poettering | c85cf04fa3 |
| Lennart Poettering | 3afe5c00a5 |
| Lennart Poettering | d80b051cea |
| Lennart Poettering | 70cd1e561c |
| Lennart Poettering | bbcd38e41e |
| Lennart Poettering | 8653185a9e |
| Lennart Poettering | 0dbe57ee86 |
| Lennart Poettering | 4ce534f4cd |
| Lennart Poettering | 7851ec6634 |
| Lennart Poettering | 20b0acfacd |
| Lennart Poettering | f4474e004d |
| Lennart Poettering | 5030c85a3e |
| Lennart Poettering | e958c05703 |
| Lennart Poettering | cde8c5f722 |
| Lennart Poettering | a76560915f |
| Lennart Poettering | e9ece6a0e3 |
| Lennart Poettering | d1d8f0f369 |

TODO (6 changed lines):
```diff
@@ -8,8 +8,6 @@ External:
 
 * Fedora: add an rpmlint check that verifies that all unit files in the RPM are listed in %systemd_post macros.
 
-* wiki: update journal format documentation for lz4 additions
-
 Janitorial Clean-ups:
 
 * Rearrange tests so that the various test-xyz.c match a specific src/basic/xyz.c again
@@ -41,6 +39,10 @@ Features:
   so that the client side can remain entirely unprivileged, without SUID or
   anything like that.
 
+* journald: do journal file writing out-of-process, with one writer process per
+  client UID, so that synthetic hash table collisions can slow down a specific
+  user's journal stream but not the others.
+
 * add "throttling" to sd-event event sources: optionally, when we wake up too
   often for one, let's turn it off entirely for a while. Use that for the
   /proc/self/mountinfo logic.
```
|
@@ -0,0 +1,693 @@ (new file, reproduced in full below)
---
title: Journal File Format
category: Interfaces
layout: default
---

# Journal File Format

_Note that this document describes the binary on-disk format of journals
only. For interfacing with web technologies there's the [Journal JSON
Format](http://www.freedesktop.org/wiki/Software/systemd/json). For transfer
of journal data across the network there's the [Journal Export
Format](http://www.freedesktop.org/wiki/Software/systemd/export)._

The systemd journal stores log data in a binary format with several features:

* Fully indexed by all fields
* Can store binary data, up to 2^64-1 bytes in size
* Seekable
* Primarily append-based, hence robust to corruption
* Support for in-line compression
* Support for in-line Forward Secure Sealing

This document explains the basic structure of the file format on disk. We are
making this available primarily to allow review and provide documentation. Note
that the actual implementation in the [systemd
codebase](https://github.com/systemd/systemd/blob/master/src/journal/) is the
only ultimately authoritative description of the format, so if this document
and the code disagree, the code is right. That said, we'll of course try hard
to keep this document up-to-date and accurate.

Instead of implementing your own reader or writer for journal files we ask you
to use the [Journal's native C
API](http://www.freedesktop.org/software/systemd/man/sd-journal.html) to access
these files. It provides you with full access to the files, and will not
withhold any data. If you find a limitation, please ping us and we might add
some additional interfaces for you.

If you need access to the raw journal data in serialized stream form without
the C API, our recommendation is to make use of the [Journal Export
Format](http://www.freedesktop.org/wiki/Software/systemd/export), which you can
get via "journalctl -o export" or via systemd-journal-gatewayd. The export
format is much simpler to parse, but complete and accurate. Due to its
stream-based nature it is not indexed.

_Or, to put this in other words: this low-level document is probably not what
you want to use as the basis of your project. You want our [C
API](http://www.freedesktop.org/software/systemd/man/sd-journal.html) instead!
And if you really don't want the C API, then you want the [Journal Export
Format](http://www.freedesktop.org/wiki/Software/systemd/export) instead! This
document is primarily for your entertainment and education. Thank you!_

This document assumes you have a basic understanding of the journal concepts,
the properties of a journal entry and so on. If not, please go and read up,
then come back! This is a good opportunity to read about the [basic properties
of journal
entries](http://www.freedesktop.org/software/systemd/man/systemd.journal-fields.html),
in particular realize that they may include binary non-text data (though
usually don't), and that the same field might have multiple values assigned
within the same entry.

This document describes the current format of systemd 246. The documented
format is compatible with the format used in the first versions of the journal,
but has received various compatible and incompatible additions since then.

If you are wondering why the journal file format has been created in the first
place instead of adopting an existing database implementation, please have a
look [at this
thread](https://lists.freedesktop.org/archives/systemd-devel/2012-October/007054.html).
## Basics

* All offsets, sizes, time values, hashes (and most other numeric values) are 64bit unsigned integers in LE format.
* Offsets are always relative to the beginning of the file.
* The 64bit hash function siphash24 is used for newer journal files. For older files [Jenkins lookup3](https://en.wikipedia.org/wiki/Jenkins_hash_function) is used, more specifically jenkins_hashlittle2() with the first 32bit integer it returns used as the higher 32bit part of the 64bit value, and the second one used as the lower 32bit part (see the sketch below).
* All structures are aligned to 64bit boundaries and padded to multiples of 64bit.
* The format is designed to be read and written via memory mapping using multiple mapped windows.
* All time values are stored in usec since the respective epoch.
* Wall clock time values are relative to the Unix time epoch, i.e. January 1st, 1970. (CLOCK_REALTIME)
* Monotonic time values are always stored jointly with the kernel boot ID value (i.e. /proc/sys/kernel/random/boot_id) they belong to. They tend to be relative to the start of the boot, but aren't for containers. (CLOCK_MONOTONIC)
* Randomized, unique 128bit IDs are used in various locations. These are generally UUID v4 compatible, but this is not a requirement.
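To make the hash rule for older files concrete, here is a minimal sketch of how the two 32bit values produced by jenkins_hashlittle2() are combined into one 64bit hash. Only jenkins_hashlittle2() itself is part of the lookup3 code; the wrapper name is an assumption of this example:

```c
#include <stddef.h>
#include <stdint.h>

/* Provided by the lookup3 implementation: fills in two 32bit hash values. */
void jenkins_hashlittle2(const void *key, size_t length, uint32_t *pc, uint32_t *pb);

/* Hypothetical wrapper: the first returned integer becomes the higher 32bit
 * part, the second one the lower 32bit part of the 64bit hash value. */
static inline uint64_t jenkins_hash64(const void *data, size_t size) {
        uint32_t a = 0, b = 0;

        jenkins_hashlittle2(data, size, &a, &b);
        return ((uint64_t) a << 32) | (uint64_t) b;
}
```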
## General Rules
|
||||
|
||||
If any kind of corruption is noticed by a writer it should immediately rotate
|
||||
the file and start a new one. No further writes should be attempted to the
|
||||
original file, but it should be left around so that as little data as possible
|
||||
is lost.
|
||||
|
||||
If any kind of corruption is noticed by a reader it should try hard to handle
|
||||
this gracefully, such as skipping over the corrupted data, but allowing access
|
||||
to as much data around it as possible.
|
||||
|
||||
A reader should verify all offsets and other data as it reads it. This includes
|
||||
checking for alignment and range of offsets in the file, especially before
|
||||
trying to read it via a memory map.
|
||||
|
||||
A reader must interleave rotated and corrupted files as good as possible and
|
||||
present them as single stream to the user.
|
||||
|
||||
All fields marked as "reserved" must be initialized with 0 when writing and be
|
||||
ignored on reading. They are currently not used but might be used later on.
|
||||
|
||||
|
||||
## Structure

The file format's data structures are declared in
[journal-def.h](https://github.com/systemd/systemd/blob/master/src/journal/journal-def.h).

The file format begins with a header structure. After the header structure,
object structures follow. Objects are appended to the end as time
progresses. Most data stored in these objects is not altered anymore after
having been written once, with the exception of records necessary for
indexing. When new data is appended to a file the writer first writes all new
objects to the end of the file, and then links them up at the front after
that's done. Currently, seven different object types are known:

```c
enum {
        OBJECT_UNUSED,
        OBJECT_DATA,
        OBJECT_FIELD,
        OBJECT_ENTRY,
        OBJECT_DATA_HASH_TABLE,
        OBJECT_FIELD_HASH_TABLE,
        OBJECT_ENTRY_ARRAY,
        OBJECT_TAG,
        _OBJECT_TYPE_MAX
};
```

* A **DATA** object, which encapsulates the contents of one field of an entry, i.e. a string such as `_SYSTEMD_UNIT=avahi-daemon.service`, or `MESSAGE=Foobar made a booboo.` but possibly including large or binary data, and always prefixed by the field name and "=".
* A **FIELD** object, which encapsulates a field name, i.e. a string such as `_SYSTEMD_UNIT` or `MESSAGE`, without any `=` or even value.
* An **ENTRY** object, which binds several **DATA** objects together into a log entry.
* A **DATA_HASH_TABLE** object, which encapsulates a hash table for finding existing **DATA** objects.
* A **FIELD_HASH_TABLE** object, which encapsulates a hash table for finding existing **FIELD** objects.
* An **ENTRY_ARRAY** object, which encapsulates a sorted array of offsets to entries, used for seeking by binary search.
* A **TAG** object, consisting of an FSS sealing tag for all data from the beginning of the file or the last tag written (whichever is later).
## Header

The Header struct defines, well, you guessed it, the file header:

```c
_packed_ struct Header {
        uint8_t signature[8]; /* "LPKSHHRH" */
        le32_t compatible_flags;
        le32_t incompatible_flags;
        uint8_t state;
        uint8_t reserved[7];
        sd_id128_t file_id;
        sd_id128_t machine_id;
        sd_id128_t boot_id;    /* last writer */
        sd_id128_t seqnum_id;
        le64_t header_size;
        le64_t arena_size;
        le64_t data_hash_table_offset;
        le64_t data_hash_table_size;
        le64_t field_hash_table_offset;
        le64_t field_hash_table_size;
        le64_t tail_object_offset;
        le64_t n_objects;
        le64_t n_entries;
        le64_t tail_entry_seqnum;
        le64_t head_entry_seqnum;
        le64_t entry_array_offset;
        le64_t head_entry_realtime;
        le64_t tail_entry_realtime;
        le64_t tail_entry_monotonic;
        /* Added in 187 */
        le64_t n_data;
        le64_t n_fields;
        /* Added in 189 */
        le64_t n_tags;
        le64_t n_entry_arrays;
        /* Added in 246 */
        le64_t data_hash_chain_depth;
        le64_t field_hash_chain_depth;
};
```
The first 8 bytes of Journal files must contain the ASCII characters LPKSHHRH.

If a writer finds that the **machine_id** of a file to write to does not match
the machine it is running on it should immediately rotate the file and start a
new one.

When a journal file is first created the **file_id** is randomly and uniquely
initialized.

When a writer opens a file it shall initialize the **boot_id** to the current
boot ID of the system.

The currently used part of the file is the **header_size** plus the
**arena_size** field of the header. If a writer needs to write to a file where
the actual file size on disk is smaller than the reported value it shall
immediately rotate the file and start a new one. If a writer is asked to write
to a file with a header that is shorter than its own definition of the struct
Header, it shall immediately rotate the file and start a new one.

The **n_objects** field contains a counter for objects currently available in
this file. As objects are appended to the end of the file this counter is
increased.

The first object in the file starts immediately after the header. The last
object in the file is at the offset **tail_object_offset**, which may be 0 if
no object is in the file yet.

The **n_entries**, **n_data**, **n_fields**, **n_tags**, **n_entry_arrays**
fields are counters of the objects of the specific types.

**tail_entry_seqnum** and **head_entry_seqnum** contain the sequential number
(see below) of the last or first entry in the file, respectively, or 0 if no
entry has been written yet.

**tail_entry_realtime** and **head_entry_realtime** contain the wallclock
timestamp of the last or first entry in the file, respectively, or 0 if no
entry has been written yet.

**tail_entry_monotonic** is the monotonic timestamp of the last entry in the
file, referring to monotonic time of the boot identified by **boot_id**.

**data_hash_chain_depth** is a counter of the deepest chain in the data hash
table, minus one. This is updated whenever a chain is found that is longer than
the previous deepest chain found. Note that the counter is updated during hash
table lookups, as the chains are traversed. This counter is used to determine
when it is a good time to rotate the journal file, because hash collisions have
become too frequent.

Similarly, **field_hash_chain_depth** is a counter of the deepest chain in the
field hash table, minus one.
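As a minimal sketch of the writer-side rules above — signature check, on-disk size versus **header_size** + **arena_size**, and rotating when the header is shorter than the writer's own struct Header — assuming the struct Header declaration from above with the le*_t fields as plain little-endian integer typedefs; verify_file_for_writing() is a hypothetical helper name:

```c
#include <endian.h>
#include <stdint.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

/* Returns 1 if a writer may append to this file, 0 if it should rotate. */
int verify_file_for_writing(int fd) {
        struct stat st;
        struct Header *h;
        int ok;

        if (fstat(fd, &st) < 0)
                return 0;
        if ((uint64_t) st.st_size < sizeof(struct Header))
                return 0; /* Header shorter than our own definition: rotate. */

        h = mmap(NULL, sizeof(struct Header), PROT_READ, MAP_SHARED, fd, 0);
        if (h == MAP_FAILED)
                return 0;

        ok = memcmp(h->signature, "LPKSHHRH", 8) == 0 &&
                le64toh(h->header_size) >= sizeof(struct Header) &&
                /* The used part of the file must fit in the size on disk. */
                le64toh(h->header_size) + le64toh(h->arena_size) <= (uint64_t) st.st_size;

        munmap(h, sizeof(struct Header));
        return ok;
}
```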
## Extensibility

The format is supposed to be extensible in order to enable future additions of
features. Readers should simply skip objects of unknown types as they read
them. If a compatible feature extension is made a new bit is registered in the
header's **compatible_flags** field. If a feature extension is used that makes
the format incompatible a new bit is registered in the header's
**incompatible_flags** field. Readers should check these two bit fields: if
they find a flag they don't understand in **compatible_flags** they should
continue to read the file, but if they find one in **incompatible_flags** they
should fail, asking for an update of the software. Writers should refuse
writing if there's an unknown bit flag in either of these fields.

The file header may be extended as new features are added. The size of the file
header is stored in the header. All header fields up to "n_data" are known to
unconditionally exist in all revisions of the file format; all fields starting
with "n_data" need to be explicitly checked for via a size check, since they
were additions after the initial release.

Currently only five extensions flagged in the flags fields are known:
```c
enum {
        HEADER_INCOMPATIBLE_COMPRESSED_XZ   = 1 << 0,
        HEADER_INCOMPATIBLE_COMPRESSED_LZ4  = 1 << 1,
        HEADER_INCOMPATIBLE_KEYED_HASH      = 1 << 2,
        HEADER_INCOMPATIBLE_COMPRESSED_ZSTD = 1 << 3,
};

enum {
        HEADER_COMPATIBLE_SEALED = 1 << 0,
};
```

HEADER_INCOMPATIBLE_COMPRESSED_XZ indicates that the file includes DATA objects
that are compressed using XZ. Similarly, HEADER_INCOMPATIBLE_COMPRESSED_LZ4
indicates that the file includes DATA objects that are compressed with the LZ4
algorithm. And HEADER_INCOMPATIBLE_COMPRESSED_ZSTD indicates that there are
objects compressed with ZSTD.

HEADER_INCOMPATIBLE_KEYED_HASH indicates that instead of the unkeyed Jenkins
hash function the keyed siphash24 hash function is used for the two hash
tables, see below.

HEADER_COMPATIBLE_SEALED indicates that the file includes TAG objects required
for Forward Secure Sealing.
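A sketch of the reader policy just described — tolerate unknown compatible flags, refuse unknown incompatible ones. The MY_*_SUPPORTED masks stand in for whatever feature set a given reader actually implements and are assumptions of this example:

```c
#define MY_COMPATIBLE_SUPPORTED HEADER_COMPATIBLE_SEALED
#define MY_INCOMPATIBLE_SUPPORTED                 \
        (HEADER_INCOMPATIBLE_COMPRESSED_XZ |      \
         HEADER_INCOMPATIBLE_COMPRESSED_LZ4 |     \
         HEADER_INCOMPATIBLE_KEYED_HASH |         \
         HEADER_INCOMPATIBLE_COMPRESSED_ZSTD)

/* Returns 0 if the file may be read, negative if the reader must give up. */
static int check_feature_flags(const struct Header *h) {
        if (le32toh(h->incompatible_flags) & ~MY_INCOMPATIBLE_SUPPORTED)
                return -1; /* Unknown incompatible feature: fail, ask for an update. */

        /* Unknown bits in compatible_flags are fine: just keep reading. */
        return 0;
}
```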
## Dirty Detection

```c
enum {
        STATE_OFFLINE = 0,
        STATE_ONLINE = 1,
        STATE_ARCHIVED = 2,
        _STATE_MAX
};
```

If a file is opened for writing the **state** field should be set to
STATE_ONLINE. If a file is closed after writing the **state** field should be
set to STATE_OFFLINE. After a file has been rotated it should be set to
STATE_ARCHIVED. If a writer is asked to write to a file that is not in
STATE_OFFLINE it should immediately rotate the file and start a new one,
without changing the file.

After and before the state field is changed, fdatasync() should be executed on
the file to ensure the dirty state hits disk.
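The sync-flip-sync discipline above might look like this in a writer, assuming a hypothetical JournalFile handle that carries the open file descriptor and the mapped header:

```c
#include <errno.h>
#include <unistd.h>

static int journal_file_set_state(JournalFile *f, uint8_t state) {
        /* Make everything written so far durable before announcing the new state... */
        if (fdatasync(f->fd) < 0)
                return -errno;

        f->header->state = state;

        /* ...and make the state change itself durable, too. */
        if (fdatasync(f->fd) < 0)
                return -errno;

        return 0;
}
```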
## Sequence Numbers

All entries carry sequence numbers that are monotonically counted up for each
entry (starting at 1) and are unique among all files which carry the same
**seqnum_id** field. This field is randomly generated when the journal daemon
creates its first file. All files generated by the same journal daemon instance
should hence carry the same seqnum_id. This should guarantee a monotonic stream
of sequential numbers for easy interleaving even if entries are distributed
among several files, such as the system journal and many per-user journals.
## Concurrency

The file format is designed to be usable in a simultaneous
single-writer/multiple-reader scenario. The synchronization model is very weak
in order to facilitate storage on the most basic of file systems (well, the
most basic ones that provide us with mmap() that is), and allow good
performance. No file locking is used. The only time where disk synchronization
via fdatasync() should be enforced is after and before changing the **state**
field in the file header (see above). It is recommended to execute a memory
barrier after appending and initializing new objects at the end of the file,
and before linking them up in the earlier objects.

This weak synchronization model means that it is crucial that readers verify
the structural integrity of the file as they read it and handle invalid
structure gracefully. (Checking what you read is a pretty good idea out of
security considerations anyway.) This specifically includes checking offset
values, and that they point to valid objects, with valid sizes and of the type
and hash value expected. All code must be written with the fact in mind that a
file with inconsistent structure might just be inconsistent temporarily, and
might become consistent later on. Payload OTOH requires less scrutiny, as it
should only be linked up (and hence visible to readers) after it was
successfully written to memory (though not necessarily to disk). On non-local
file systems it is a good idea to verify the payload hashes when reading, in
order to avoid annoyances with mmap() inconsistencies.

Clients intending to show a live view of the journal should use inotify() to
watch for file changes. Since file writes done via mmap() do not result in
inotify events, writers shall truncate the file to its current size after
writing one or more entries, which results in inotify events being
generated. Note that this is not used as a transaction scheme (it doesn't
protect anything), but merely for triggering wakeups.

Note that inotify will not work on network file systems if reader and writer
reside on different hosts. Readers which detect that they are reading journal
files on a non-local file system should hence not rely on inotify for live
views but fall back to simple time-based polling of the files (maybe recheck
every 2s).
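The truncate-to-current-size wakeup trick reads oddly, so here is a minimal sketch of it; the no-op ftruncate() changes nothing about the data but makes the kernel emit an inotify event for watchers:

```c
#include <sys/stat.h>
#include <unistd.h>

static void journal_file_post_change(JournalFile *f) {
        struct stat st;

        if (fstat(f->fd, &st) < 0)
                return;

        /* "Truncating" to the current size is a no-op for the contents, but
         * generates the inotify event that mmap()ed writes never produce. */
        (void) ftruncate(f->fd, st.st_size);
}
```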
## Objects

All objects carry a common header:

```c
enum {
        OBJECT_COMPRESSED_XZ   = 1 << 0,
        OBJECT_COMPRESSED_LZ4  = 1 << 1,
        OBJECT_COMPRESSED_ZSTD = 1 << 2,
};

_packed_ struct ObjectHeader {
        uint8_t type;
        uint8_t flags;
        uint8_t reserved[6];
        le64_t size;
        uint8_t payload[];
};
```

The **type** field is one of the object types listed above. The **flags** field
currently knows three flags: OBJECT_COMPRESSED_XZ, OBJECT_COMPRESSED_LZ4 and
OBJECT_COMPRESSED_ZSTD. It is only valid for DATA objects and indicates that
the data payload is compressed with XZ/LZ4/ZSTD. If one of the
OBJECT_COMPRESSED_* flags is set for an object then the matching
HEADER_INCOMPATIBLE_COMPRESSED_XZ/HEADER_INCOMPATIBLE_COMPRESSED_LZ4/HEADER_INCOMPATIBLE_COMPRESSED_ZSTD
flag must be set for the file as well. At most one of these three bits may be
set. The **size** field encodes the size of the object including all its
headers and payload.
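Tying the file header and object header together, a sketch of how a reader could walk all objects while doing the validation the Concurrency section demands (alignment, range, sane sizes). object_at() is a hypothetical accessor returning a pointer into the memory map:

```c
#include <errno.h>

#define ALIGN8(n) (((n) + 7ULL) & ~7ULL) /* structures are 64bit-aligned */

static int for_each_object(JournalFile *f, int (*cb)(const struct ObjectHeader *o)) {
        uint64_t p = le64toh(f->header->header_size);
        uint64_t end = p + le64toh(f->header->arena_size);

        while (p < end) {
                const struct ObjectHeader *o;
                uint64_t sz;

                if (p % 8 != 0 || p + sizeof(struct ObjectHeader) > end)
                        return -EBADMSG; /* misaligned or out-of-range offset */

                o = object_at(f, p);
                sz = le64toh(o->size);
                if (sz < sizeof(struct ObjectHeader) || sz > end - p)
                        return -EBADMSG; /* truncated or overlong object */

                if (cb(o) < 0)
                        break;

                p += ALIGN8(sz); /* next object starts at the padded offset */
        }

        return 0;
}
```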
## Data Objects

```c
_packed_ struct DataObject {
        ObjectHeader object;
        le64_t hash;
        le64_t next_hash_offset;
        le64_t next_field_offset;
        le64_t entry_offset; /* the first array entry we store inline */
        le64_t entry_array_offset;
        le64_t n_entries;
        uint8_t payload[];
};
```

Data objects carry actual field data in the **payload[]** array, including a
field name, a '=' and the field data. Example:
`_SYSTEMD_UNIT=foobar.service`. The **hash** field is a hash value of the
payload. If the `HEADER_INCOMPATIBLE_KEYED_HASH` flag is set in the file header
this is the siphash24 hash value of the payload, keyed by the file ID as stored
in the `.file_id` field of the file header. If the flag is not set it is the
non-keyed Jenkins hash of the payload instead. The keyed hash is preferred as
it makes the format more robust against attackers that want to trigger hash
collisions in the hash table.

**next_hash_offset** is used to link up DATA objects in the DATA_HASH_TABLE if
a hash collision happens (in a singly linked list, with an offset of 0
indicating the end). **next_field_offset** is used to link up data objects with
the same field name from the FIELD object of the field used.

**entry_offset** is an offset to the first ENTRY object referring to this DATA
object. **entry_array_offset** is an offset to an ENTRY_ARRAY object with
offsets to other entries referencing this DATA object. Storing the offset to
the first ENTRY object in-line is an optimization given that many DATA objects
will be referenced from a single entry only (for example, `MESSAGE=` frequently
includes a practically unique string). **n_entries** is a counter of the total
number of ENTRY objects that reference this object, i.e. the sum of all
ENTRY_ARRAYS chained up from this object, plus 1.

The **payload[]** field contains the field name and data unencoded, unless
OBJECT_COMPRESSED_XZ/OBJECT_COMPRESSED_LZ4/OBJECT_COMPRESSED_ZSTD is set in the
`ObjectHeader`, in which case the payload is compressed with the indicated
compression algorithm.
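The hash selection rule above, as a small sketch; siphash24() with a 16 byte key and the jenkins_hash64() wrapper from the Basics section are assumed to be available:

```c
static uint64_t payload_hash(const struct Header *h, const void *payload, size_t size) {
        if (le32toh(h->incompatible_flags) & HEADER_INCOMPATIBLE_KEYED_HASH)
                /* Keyed hash: the 128bit file ID doubles as the siphash24 key. */
                return siphash24(payload, size, h->file_id.bytes);

        /* Legacy files: unkeyed Jenkins lookup3, combined as described earlier. */
        return jenkins_hash64(payload, size);
}
```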
## Field Objects

```c
_packed_ struct FieldObject {
        ObjectHeader object;
        le64_t hash;
        le64_t next_hash_offset;
        le64_t head_data_offset;
        uint8_t payload[];
};
```

Field objects are used to enumerate all possible values a certain field name
can take in the entire journal file.

The **payload[]** array contains the actual field name, without '=' or any
field value. Example: `_SYSTEMD_UNIT`. The **hash** field is a hash value of
the payload. As for the DATA objects, this too is either the `.file_id` keyed
siphash24 hash of the payload, or the non-keyed Jenkins hash.

**next_hash_offset** is used to link up FIELD objects in the FIELD_HASH_TABLE
if a hash collision happens (in a singly linked list, with offset 0 indicating
the end). **head_data_offset** points to the first DATA object that shares this
field name. It is the head of a singly linked list using DATA's
**next_field_offset** offset.
## Entry Objects

```c
_packed_ struct EntryItem {
        le64_t object_offset;
        le64_t hash;
};

_packed_ struct EntryObject {
        ObjectHeader object;
        le64_t seqnum;
        le64_t realtime;
        le64_t monotonic;
        sd_id128_t boot_id;
        le64_t xor_hash;
        EntryItem items[];
};
```

An ENTRY object binds several DATA objects together into one log entry, and
includes other metadata such as various timestamps.

The **seqnum** field contains the sequence number of the entry, **realtime**
the realtime timestamp, and **monotonic** the monotonic timestamp for the boot
identified by **boot_id**.

The **xor_hash** field contains a binary XOR of the hashes of the payload of
all DATA objects referenced by this ENTRY. This value is usable to check the
contents of the entry, being independent of the order of the DATA objects in
the array. Note that even for files that have the
`HEADER_INCOMPATIBLE_KEYED_HASH` flag set (and thus otherwise use the siphash24
hash function) the hash function used for this field, as a singular exception,
is the Jenkins lookup3 hash function. The XOR hash value is used to quickly
compare the contents of two entries, and to define a well-defined order between
two entries that otherwise have the same sequence numbers and timestamps.

The **items[]** array contains references to all DATA objects of this entry,
plus their respective hashes (which are calculated the same way as in the DATA
objects, i.e. keyed by the file ID).

In the file ENTRY objects are written ordered monotonically by sequence
number. For continuous parts of the file written during the same boot
(i.e. with the same boot_id) the monotonic timestamp is monotonic too. Modulo
wallclock time jumps (due to incorrect clocks being corrected) the realtime
timestamps are monotonic too.
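A sketch of validating **xor_hash** as described, including the deliberate exception that even keyed-hash files use the Jenkins hash here; entry_n_items() and data_payload_at() are hypothetical accessors over the memory map:

```c
static int entry_check_xor_hash(JournalFile *f, const struct EntryObject *e) {
        uint64_t xor_hash = 0;

        for (uint64_t i = 0; i < entry_n_items(e); i++) {
                const void *payload;
                size_t size;

                if (data_payload_at(f, le64toh(e->items[i].object_offset), &payload, &size) < 0)
                        return -EBADMSG;

                /* Always Jenkins here, independently of HEADER_INCOMPATIBLE_KEYED_HASH. */
                xor_hash ^= jenkins_hash64(payload, size);
        }

        return xor_hash == le64toh(e->xor_hash) ? 0 : -EBADMSG;
}
```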
## Hash Table Objects

```c
_packed_ struct HashItem {
        le64_t head_hash_offset;
        le64_t tail_hash_offset;
};

_packed_ struct HashTableObject {
        ObjectHeader object;
        HashItem items[];
};
```

The structure of the DATA_HASH_TABLE and FIELD_HASH_TABLE objects is
identical. They implement a simple hash table, with each cell containing
offsets to the head and tail of the singly linked list of the DATA and FIELD
objects, respectively. DATA's and FIELD's next_hash_offset fields are used to
chain up the objects. Empty cells have both offsets set to 0.

Each file contains exactly one DATA_HASH_TABLE and one FIELD_HASH_TABLE
object. Their payload is directly referred to by the file header in the
**data_hash_table_offset**, **data_hash_table_size**,
**field_hash_table_offset**, **field_hash_table_size** fields. These offsets do
_not_ point to the object headers but directly to the payloads. When a new
journal file is created the two hash table objects need to be created right
away as the first two objects in the stream.

If the hash table fill level grows beyond a certain limit (taking a cue from
Java's Hashtable, for example: > 75%), the writer should rotate the file and
create a new one.

The DATA_HASH_TABLE should be sized taking into account the maximum size the
file is expected to grow to, as configured by the administrator or disk space
considerations. The FIELD_HASH_TABLE should be sized to a fixed size; the
number of fields should be pretty static as it depends only on developers'
creativity rather than runtime parameters.
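Putting the pieces together, a sketch of a DATA hash table lookup: pick the cell from the hash, then walk the collision chain. object_at() is the hypothetical accessor from before, f->data_hash_table is assumed to point at the table payload in the map, and payload comparison is simplified to the uncompressed case:

```c
#include <stddef.h>
#include <string.h>

static const struct DataObject *find_data(JournalFile *f, const void *payload, size_t size) {
        uint64_t hash = payload_hash(f->header, payload, size);
        uint64_t n_cells = le64toh(f->header->data_hash_table_size) / sizeof(struct HashItem);
        const struct HashItem *cell = &f->data_hash_table[hash % n_cells];
        uint64_t p = le64toh(cell->head_hash_offset);

        while (p != 0) {
                const struct DataObject *o = (const struct DataObject *) object_at(f, p);

                if (le64toh(o->hash) == hash &&
                    le64toh(o->object.size) - offsetof(struct DataObject, payload) == size &&
                    memcmp(o->payload, payload, size) == 0) /* uncompressed payloads only */
                        return o;

                p = le64toh(o->next_hash_offset); /* follow the collision chain */
        }

        return NULL;
}
```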
## Entry Array Objects

```c
_packed_ struct EntryArrayObject {
        ObjectHeader object;
        le64_t next_entry_array_offset;
        le64_t items[];
};
```

Entry arrays are used to store a sorted array of offsets to entries. Entry
arrays are strictly sorted by offsets on disk, and hence by their timestamps
and sequence numbers (with some restrictions, see above).

Entry arrays are chained up. If one entry array is full another one is
allocated and the **next_entry_array_offset** field of the old one is pointed
to it. An entry array with **next_entry_array_offset** set to 0 is the last in
the list. To optimize allocation and seeking, entry arrays should double in
size as they are appended to a chain.

Due to being monotonically ordered, entry arrays may be searched with a binary
search (bisection).

One chain of entry arrays links up all entries written to the journal. The
first entry array is referenced in the **entry_array_offset** field of the
header.

Each DATA object also references an entry array chain listing all entries
referencing that specific DATA object. Since many DATA objects are only
referenced by a single ENTRY the first offset of the list is stored inside the
DATA object itself; an ENTRY_ARRAY object is only needed if it is referenced by
more than one ENTRY.
## Tag Object

```c
#define TAG_LENGTH (256/8)

_packed_ struct TagObject {
        ObjectHeader object;
        le64_t seqnum;
        le64_t epoch;
        uint8_t tag[TAG_LENGTH]; /* SHA-256 HMAC */
};
```

Tag objects are used to seal the journal against alteration. At regular
intervals a tag object is appended to the file. The tag object consists of a
SHA-256 HMAC tag that is calculated from the objects stored in the file since
the last tag was written, or from the beginning if no tag was written yet. The
key for the HMAC is calculated via the externally maintained FSPRG logic for
the epoch that is written into **epoch**. The sequence number **seqnum** is
increased with each tag. When calculating the HMAC of objects, header fields
that are volatile are excluded (skipped). More specifically, all fields that
might validly be altered to maintain a consistent file structure (such as
offsets to objects added later for the purpose of linked lists and suchlike)
after an object has been written are not protected by the tag. This means a
verifier has to independently check these fields for consistency of
structure. For the fields excluded from the HMAC please consult the source code
directly. A verifier should read the file from the beginning to the end, always
calculating the HMAC for the objects it reads. Each time a tag object is
encountered the HMAC should be verified and restarted. The tag object sequence
numbers need to increase strictly monotonically. Tag objects themselves are
partially protected by the HMAC (i.e. seqnum and epoch are included, the tag
itself is not).
## Algorithms

### Reading

Given an offset to an entry all data fields are easily found by following the
offsets in the data item array of the entry.

Listing entries without filter is done by traversing the list of entry arrays
starting with the header's **entry_array_offset** field.

Seeking to an entry by timestamp or sequence number (without any matches) is
done via binary search in the entry arrays starting with the header's
**entry_array_offset** field. Since these arrays double in size as more are
added, the time cost of seeking is O(log(n)*log(n)) if n is the number of
entries in the file.
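A sketch of such a seek by sequence number: walk the chain of (doubling) arrays, then bisect within the first array whose last item already reaches the target. entry_array_at(), entry_array_n_items() and entry_seqnum_at() are hypothetical accessors. Since the chain has O(log(n)) arrays and each bisection is O(log(n)), this matches the O(log(n)*log(n)) bound above:

```c
/* Returns the offset of the first entry with seqnum >= target, or 0. */
static uint64_t seek_by_seqnum(JournalFile *f, uint64_t target) {
        uint64_t a = le64toh(f->header->entry_array_offset);

        while (a != 0) {
                const struct EntryArrayObject *ea = entry_array_at(f, a);
                uint64_t n = entry_array_n_items(ea);

                if (n > 0 && entry_seqnum_at(f, le64toh(ea->items[n-1])) >= target) {
                        /* Target lies in this array: plain bisection. */
                        uint64_t lo = 0, hi = n - 1;

                        while (lo < hi) {
                                uint64_t mid = lo + (hi - lo) / 2;

                                if (entry_seqnum_at(f, le64toh(ea->items[mid])) < target)
                                        lo = mid + 1;
                                else
                                        hi = mid;
                        }
                        return le64toh(ea->items[lo]);
                }

                a = le64toh(ea->next_entry_array_offset); /* not here: try next array */
        }

        return 0; /* target is past the last entry */
}
```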
When seeking or listing with one field match applied, the DATA object of the
match is first identified, and then its data entry array chain traversed. The
time cost is the same as for seeks/listings with no match.

If multiple matches are applied, multiple chains of entry arrays should be
traversed in parallel. Since they all are strictly monotonically ordered by
offset of the entries, advancing in one can be directly applied to the others,
until an entry matching all matches is found. In the worst case seeking like
this is O(n) where n is the number of matching entries of the "loosest" match,
but in the common case it should be much more efficient, at least for the
well-known fields, where the set of possible field values tends to be closely
related. Checking whether an entry matches a number of matches is efficient
since the item array of the entry contains hashes of all data fields
referenced, and the number of data fields of an entry is generally small (<
30).

When interleaving multiple journal files seeking tends to be a frequently used
operation, but in this case it can be effectively suppressed by caching results
from previous entries.
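Interleaving needs a total order across files. A simplified sketch of a comparator along the lines of the Sequence Numbers and Entry Objects sections — seqnums only compare within the same seqnum_id, then boot-local monotonic timestamps, then realtime (the further xor_hash tie-break mentioned earlier is omitted here); the CMP() helper is defined inline for this example:

```c
#define CMP(x, y) ((x) < (y) ? -1 : (x) > (y) ? 1 : 0)

static int entry_compare(sd_id128_t a_seqnum_id, const struct EntryObject *a,
                         sd_id128_t b_seqnum_id, const struct EntryObject *b) {
        if (sd_id128_equal(a_seqnum_id, b_seqnum_id))
                return CMP(le64toh(a->seqnum), le64toh(b->seqnum));

        if (sd_id128_equal(a->boot_id, b->boot_id))
                return CMP(le64toh(a->monotonic), le64toh(b->monotonic));

        return CMP(le64toh(a->realtime), le64toh(b->realtime));
}
```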
When listing all possible values a certain field can take it is sufficient to
look up the FIELD object and follow the chain of links to all DATA objects it
includes.

### Writing

When an entry is appended to the journal, the data hash table should be checked
for each of its data fields. If the data field does not yet exist in the file
it should be appended and added to the data hash table. When a field data
object is added, the field hash table should be checked for the field name of
the data field, and a FIELD object be added if necessary. After all data fields
(and recursively all field names) of the new entry are appended and linked up
in the hash tables, the entry object should be appended and linked up too (see
the sketch below).

At regular intervals a tag object should be written if sealing is enabled (see
above). Before the file is closed a tag should be written too, to seal it off.

Before writing an object, time and disk space limits should be checked and
rotation triggered if necessary.
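The append order described above — payload objects first, the entry and its link-up last, so readers never see an entry pointing at unwritten data — might be sketched like this, reusing find_data() from earlier; journal_file_append_data(), journal_file_append_entry() and object_offset() are hypothetical:

```c
#include <sys/uio.h>

static int append_entry(JournalFile *f, const struct iovec *fields, size_t n_fields) {
        uint64_t offsets[n_fields];

        /* Step 1: make sure every DATA object (and, inside the helper, its
         * FIELD object) exists and is linked into the hash tables. */
        for (size_t i = 0; i < n_fields; i++) {
                const struct DataObject *d = find_data(f, fields[i].iov_base, fields[i].iov_len);

                if (!d &&
                    journal_file_append_data(f, fields[i].iov_base, fields[i].iov_len, &d) < 0)
                        return -1;

                offsets[i] = object_offset(f, d);
        }

        /* Step 2: only now append the ENTRY object and link it into the global
         * and per-DATA entry array chains. */
        return journal_file_append_entry(f, offsets, n_fields);
}
```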
## Optimizing Disk IO

_A few general ideas to keep in mind:_

The hash tables for looking up fields and data should end up in the memory
cache quickly and not hurt performance. All entries and entry arrays are
ordered strictly by time on disk, and hence should expose an OK access pattern
on rotating media, when read sequentially (which should be the most common
case, given the nature of log data).

The disk access patterns of the binary search for entries needed for seeking
are problematic on rotating disks. This should not be a major issue though,
since seeking should not be a frequent operation.

When reading, collecting data fields for presenting entries to the user is
problematic on rotating disks. In order to optimize these patterns the item
array of entry objects should be sorted by disk offset before
writing. Effectively, frequently used data objects should be in the memory
cache quickly. Non-frequently used data objects are likely to be located
between the previous and current entry when reading and hence should expose an
OK access pattern. Problematic are data objects that are neither frequently nor
infrequently referenced, which will cost seek time.

And that's all there is to it.

Thanks for your interest!
```diff
@@ -98,7 +98,7 @@ And now, here's the list of (hopefully) all APIs that we have introduced with systemd
 | [Unit file format](https://www.freedesktop.org/software/systemd/man/systemd.unit.html) | File format | yes | yes | numerous | no | - | no |
 | [Network](https://www.freedesktop.org/software/systemd/man/systemd.network.html) & [Netdev file format](https://www.freedesktop.org/software/systemd/man/systemd.netdev.html) | File format | yes | yes | no | no | - | no |
 | [Link file format](https://www.freedesktop.org/software/systemd/man/systemd.link.html) | File format | yes | yes | no | no | - | no |
-| [Journal File Format](https://www.freedesktop.org/wiki/Software/systemd/journal-files) | File format | yes | yes | - | maybe | - | no |
+| [Journal File Format](https://systemd.io/JOURNAL_FILE_FORMAT) | File format | yes | yes | - | maybe | - | no |
 | [Journal Export Format](https://www.freedesktop.org/wiki/Software/systemd/export) | File format | yes | yes | - | yes | - | no |
 | [Cooperation in cgroup tree](https://www.freedesktop.org/wiki/Software/systemd/PaxControlGroups) | Treaty | yes | yes | libvirt | yes | libvirt | no |
 | [Password Agents](https://www.freedesktop.org/wiki/Software/systemd/PasswordAgents) | Socket+Files | yes | yes | - | yes | - | no |
```
meson.build (20 changed lines):
```diff
@@ -1190,36 +1190,38 @@ want_xz = get_option('xz')
 if want_xz != 'false' and not skip_deps
         libxz = dependency('liblzma',
                            required : want_xz == 'true')
-        have = libxz.found()
+        have_xz = libxz.found()
 else
-        have = false
+        have_xz = false
         libxz = []
 endif
-conf.set10('HAVE_XZ', have)
+conf.set10('HAVE_XZ', have_xz)
 
 want_lz4 = get_option('lz4')
 if want_lz4 != 'false' and not skip_deps
         liblz4 = dependency('liblz4',
                             version : '>= 1.3.0',
                             required : want_lz4 == 'true')
-        have = liblz4.found()
+        have_lz4 = liblz4.found()
 else
-        have = false
+        have_lz4 = false
         liblz4 = []
 endif
-conf.set10('HAVE_LZ4', have)
+conf.set10('HAVE_LZ4', have_lz4)
 
 want_zstd = get_option('zstd')
 if want_zstd != 'false' and not skip_deps
         libzstd = dependency('libzstd',
                              required : want_zstd == 'true',
                              version : '>= 1.4.0')
-        have = libzstd.found()
+        have_zstd = libzstd.found()
 else
-        have = false
+        have_zstd = false
         libzstd = []
 endif
-conf.set10('HAVE_ZSTD', have)
+conf.set10('HAVE_ZSTD', have_zstd)
+
+conf.set10('HAVE_COMPRESSION', have_xz or have_lz4 or have_zstd)
 
 want_xkbcommon = get_option('xkbcommon')
 if want_xkbcommon != 'false' and not skip_deps
```
```diff
@@ -286,6 +286,15 @@ static inline size_t GREEDY_ALLOC_ROUND_UP(size_t l) {
                 UNIQ_T(A, aq) < UNIQ_T(B, bq) ? UNIQ_T(A, aq) : UNIQ_T(B, bq); \
         })
 
+/* evaluates to (void) if _A or _B are not constant or of different types */
+#define CONST_MIN(_A, _B) \
+        (__builtin_choose_expr( \
+                __builtin_constant_p(_A) && \
+                __builtin_constant_p(_B) && \
+                __builtin_types_compatible_p(typeof(_A), typeof(_B)), \
+                ((_A) < (_B)) ? (_A) : (_B), \
+                VOID_0))
+
 #define MIN3(x, y, z) \
         ({ \
                 const typeof(x) _c = MIN(x, y); \
```
```diff
@@ -423,7 +423,7 @@ static int save_external_coredump(
                 goto fail;
         }
 
-#if HAVE_XZ || HAVE_LZ4 || HAVE_ZSTD
+#if HAVE_COMPRESSION
         /* If we will remove the coredump anyway, do not compress. */
         if (arg_compress && !maybe_remove_external_coredump(NULL, st.st_size)) {
```
```diff
@@ -824,7 +824,7 @@ static int save_core(sd_journal *j, FILE *file, char **path, bool *unlink_temp)
         }
 
         if (filename) {
-#if HAVE_XZ || HAVE_LZ4 || HAVE_ZSTD
+#if HAVE_COMPRESSION
                 _cleanup_close_ int fdf;
 
                 fdf = open(filename, O_RDONLY | O_CLOEXEC);
@@ -839,8 +839,8 @@ static int save_core(sd_journal *j, FILE *file, char **path, bool *unlink_temp)
                         goto error;
                 }
 #else
-                log_error("Cannot decompress file. Compiled without compression support.");
-                r = -EOPNOTSUPP;
+                r = log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+                                    "Cannot decompress file. Compiled without compression support.");
                 goto error;
 #endif
         } else {
```
```diff
@@ -57,8 +57,11 @@ static int zstd_ret_to_errno(size_t ret) {
 #define ALIGN_8(l) ALIGN_TO(l, sizeof(size_t))
 
 static const char* const object_compressed_table[_OBJECT_COMPRESSED_MAX] = {
-        [OBJECT_COMPRESSED_XZ] = "XZ",
-        [OBJECT_COMPRESSED_LZ4] = "LZ4",
+        [OBJECT_COMPRESSED_XZ]   = "XZ",
+        [OBJECT_COMPRESSED_LZ4]  = "LZ4",
+        [OBJECT_COMPRESSED_ZSTD] = "ZSTD",
+        /* If we add too many more entries here, it's going to grow quite large (and be mostly sparse), since
+         * the array key is actually a bitmask, not a plain enum */
 };
 
 DEFINE_STRING_TABLE_LOOKUP(object_compressed, int);
```
```diff
@@ -131,6 +134,29 @@ int compress_blob_lz4(const void *src, uint64_t src_size,
 #endif
 }
 
+int compress_blob_zstd(
+                const void *src, uint64_t src_size,
+                void *dst, size_t dst_alloc_size, size_t *dst_size) {
+#if HAVE_ZSTD
+        size_t k;
+
+        assert(src);
+        assert(src_size > 0);
+        assert(dst);
+        assert(dst_alloc_size > 0);
+        assert(dst_size);
+
+        k = ZSTD_compress(dst, dst_alloc_size, src, src_size, 0);
+        if (ZSTD_isError(k))
+                return zstd_ret_to_errno(k);
+
+        *dst_size = k;
+        return 0;
+#else
+        return -EPROTONOSUPPORT;
+#endif
+}
+
 int decompress_blob_xz(const void *src, uint64_t src_size,
                        void **dst, size_t *dst_alloc_size, size_t* dst_size, size_t dst_max) {
```
```diff
@@ -231,15 +257,74 @@ int decompress_blob_lz4(const void *src, uint64_t src_size,
 #endif
 }
 
-int decompress_blob(int compression,
-                    const void *src, uint64_t src_size,
-                    void **dst, size_t *dst_alloc_size, size_t* dst_size, size_t dst_max) {
+int decompress_blob_zstd(
+                const void *src, uint64_t src_size,
+                void **dst, size_t *dst_alloc_size, size_t* dst_size, size_t dst_max) {
+
+#if HAVE_ZSTD
+        size_t space;
+
+        assert(src);
+        assert(src_size > 0);
+        assert(dst);
+        assert(dst_alloc_size);
+        assert(dst_size);
+        assert(*dst_alloc_size == 0 || *dst);
+
+        if (src_size > SIZE_MAX/2) /* Overflow? */
+                return -ENOBUFS;
+        space = src_size * 2;
+        if (dst_max > 0 && space > dst_max)
+                space = dst_max;
+
+        if (!greedy_realloc(dst, dst_alloc_size, space, 1))
+                return -ENOMEM;
+
+        for (;;) {
+                size_t k;
+
+                k = ZSTD_decompress(*dst, *dst_alloc_size, src, src_size);
+                if (!ZSTD_isError(k)) {
+                        *dst_size = k;
+                        return 0;
+                }
+                if (ZSTD_getErrorCode(k) != ZSTD_error_dstSize_tooSmall)
+                        return zstd_ret_to_errno(k);
+
+                if (dst_max > 0 && space >= dst_max) /* Already at max? */
+                        return -ENOBUFS;
+                if (space > SIZE_MAX / 2) /* Overflow? */
+                        return -ENOBUFS;
+
+                space *= 2;
+                if (dst_max > 0 && space > dst_max)
+                        space = dst_max;
+
+                if (!greedy_realloc(dst, dst_alloc_size, space, 1))
+                        return -ENOMEM;
+        }
+#else
+        return -EPROTONOSUPPORT;
+#endif
+}
+
+int decompress_blob(
+                int compression,
+                const void *src, uint64_t src_size,
+                void **dst, size_t *dst_alloc_size, size_t* dst_size, size_t dst_max) {
 
         if (compression == OBJECT_COMPRESSED_XZ)
-                return decompress_blob_xz(src, src_size,
-                                          dst, dst_alloc_size, dst_size, dst_max);
+                return decompress_blob_xz(
+                                src, src_size,
+                                dst, dst_alloc_size, dst_size, dst_max);
         else if (compression == OBJECT_COMPRESSED_LZ4)
-                return decompress_blob_lz4(src, src_size,
-                                           dst, dst_alloc_size, dst_size, dst_max);
+                return decompress_blob_lz4(
+                                src, src_size,
+                                dst, dst_alloc_size, dst_size, dst_max);
+        else if (compression == OBJECT_COMPRESSED_ZSTD)
+                return decompress_blob_zstd(
+                                src, src_size,
+                                dst, dst_alloc_size, dst_size, dst_max);
         else
                 return -EBADMSG;
 }
```
```diff
@@ -365,21 +450,92 @@ int decompress_startswith_lz4(const void *src, uint64_t src_size,
 #endif
 }
 
-int decompress_startswith(int compression,
-                          const void *src, uint64_t src_size,
-                          void **buffer, size_t *buffer_size,
-                          const void *prefix, size_t prefix_len,
-                          uint8_t extra) {
+int decompress_startswith_zstd(
+                const void *src, uint64_t src_size,
+                void **buffer, size_t *buffer_size,
+                const void *prefix, size_t prefix_len,
+                uint8_t extra) {
+#if HAVE_ZSTD
+        _cleanup_(ZSTD_freeDCtxp) ZSTD_DCtx *dctx = NULL;
+        size_t k;
+
+        assert(src);
+        assert(src_size > 0);
+        assert(buffer);
+        assert(buffer_size);
+        assert(prefix);
+        assert(*buffer_size == 0 || *buffer);
+
+        dctx = ZSTD_createDCtx();
+        if (!dctx)
+                return -ENOMEM;
+
+        if (!(greedy_realloc(buffer, buffer_size, MAX(ZSTD_DStreamOutSize(), prefix_len + 1), 1)))
+                return -ENOMEM;
+
+        ZSTD_inBuffer input = {
+                .src = src,
+                .size = src_size,
+        };
+        ZSTD_outBuffer output = {
+                .dst = *buffer,
+                .size = *buffer_size,
+        };
+
+        for (;;) {
+                k = ZSTD_decompressStream(dctx, &output, &input);
+                if (ZSTD_isError(k)) {
+                        log_debug("ZSTD decoder failed: %s", ZSTD_getErrorName(k));
+                        return zstd_ret_to_errno(k);
+                }
+
+                if (output.pos >= prefix_len + 1)
+                        return memcmp(*buffer, prefix, prefix_len) == 0 &&
+                                ((const uint8_t*) *buffer)[prefix_len] == extra;
+
+                if (input.pos >= input.size)
+                        return 0;
+
+                if (*buffer_size > SIZE_MAX/2)
+                        return -ENOBUFS;
+
+                if (!(greedy_realloc(buffer, buffer_size, *buffer_size * 2, 1)))
+                        return -ENOMEM;
+
+                output.dst = *buffer;
+                output.size = *buffer_size;
+        }
+#else
+        return -EPROTONOSUPPORT;
+#endif
+}
+
+int decompress_startswith(
+                int compression,
+                const void *src, uint64_t src_size,
+                void **buffer, size_t *buffer_size,
+                const void *prefix, size_t prefix_len,
+                uint8_t extra) {
 
         if (compression == OBJECT_COMPRESSED_XZ)
-                return decompress_startswith_xz(src, src_size,
-                                                buffer, buffer_size,
-                                                prefix, prefix_len,
-                                                extra);
+                return decompress_startswith_xz(
+                                src, src_size,
+                                buffer, buffer_size,
+                                prefix, prefix_len,
+                                extra);
+
         else if (compression == OBJECT_COMPRESSED_LZ4)
-                return decompress_startswith_lz4(src, src_size,
-                                                 buffer, buffer_size,
-                                                 prefix, prefix_len,
-                                                 extra);
+                return decompress_startswith_lz4(
+                                src, src_size,
+                                buffer, buffer_size,
+                                prefix, prefix_len,
+                                extra);
+        else if (compression == OBJECT_COMPRESSED_ZSTD)
+                return decompress_startswith_zstd(
+                                src, src_size,
+                                buffer, buffer_size,
+                                prefix, prefix_len,
+                                extra);
         else
                 return -EBADMSG;
 }
```
```diff
@@ -12,18 +12,26 @@ int compress_blob_xz(const void *src, uint64_t src_size,
                      void *dst, size_t dst_alloc_size, size_t *dst_size);
 int compress_blob_lz4(const void *src, uint64_t src_size,
                       void *dst, size_t dst_alloc_size, size_t *dst_size);
+int compress_blob_zstd(const void *src, uint64_t src_size,
+                       void *dst, size_t dst_alloc_size, size_t *dst_size);
 
 static inline int compress_blob(const void *src, uint64_t src_size,
                                 void *dst, size_t dst_alloc_size, size_t *dst_size) {
         int r;
-#if HAVE_LZ4
+#if HAVE_ZSTD
+        r = compress_blob_zstd(src, src_size, dst, dst_alloc_size, dst_size);
+        if (r == 0)
+                return OBJECT_COMPRESSED_ZSTD;
+#elif HAVE_LZ4
         r = compress_blob_lz4(src, src_size, dst, dst_alloc_size, dst_size);
         if (r == 0)
                 return OBJECT_COMPRESSED_LZ4;
-#else
+#elif HAVE_XZ
         r = compress_blob_xz(src, src_size, dst, dst_alloc_size, dst_size);
         if (r == 0)
                 return OBJECT_COMPRESSED_XZ;
+#else
+        r = -EOPNOTSUPP;
 #endif
         return r;
 }
```
```diff
@@ -32,6 +40,8 @@ int decompress_blob_xz(const void *src, uint64_t src_size,
                        void **dst, size_t *dst_alloc_size, size_t* dst_size, size_t dst_max);
 int decompress_blob_lz4(const void *src, uint64_t src_size,
                         void **dst, size_t *dst_alloc_size, size_t* dst_size, size_t dst_max);
+int decompress_blob_zstd(const void *src, uint64_t src_size,
+                         void **dst, size_t *dst_alloc_size, size_t* dst_size, size_t dst_max);
 int decompress_blob(int compression,
                     const void *src, uint64_t src_size,
                     void **dst, size_t *dst_alloc_size, size_t* dst_size, size_t dst_max);
```
```diff
@@ -44,6 +54,10 @@ int decompress_startswith_lz4(const void *src, uint64_t src_size,
                               void **buffer, size_t *buffer_size,
                               const void *prefix, size_t prefix_len,
                               uint8_t extra);
+int decompress_startswith_zstd(const void *src, uint64_t src_size,
+                               void **buffer, size_t *buffer_size,
+                               const void *prefix, size_t prefix_len,
+                               uint8_t extra);
 int decompress_startswith(int compression,
                           const void *src, uint64_t src_size,
                           void **buffer, size_t *buffer_size,
```
```diff
@@ -64,9 +78,14 @@ int decompress_stream_zstd(int fdf, int fdt, uint64_t max_size);
 #elif HAVE_LZ4
 #  define compress_stream compress_stream_lz4
 #  define COMPRESSED_EXT ".lz4"
-#else
+#elif HAVE_XZ
 #  define compress_stream compress_stream_xz
 #  define COMPRESSED_EXT ".xz"
+#else
+static inline int compress_stream(int fdf, int fdt, uint64_t max_size) {
+        return -EOPNOTSUPP;
+}
+#  define COMPRESSED_EXT ""
 #endif
 
 int decompress_stream(const char *filename, int fdf, int fdt, uint64_t max_bytes);
```
```diff
@@ -9,7 +9,7 @@
 /*
  * If you change this file you probably should also change its documentation:
  *
- * http://www.freedesktop.org/wiki/Software/systemd/journal-files
+ * https://systemd.io/JOURNAL_FILE_FORMAT
  */
 
 typedef struct Header Header;
```
```diff
@@ -44,13 +44,13 @@ typedef enum ObjectType {
 
 /* Object flags */
 enum {
-        OBJECT_COMPRESSED_XZ = 1 << 0,
-        OBJECT_COMPRESSED_LZ4 = 1 << 1,
-        _OBJECT_COMPRESSED_MAX
+        OBJECT_COMPRESSED_XZ   = 1 << 0,
+        OBJECT_COMPRESSED_LZ4  = 1 << 1,
+        OBJECT_COMPRESSED_ZSTD = 1 << 2,
+        OBJECT_COMPRESSION_MASK = (OBJECT_COMPRESSED_XZ | OBJECT_COMPRESSED_LZ4 | OBJECT_COMPRESSED_ZSTD),
+        _OBJECT_COMPRESSED_MAX = OBJECT_COMPRESSION_MASK,
 };
 
-#define OBJECT_COMPRESSION_MASK (OBJECT_COMPRESSED_XZ | OBJECT_COMPRESSED_LZ4)
-
 struct ObjectHeader {
         uint8_t type;
         uint8_t flags;
```
```diff
@@ -74,13 +74,17 @@ struct DataObject DataObject__contents;
 struct DataObject__packed DataObject__contents _packed_;
 assert_cc(sizeof(struct DataObject) == sizeof(struct DataObject__packed));
 
-struct FieldObject {
-        ObjectHeader object;
-        le64_t hash;
-        le64_t next_hash_offset;
-        le64_t head_data_offset;
-        uint8_t payload[];
-} _packed_;
+#define FieldObject__contents {                 \
+        ObjectHeader object;                    \
+        le64_t hash;                            \
+        le64_t next_hash_offset;                \
+        le64_t head_data_offset;                \
+        uint8_t payload[];                      \
+        }
+
+struct FieldObject FieldObject__contents;
+struct FieldObject__packed FieldObject__contents _packed_;
+assert_cc(sizeof(struct FieldObject) == sizeof(struct FieldObject__packed));
 
 struct EntryItem {
         le64_t object_offset;
```
```diff
@@ -145,24 +149,38 @@ enum {
 
 /* Header flags */
 enum {
-        HEADER_INCOMPATIBLE_COMPRESSED_XZ = 1 << 0,
-        HEADER_INCOMPATIBLE_COMPRESSED_LZ4 = 1 << 1,
+        HEADER_INCOMPATIBLE_COMPRESSED_XZ   = 1 << 0,
+        HEADER_INCOMPATIBLE_COMPRESSED_LZ4  = 1 << 1,
+        HEADER_INCOMPATIBLE_KEYED_HASH      = 1 << 2,
+        HEADER_INCOMPATIBLE_COMPRESSED_ZSTD = 1 << 3,
 };
 
-#define HEADER_INCOMPATIBLE_ANY (HEADER_INCOMPATIBLE_COMPRESSED_XZ|HEADER_INCOMPATIBLE_COMPRESSED_LZ4)
+#define HEADER_INCOMPATIBLE_ANY                 \
+        (HEADER_INCOMPATIBLE_COMPRESSED_XZ |    \
+         HEADER_INCOMPATIBLE_COMPRESSED_LZ4 |   \
+         HEADER_INCOMPATIBLE_KEYED_HASH |       \
+         HEADER_INCOMPATIBLE_COMPRESSED_ZSTD)
 
-#if HAVE_XZ && HAVE_LZ4
+#if HAVE_XZ && HAVE_LZ4 && HAVE_ZSTD
 #  define HEADER_INCOMPATIBLE_SUPPORTED HEADER_INCOMPATIBLE_ANY
+#elif HAVE_XZ && HAVE_LZ4
+#  define HEADER_INCOMPATIBLE_SUPPORTED (HEADER_INCOMPATIBLE_COMPRESSED_XZ|HEADER_INCOMPATIBLE_COMPRESSED_LZ4|HEADER_INCOMPATIBLE_KEYED_HASH)
+#elif HAVE_XZ && HAVE_ZSTD
+#  define HEADER_INCOMPATIBLE_SUPPORTED (HEADER_INCOMPATIBLE_COMPRESSED_XZ|HEADER_INCOMPATIBLE_COMPRESSED_ZSTD|HEADER_INCOMPATIBLE_KEYED_HASH)
+#elif HAVE_LZ4 && HAVE_ZSTD
+#  define HEADER_INCOMPATIBLE_SUPPORTED (HEADER_INCOMPATIBLE_COMPRESSED_LZ4|HEADER_INCOMPATIBLE_COMPRESSED_ZSTD|HEADER_INCOMPATIBLE_KEYED_HASH)
 #elif HAVE_XZ
-#  define HEADER_INCOMPATIBLE_SUPPORTED HEADER_INCOMPATIBLE_COMPRESSED_XZ
+#  define HEADER_INCOMPATIBLE_SUPPORTED (HEADER_INCOMPATIBLE_COMPRESSED_XZ|HEADER_INCOMPATIBLE_KEYED_HASH)
 #elif HAVE_LZ4
-#  define HEADER_INCOMPATIBLE_SUPPORTED HEADER_INCOMPATIBLE_COMPRESSED_LZ4
+#  define HEADER_INCOMPATIBLE_SUPPORTED (HEADER_INCOMPATIBLE_COMPRESSED_LZ4|HEADER_INCOMPATIBLE_KEYED_HASH)
+#elif HAVE_ZSTD
+#  define HEADER_INCOMPATIBLE_SUPPORTED (HEADER_INCOMPATIBLE_COMPRESSED_ZSTD|HEADER_INCOMPATIBLE_KEYED_HASH)
 #else
-#  define HEADER_INCOMPATIBLE_SUPPORTED 0
+#  define HEADER_INCOMPATIBLE_SUPPORTED HEADER_INCOMPATIBLE_KEYED_HASH
 #endif
 
 enum {
-        HEADER_COMPATIBLE_SEALED = 1
+        HEADER_COMPATIBLE_SEALED = 1 << 0,
 };
 
 #define HEADER_COMPATIBLE_ANY HEADER_COMPATIBLE_SEALED
```
```diff
@@ -172,7 +190,8 @@ enum {
 #  define HEADER_COMPATIBLE_SUPPORTED 0
 #endif
 
-#define HEADER_SIGNATURE ((char[]) { 'L', 'P', 'K', 'S', 'H', 'H', 'R', 'H' })
+#define HEADER_SIGNATURE \
+        ((const char[]) { 'L', 'P', 'K', 'S', 'H', 'H', 'R', 'H' })
 
 #define struct_Header__contents { \
         uint8_t signature[8]; /* "LPKSHHRH" */ \
```
```diff
@@ -205,14 +224,18 @@ enum {
         /* Added in 189 */ \
         le64_t n_tags; \
         le64_t n_entry_arrays; \
+        /* Added in 246 */ \
+        le64_t data_hash_chain_depth; \
+        le64_t field_hash_chain_depth; \
         }
 
 struct Header struct_Header__contents;
 struct Header__packed struct_Header__contents _packed_;
 assert_cc(sizeof(struct Header) == sizeof(struct Header__packed));
-assert_cc(sizeof(struct Header) == 240);
+assert_cc(sizeof(struct Header) == 256);
 
-#define FSS_HEADER_SIGNATURE ((char[]) { 'K', 'S', 'H', 'H', 'R', 'H', 'L', 'P' })
+#define FSS_HEADER_SIGNATURE \
+        ((const char[]) { 'K', 'S', 'H', 'H', 'R', 'H', 'L', 'P' })
 
 struct FSSHeader {
         uint8_t signature[8]; /* "KSHHRHLP" */
```
```diff
@@ -16,6 +16,7 @@
 #include "btrfs-util.h"
 #include "chattr-util.h"
 #include "compress.h"
+#include "env-util.h"
 #include "fd-util.h"
 #include "format-util.h"
 #include "fs-util.h"
```
```diff
@@ -81,6 +82,9 @@
 /* The mmap context to use for the header we pick as one above the last defined typed */
 #define CONTEXT_HEADER _OBJECT_TYPE_MAX
 
+/* Longest hash chain to rotate after */
+#define HASH_CHAIN_DEPTH_MAX 100
+
 #ifdef __clang__
 #  pragma GCC diagnostic ignored "-Waddress-of-packed-member"
 #endif
```
@ -388,7 +392,7 @@ JournalFile* journal_file_close(JournalFile *f) {
|
|||
|
||||
ordered_hashmap_free_free(f->chain_cache);
|
||||
|
||||
#if HAVE_XZ || HAVE_LZ4
|
||||
#if HAVE_COMPRESSION
|
||||
free(f->compress_buffer);
|
||||
#endif
|
||||
|
||||
|
@@ -419,7 +423,9 @@ static int journal_file_init_header(JournalFile *f, JournalFile *template) {
         h.incompatible_flags |= htole32(
                 f->compress_xz * HEADER_INCOMPATIBLE_COMPRESSED_XZ |
-                f->compress_lz4 * HEADER_INCOMPATIBLE_COMPRESSED_LZ4);
+                f->compress_lz4 * HEADER_INCOMPATIBLE_COMPRESSED_LZ4 |
+                f->compress_zstd * HEADER_INCOMPATIBLE_COMPRESSED_ZSTD |
+                f->keyed_hash * HEADER_INCOMPATIBLE_KEYED_HASH);

         h.compatible_flags = htole32(
                 f->seal * HEADER_COMPATIBLE_SEALED);

@@ -445,7 +451,6 @@ static int journal_file_init_header(JournalFile *f, JournalFile *template) {
 }

 static int journal_file_refresh_header(JournalFile *f) {
-        sd_id128_t boot_id;
         int r;

         assert(f);

@@ -458,12 +463,10 @@ static int journal_file_refresh_header(JournalFile *f) {
         else if (r < 0)
                 return r;

-        r = sd_id128_get_boot(&boot_id);
+        r = sd_id128_get_boot(&f->header->boot_id);
         if (r < 0)
                 return r;

-        f->header->boot_id = boot_id;
-
         r = journal_file_set_online(f);

         /* Sync the online state to disk */
@@ -489,16 +492,23 @@ static bool warn_wrong_flags(const JournalFile *f, bool compatible) {
                   f->path, type, flags & ~any);
         flags = (flags & any) & ~supported;
         if (flags) {
-                const char* strv[3];
+                const char* strv[5];
                 unsigned n = 0;
                 _cleanup_free_ char *t = NULL;

-                if (compatible && (flags & HEADER_COMPATIBLE_SEALED))
-                        strv[n++] = "sealed";
-                if (!compatible && (flags & HEADER_INCOMPATIBLE_COMPRESSED_XZ))
-                        strv[n++] = "xz-compressed";
-                if (!compatible && (flags & HEADER_INCOMPATIBLE_COMPRESSED_LZ4))
-                        strv[n++] = "lz4-compressed";
+                if (compatible) {
+                        if (flags & HEADER_COMPATIBLE_SEALED)
+                                strv[n++] = "sealed";
+                } else {
+                        if (flags & HEADER_INCOMPATIBLE_COMPRESSED_XZ)
+                                strv[n++] = "xz-compressed";
+                        if (flags & HEADER_INCOMPATIBLE_COMPRESSED_LZ4)
+                                strv[n++] = "lz4-compressed";
+                        if (flags & HEADER_INCOMPATIBLE_COMPRESSED_ZSTD)
+                                strv[n++] = "zstd-compressed";
+                        if (flags & HEADER_INCOMPATIBLE_KEYED_HASH)
+                                strv[n++] = "keyed-hash";
+                }
                 strv[n] = NULL;
                 assert(n < ELEMENTSOF(strv));
@@ -595,9 +605,12 @@ static int journal_file_verify_header(JournalFile *f) {
         f->compress_xz = JOURNAL_HEADER_COMPRESSED_XZ(f->header);
         f->compress_lz4 = JOURNAL_HEADER_COMPRESSED_LZ4(f->header);
+        f->compress_zstd = JOURNAL_HEADER_COMPRESSED_ZSTD(f->header);

         f->seal = JOURNAL_HEADER_SEALED(f->header);

+        f->keyed_hash = JOURNAL_HEADER_KEYED_HASH(f->header);
+
         return 0;
 }

@@ -1003,7 +1016,13 @@ static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
                 return r;
 }
-int journal_file_append_object(JournalFile *f, ObjectType type, uint64_t size, Object **ret, uint64_t *offset) {
+int journal_file_append_object(
+                JournalFile *f,
+                ObjectType type,
+                uint64_t size,
+                Object **ret,
+                uint64_t *ret_offset) {
+
         int r;
         uint64_t p;
         Object *tail, *o;

@@ -1013,8 +1032,6 @@ int journal_file_append_object(JournalFile *f, ObjectType type, uint64_t size, O
         assert(f->header);
         assert(type > OBJECT_UNUSED && type < _OBJECT_TYPE_MAX);
         assert(size >= sizeof(ObjectHeader));
-        assert(offset);
-        assert(ret);

         r = journal_file_set_online(f);
         if (r < 0)

@@ -1058,8 +1075,11 @@ int journal_file_append_object(JournalFile *f, ObjectType type, uint64_t size, O
         f->header->tail_object_offset = htole64(p);
         f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);

-        *ret = o;
-        *offset = p;
+        if (ret)
+                *ret = o;
+
+        if (ret_offset)
+                *ret_offset = p;

         return 0;
 }
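Alongside the rename from `offset` to `ret_offset`, the hard `assert()`s on the output pointers are dropped: both out-parameters become optional, a convention this series applies throughout the file. A hypothetical illustration of the pattern (not systemd API):

```c
#include <stdint.h>

/* Hypothetical example of the optional out-parameter convention:
 * results are only stored if the caller passed a place to put them. */
static int make_value(uint64_t in, uint64_t *ret_value, uint64_t *ret_double) {
        if (ret_value)
                *ret_value = in;
        if (ret_double)
                *ret_double = in * 2;
        return 0;
}
```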
@@ -1081,7 +1101,7 @@ static int journal_file_setup_data_hash_table(JournalFile *f) {
         if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
                 s = DEFAULT_DATA_HASH_TABLE_SIZE;

-        log_debug("Reserving %"PRIu64" entries in hash table.", s / sizeof(HashItem));
+        log_debug("Reserving %"PRIu64" entries in data hash table.", s / sizeof(HashItem));

         r = journal_file_append_object(f,
                                        OBJECT_DATA_HASH_TABLE,

@@ -1110,6 +1130,8 @@ static int journal_file_setup_field_hash_table(JournalFile *f) {
          * number should grow very slowly only */

         s = DEFAULT_FIELD_HASH_TABLE_SIZE;
+
+        log_debug("Reserving %"PRIu64" entries in field hash table.", s / sizeof(HashItem));

         r = journal_file_append_object(f,
                                        OBJECT_FIELD_HASH_TABLE,
                                        offsetof(Object, hash_table.items) + s,
@@ -1273,12 +1295,38 @@ static int journal_file_link_data(
         return 0;
 }

+static int next_hash_offset(
+                JournalFile *f,
+                uint64_t *p,
+                le64_t *next_hash_offset,
+                uint64_t *depth,
+                le64_t *header_max_depth) {
+
+        uint64_t nextp;
+
+        nextp = le64toh(READ_NOW(*next_hash_offset));
+        if (nextp > 0) {
+                if (nextp <= *p) /* Refuse going in loops */
+                        return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+                                               "Detected hash item loop in %s, refusing.", f->path);
+
+                (*depth)++;
+
+                /* If the depth of this hash chain is larger than all others we have seen so far, record it */
+                if (header_max_depth && f->writable)
+                        *header_max_depth = htole64(MAX(*depth, le64toh(*header_max_depth)));
+        }
+
+        *p = nextp;
+        return 0;
+}
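`next_hash_offset()` both validates and instruments chain traversal: links must strictly increase, since journal files are append-only and a legitimate chain item can only live at a later offset than its predecessor, and the deepest chain seen is recorded in the header for the rotation heuristic further down. A standalone sketch of the same invariant, under the assumption that chain links are plain file offsets:

```c
#include <stdbool.h>
#include <stdint.h>

/* Sketch of the invariant next_hash_offset() enforces. next() is a
 * hypothetical accessor standing in for the mmap'ed object lookup. */
typedef uint64_t (*next_fn)(uint64_t offset, void *userdata);

static bool walk_chain(uint64_t head, next_fn next, void *userdata, uint64_t *max_depth) {
        uint64_t p = head, depth = 0;

        while (p > 0) {
                uint64_t n = next(p, userdata);

                if (n > 0) {
                        if (n <= p)
                                return false; /* backward link: refuse going in loops */
                        depth++;
                }
                p = n;
        }

        if (depth > *max_depth)
                *max_depth = depth; /* deepest chain seen, as recorded in the header */

        return true;
}
```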
 int journal_file_find_field_object_with_hash(
                 JournalFile *f,
                 const void *field, uint64_t size, uint64_t hash,
-                Object **ret, uint64_t *offset) {
+                Object **ret, uint64_t *ret_offset) {

-        uint64_t p, osize, h, m;
+        uint64_t p, osize, h, m, depth = 0;
         int r;

         assert(f);

@@ -1302,7 +1350,6 @@ int journal_file_find_field_object_with_hash(
         h = hash % m;
         p = le64toh(f->field_hash_table[h].head_hash_offset);
-
         while (p > 0) {
                 Object *o;

@@ -1316,41 +1363,63 @@ int journal_file_find_field_object_with_hash(
                         if (ret)
                                 *ret = o;
-                        if (offset)
-                                *offset = p;
+                        if (ret_offset)
+                                *ret_offset = p;

                         return 1;
                 }

-                p = le64toh(o->field.next_hash_offset);
+                r = next_hash_offset(
+                                f,
+                                &p,
+                                &o->field.next_hash_offset,
+                                &depth,
+                                JOURNAL_HEADER_CONTAINS(f->header, field_hash_chain_depth) ? &f->header->field_hash_chain_depth : NULL);
+                if (r < 0)
+                        return r;
         }

         return 0;
 }
+uint64_t journal_file_hash_data(
+                JournalFile *f,
+                const void *data,
+                size_t sz) {
+
+        assert(f);
+        assert(data || sz == 0);
+
+        /* We try to unify our codebase on siphash, hence new-styled journal files utilizing the keyed hash
+         * function use siphash. Old journal files use the Jenkins hash. */
+
+        if (JOURNAL_HEADER_KEYED_HASH(f->header))
+                return siphash24(data, sz, f->header->file_id.bytes);
+
+        return jenkins_hash64(data, sz);
+}
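This is the central switch of the series: per-file keyed siphash for new files, the unkeyed Jenkins hash for old ones. Keying with the `file_id` means a set of payloads crafted to collide in one journal file lands in unrelated buckets of the next file, so a collision attack stops working at rotation. A sketch of that property (`bucket_for()` is illustrative, not systemd API; `siphash24()` is assumed to take a 16-byte key, as systemd's helper does):

```c
#include <stddef.h>
#include <stdint.h>

/* Assumed prototype, matching systemd's siphash24 helper. */
uint64_t siphash24(const void *in, size_t inlen, const uint8_t k[16]);

/* Two files with different file_ids map the same data to unrelated
 * buckets, so precomputed collisions do not transfer between files. */
static uint64_t bucket_for(const void *data, size_t sz,
                           const uint8_t file_id[16], uint64_t n_buckets) {
        return siphash24(data, sz, file_id) % n_buckets;
}
```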
 int journal_file_find_field_object(
                 JournalFile *f,
                 const void *field, uint64_t size,
-                Object **ret, uint64_t *offset) {
-
-        uint64_t hash;
+                Object **ret, uint64_t *ret_offset) {

         assert(f);
         assert(field && size > 0);

-        hash = hash64(field, size);
-
-        return journal_file_find_field_object_with_hash(f,
-                                                        field, size, hash,
-                                                        ret, offset);
+        return journal_file_find_field_object_with_hash(
+                        f,
+                        field, size,
+                        journal_file_hash_data(f, field, size),
+                        ret, ret_offset);
 }
 int journal_file_find_data_object_with_hash(
                 JournalFile *f,
                 const void *data, uint64_t size, uint64_t hash,
-                Object **ret, uint64_t *offset) {
+                Object **ret, uint64_t *ret_offset) {

-        uint64_t p, osize, h, m;
+        uint64_t p, osize, h, m, depth = 0;
         int r;

         assert(f);

@@ -1386,7 +1455,7 @@ int journal_file_find_data_object_with_hash(
                         goto next;

                 if (o->object.flags & OBJECT_COMPRESSION_MASK) {
-#if HAVE_XZ || HAVE_LZ4
+#if HAVE_COMPRESSION
                         uint64_t l;
                         size_t rsize = 0;

@@ -1407,8 +1476,8 @@ int journal_file_find_data_object_with_hash(
                                 if (ret)
                                         *ret = o;

-                                if (offset)
-                                        *offset = p;
+                                if (ret_offset)
+                                        *ret_offset = p;

                                 return 1;
                         }

@@ -1421,14 +1490,21 @@ int journal_file_find_data_object_with_hash(
                         if (ret)
                                 *ret = o;

-                        if (offset)
-                                *offset = p;
+                        if (ret_offset)
+                                *ret_offset = p;

                         return 1;
                 }

         next:
-                p = le64toh(o->data.next_hash_offset);
+                r = next_hash_offset(
+                                f,
+                                &p,
+                                &o->data.next_hash_offset,
+                                &depth,
+                                JOURNAL_HEADER_CONTAINS(f->header, data_hash_chain_depth) ? &f->header->data_hash_chain_depth : NULL);
+                if (r < 0)
+                        return r;
         }

         return 0;
@@ -1437,24 +1513,22 @@ int journal_file_find_data_object_with_hash(
 int journal_file_find_data_object(
                 JournalFile *f,
                 const void *data, uint64_t size,
-                Object **ret, uint64_t *offset) {
-
-        uint64_t hash;
+                Object **ret, uint64_t *ret_offset) {

         assert(f);
         assert(data || size == 0);

-        hash = hash64(data, size);
-
-        return journal_file_find_data_object_with_hash(f,
-                                                       data, size, hash,
-                                                       ret, offset);
+        return journal_file_find_data_object_with_hash(
+                        f,
+                        data, size,
+                        journal_file_hash_data(f, data, size),
+                        ret, ret_offset);
 }
 static int journal_file_append_field(
                 JournalFile *f,
                 const void *field, uint64_t size,
-                Object **ret, uint64_t *offset) {
+                Object **ret, uint64_t *ret_offset) {

         uint64_t hash, p;
         uint64_t osize;

@@ -1464,7 +1538,7 @@ static int journal_file_append_field(
         assert(f);
         assert(field && size > 0);

-        hash = hash64(field, size);
+        hash = journal_file_hash_data(f, field, size);

         r = journal_file_find_field_object_with_hash(f, field, size, hash, &o, &p);
         if (r < 0)

@@ -1474,8 +1548,8 @@ static int journal_file_append_field(
                 if (ret)
                         *ret = o;

-                if (offset)
-                        *offset = p;
+                if (ret_offset)
+                        *ret_offset = p;

                 return 0;
         }

@@ -1507,8 +1581,8 @@ static int journal_file_append_field(
         if (ret)
                 *ret = o;

-        if (offset)
-                *offset = p;
+        if (ret_offset)
+                *ret_offset = p;

         return 0;
 }
@@ -1516,7 +1590,7 @@ static int journal_file_append_field(
 static int journal_file_append_data(
                 JournalFile *f,
                 const void *data, uint64_t size,
-                Object **ret, uint64_t *offset) {
+                Object **ret, uint64_t *ret_offset) {

         uint64_t hash, p;
         uint64_t osize;

@@ -1527,7 +1601,7 @@ static int journal_file_append_data(
         assert(f);
         assert(data || size == 0);

-        hash = hash64(data, size);
+        hash = journal_file_hash_data(f, data, size);

         r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
         if (r < 0)

@@ -1537,8 +1611,8 @@ static int journal_file_append_data(
                 if (ret)
                         *ret = o;

-                if (offset)
-                        *offset = p;
+                if (ret_offset)
+                        *ret_offset = p;

                 return 0;
         }

@@ -1550,7 +1624,7 @@ static int journal_file_append_data(
         o->data.hash = htole64(hash);

-#if HAVE_XZ || HAVE_LZ4
+#if HAVE_COMPRESSION
         if (JOURNAL_FILE_COMPRESS(f) && size >= f->compress_threshold_bytes) {
                 size_t rsize = 0;

@@ -1608,8 +1682,8 @@ static int journal_file_append_data(
         if (ret)
                 *ret = o;

-        if (offset)
-                *offset = p;
+        if (ret_offset)
+                *ret_offset = p;

         return 0;
 }
@@ -1833,7 +1907,7 @@ static int journal_file_append_entry_internal(
                 uint64_t xor_hash,
                 const EntryItem items[], unsigned n_items,
                 uint64_t *seqnum,
-                Object **ret, uint64_t *offset) {
+                Object **ret, uint64_t *ret_offset) {
         uint64_t np;
         uint64_t osize;
         Object *o;

@@ -1872,8 +1946,8 @@ static int journal_file_append_entry_internal(
         if (ret)
                 *ret = o;

-        if (offset)
-                *offset = np;
+        if (ret_offset)
+                *ret_offset = np;

         return 0;
 }
@@ -1977,7 +2051,7 @@ int journal_file_append_entry(
                 const sd_id128_t *boot_id,
                 const struct iovec iovec[], unsigned n_iovec,
                 uint64_t *seqnum,
-                Object **ret, uint64_t *offset) {
+                Object **ret, uint64_t *ret_offset) {

         unsigned i;
         EntryItem *items;

@@ -2020,7 +2094,20 @@ int journal_file_append_entry(
                 if (r < 0)
                         return r;

-                xor_hash ^= le64toh(o->data.hash);
+                /* When calculating the XOR hash field, we need to take special care if the "keyed-hash"
+                 * journal file flag is on. We use the XOR hash field to quickly determine the identity of a
+                 * specific record, and give records with otherwise identical position (i.e. match in seqno,
+                 * timestamp, …) a stable ordering. But for that we can't have it that the hash of the
+                 * objects in each file is different since they are keyed. Hence let's calculate the Jenkins
+                 * hash here for that. This also has the benefit that cursors for old and new journal files
+                 * are completely identical (they include the XOR hash after all). For classic Jenkins-hash
+                 * files things are easier, we can just take the value from the stored record directly. */
+
+                if (JOURNAL_HEADER_KEYED_HASH(f->header))
+                        xor_hash ^= jenkins_hash64(iovec[i].iov_base, iovec[i].iov_len);
+                else
+                        xor_hash ^= le64toh(o->data.hash);
+
                 items[i].object_offset = htole64(p);
                 items[i].hash = o->data.hash;
         }
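The fallback to the Jenkins hash for the XOR field works because XOR is commutative and associative: the combined value does not depend on the order of the data objects, and with an unkeyed hash it does not depend on the file either, which is exactly what a stable, portable cursor needs. A sketch of that computation (`jenkins_hash64()` is the renamed helper that appears later in this series):

```c
#include <stdint.h>
#include <sys/uio.h>

uint64_t jenkins_hash64(const void *data, size_t length); /* see the rename below */

/* The XOR of the per-item hashes identifies the entry independently of
 * item order and, with an unkeyed hash, independently of the file. */
static uint64_t entry_xor_hash(const struct iovec iov[], unsigned n) {
        uint64_t x = 0;

        for (unsigned i = 0; i < n; i++)
                x ^= jenkins_hash64(iov[i].iov_base, iov[i].iov_len);

        return x;
}
```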
@@ -2029,7 +2116,7 @@ int journal_file_append_entry(
          * times for rotating media. */
         typesafe_qsort(items, n_iovec, entry_item_cmp);

-        r = journal_file_append_entry_internal(f, ts, boot_id, xor_hash, items, n_iovec, seqnum, ret, offset);
+        r = journal_file_append_entry_internal(f, ts, boot_id, xor_hash, items, n_iovec, seqnum, ret, ret_offset);

         /* If the memory mapping triggered a SIGBUS then we return an
          * IO error and ignore the error code passed down to us, since
@@ -2098,7 +2185,7 @@ static int generic_array_get(
                 JournalFile *f,
                 uint64_t first,
                 uint64_t i,
-                Object **ret, uint64_t *offset) {
+                Object **ret, uint64_t *ret_offset) {

         Object *o;
         uint64_t p = 0, a, t = 0;

@@ -2148,8 +2235,8 @@ found:
         if (ret)
                 *ret = o;

-        if (offset)
-                *offset = p;
+        if (ret_offset)
+                *ret_offset = p;

         return 1;
 }

@@ -2159,7 +2246,7 @@ static int generic_array_get_plus_one(
                 uint64_t extra,
                 uint64_t first,
                 uint64_t i,
-                Object **ret, uint64_t *offset) {
+                Object **ret, uint64_t *ret_offset) {

         Object *o;

@@ -2175,13 +2262,13 @@ static int generic_array_get_plus_one(
                 if (ret)
                         *ret = o;

-                if (offset)
-                        *offset = extra;
+                if (ret_offset)
+                        *ret_offset = extra;

                 return 1;
         }

-        return generic_array_get(f, first, i-1, ret, offset);
+        return generic_array_get(f, first, i-1, ret, ret_offset);
 }

 enum {
@@ -2198,8 +2285,8 @@ static int generic_array_bisect(
                 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
                 direction_t direction,
                 Object **ret,
-                uint64_t *offset,
-                uint64_t *idx) {
+                uint64_t *ret_offset,
+                uint64_t *ret_idx) {

         uint64_t a, p, t = 0, i = 0, last_p = 0, last_index = (uint64_t) -1;
         bool subtract_one = false;

@@ -2398,11 +2485,11 @@ found:
         if (ret)
                 *ret = o;

-        if (offset)
-                *offset = p;
+        if (ret_offset)
+                *ret_offset = p;

-        if (idx)
-                *idx = t + i + (subtract_one ? -1 : 0);
+        if (ret_idx)
+                *ret_idx = t + i + (subtract_one ? -1 : 0);

         return 1;
 }

@@ -2416,8 +2503,8 @@ static int generic_array_bisect_plus_one(
                 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
                 direction_t direction,
                 Object **ret,
-                uint64_t *offset,
-                uint64_t *idx) {
+                uint64_t *ret_offset,
+                uint64_t *ret_idx) {

         int r;
         bool step_back = false;

@@ -2453,13 +2540,13 @@ static int generic_array_bisect_plus_one(
                 return 0;
         }

-        r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
+        r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, ret_offset, ret_idx);

         if (r == 0 && step_back)
                 goto found;

-        if (r > 0 && idx)
-                (*idx)++;
+        if (r > 0 && ret_idx)
+                (*ret_idx)++;

         return r;

@@ -2471,11 +2558,11 @@ found:
         if (ret)
                 *ret = o;

-        if (offset)
-                *offset = extra;
+        if (ret_offset)
+                *ret_offset = extra;

-        if (idx)
-                *idx = 0;
+        if (ret_idx)
+                *ret_idx = 0;

         return 1;
 }
@@ -2518,17 +2605,18 @@ int journal_file_move_to_entry_by_seqnum(
                 uint64_t seqnum,
                 direction_t direction,
                 Object **ret,
-                uint64_t *offset) {
+                uint64_t *ret_offset) {
         assert(f);
         assert(f->header);

-        return generic_array_bisect(f,
-                                    le64toh(f->header->entry_array_offset),
-                                    le64toh(f->header->n_entries),
-                                    seqnum,
-                                    test_object_seqnum,
-                                    direction,
-                                    ret, offset, NULL);
+        return generic_array_bisect(
+                        f,
+                        le64toh(f->header->entry_array_offset),
+                        le64toh(f->header->n_entries),
+                        seqnum,
+                        test_object_seqnum,
+                        direction,
+                        ret, ret_offset, NULL);
 }

 static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {

@@ -2557,17 +2645,18 @@ int journal_file_move_to_entry_by_realtime(
                 uint64_t realtime,
                 direction_t direction,
                 Object **ret,
-                uint64_t *offset) {
+                uint64_t *ret_offset) {
         assert(f);
         assert(f->header);

-        return generic_array_bisect(f,
-                                    le64toh(f->header->entry_array_offset),
-                                    le64toh(f->header->n_entries),
-                                    realtime,
-                                    test_object_realtime,
-                                    direction,
-                                    ret, offset, NULL);
+        return generic_array_bisect(
+                        f,
+                        le64toh(f->header->entry_array_offset),
+                        le64toh(f->header->n_entries),
+                        realtime,
+                        test_object_realtime,
+                        direction,
+                        ret, ret_offset, NULL);
 }

 static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {

@@ -2609,7 +2698,7 @@ int journal_file_move_to_entry_by_monotonic(
                 uint64_t monotonic,
                 direction_t direction,
                 Object **ret,
-                uint64_t *offset) {
+                uint64_t *ret_offset) {

         Object *o;
         int r;

@@ -2622,14 +2711,15 @@ int journal_file_move_to_entry_by_monotonic(
         if (r == 0)
                 return -ENOENT;

-        return generic_array_bisect_plus_one(f,
-                                             le64toh(o->data.entry_offset),
-                                             le64toh(o->data.entry_array_offset),
-                                             le64toh(o->data.n_entries),
-                                             monotonic,
-                                             test_object_monotonic,
-                                             direction,
-                                             ret, offset, NULL);
+        return generic_array_bisect_plus_one(
+                        f,
+                        le64toh(o->data.entry_offset),
+                        le64toh(o->data.entry_array_offset),
+                        le64toh(o->data.n_entries),
+                        monotonic,
+                        test_object_monotonic,
+                        direction,
+                        ret, ret_offset, NULL);
 }

 void journal_file_reset_location(JournalFile *f) {
@@ -2735,7 +2825,7 @@ int journal_file_next_entry(
                 JournalFile *f,
                 uint64_t p,
                 direction_t direction,
-                Object **ret, uint64_t *offset) {
+                Object **ret, uint64_t *ret_offset) {

         uint64_t i, n, ofs;
         int r;

@@ -2792,8 +2882,8 @@ int journal_file_next_entry(
                                        "%s: entry array not properly ordered at entry %" PRIu64,
                                        f->path, i);

-        if (offset)
-                *offset = ofs;
+        if (ret_offset)
+                *ret_offset = ofs;

         return 1;
 }

@@ -2803,7 +2893,7 @@ int journal_file_next_entry_for_data(
                 Object *o, uint64_t p,
                 uint64_t data_offset,
                 direction_t direction,
-                Object **ret, uint64_t *offset) {
+                Object **ret, uint64_t *ret_offset) {

         uint64_t i, n, ofs;
         Object *d;

@@ -2868,8 +2958,8 @@ int journal_file_next_entry_for_data(
                                        "%s data entry array not properly ordered at entry %" PRIu64,
                                        f->path, i);

-        if (offset)
-                *offset = ofs;
+        if (ret_offset)
+                *ret_offset = ofs;

         return 1;
 }
@@ -2879,7 +2969,7 @@ int journal_file_move_to_entry_by_offset_for_data(
                 uint64_t data_offset,
                 uint64_t p,
                 direction_t direction,
-                Object **ret, uint64_t *offset) {
+                Object **ret, uint64_t *ret_offset) {

         int r;
         Object *d;

@@ -2890,14 +2980,15 @@ int journal_file_move_to_entry_by_offset_for_data(
         if (r < 0)
                 return r;

-        return generic_array_bisect_plus_one(f,
-                                             le64toh(d->data.entry_offset),
-                                             le64toh(d->data.entry_array_offset),
-                                             le64toh(d->data.n_entries),
-                                             p,
-                                             test_object_offset,
-                                             direction,
-                                             ret, offset, NULL);
+        return generic_array_bisect_plus_one(
+                        f,
+                        le64toh(d->data.entry_offset),
+                        le64toh(d->data.entry_array_offset),
+                        le64toh(d->data.n_entries),
+                        p,
+                        test_object_offset,
+                        direction,
+                        ret, ret_offset, NULL);
 }

 int journal_file_move_to_entry_by_monotonic_for_data(

@@ -2906,7 +2997,7 @@ int journal_file_move_to_entry_by_monotonic_for_data(
                 sd_id128_t boot_id,
                 uint64_t monotonic,
                 direction_t direction,
-                Object **ret, uint64_t *offset) {
+                Object **ret, uint64_t *ret_offset) {

         Object *o, *d;
         int r;

@@ -2973,8 +3064,8 @@ int journal_file_move_to_entry_by_monotonic_for_data(
                 if (p == q) {
                         if (ret)
                                 *ret = qo;
-                        if (offset)
-                                *offset = q;
+                        if (ret_offset)
+                                *ret_offset = q;

                         return 1;
                 }
@@ -2988,7 +3079,7 @@ int journal_file_move_to_entry_by_seqnum_for_data(
                 uint64_t data_offset,
                 uint64_t seqnum,
                 direction_t direction,
-                Object **ret, uint64_t *offset) {
+                Object **ret, uint64_t *ret_offset) {

         Object *d;
         int r;

@@ -2999,14 +3090,15 @@ int journal_file_move_to_entry_by_seqnum_for_data(
         if (r < 0)
                 return r;

-        return generic_array_bisect_plus_one(f,
-                                             le64toh(d->data.entry_offset),
-                                             le64toh(d->data.entry_array_offset),
-                                             le64toh(d->data.n_entries),
-                                             seqnum,
-                                             test_object_seqnum,
-                                             direction,
-                                             ret, offset, NULL);
+        return generic_array_bisect_plus_one(
+                        f,
+                        le64toh(d->data.entry_offset),
+                        le64toh(d->data.entry_array_offset),
+                        le64toh(d->data.n_entries),
+                        seqnum,
+                        test_object_seqnum,
+                        direction,
+                        ret, ret_offset, NULL);
 }

 int journal_file_move_to_entry_by_realtime_for_data(

@@ -3014,7 +3106,7 @@ int journal_file_move_to_entry_by_realtime_for_data(
                 uint64_t data_offset,
                 uint64_t realtime,
                 direction_t direction,
-                Object **ret, uint64_t *offset) {
+                Object **ret, uint64_t *ret_offset) {

         Object *d;
         int r;

@@ -3025,14 +3117,15 @@ int journal_file_move_to_entry_by_realtime_for_data(
         if (r < 0)
                 return r;

-        return generic_array_bisect_plus_one(f,
-                                             le64toh(d->data.entry_offset),
-                                             le64toh(d->data.entry_array_offset),
-                                             le64toh(d->data.n_entries),
-                                             realtime,
-                                             test_object_realtime,
-                                             direction,
-                                             ret, offset, NULL);
+        return generic_array_bisect_plus_one(
+                        f,
+                        le64toh(d->data.entry_offset),
+                        le64toh(d->data.entry_array_offset),
+                        le64toh(d->data.n_entries),
+                        realtime,
+                        test_object_realtime,
+                        direction,
+                        ret, ret_offset, NULL);
 }

 void journal_file_dump(JournalFile *f) {
@@ -3135,7 +3228,7 @@ void journal_file_print_header(JournalFile *f) {
                "Sequential number ID: %s\n"
                "State: %s\n"
                "Compatible flags:%s%s\n"
-               "Incompatible flags:%s%s%s\n"
+               "Incompatible flags:%s%s%s%s%s\n"
                "Header size: %"PRIu64"\n"
                "Arena size: %"PRIu64"\n"
                "Data hash table size: %"PRIu64"\n"

@@ -3160,6 +3253,8 @@ void journal_file_print_header(JournalFile *f) {
                (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_ANY) ? " ???" : "",
                JOURNAL_HEADER_COMPRESSED_XZ(f->header) ? " COMPRESSED-XZ" : "",
                JOURNAL_HEADER_COMPRESSED_LZ4(f->header) ? " COMPRESSED-LZ4" : "",
+               JOURNAL_HEADER_COMPRESSED_ZSTD(f->header) ? " COMPRESSED-ZSTD" : "",
+               JOURNAL_HEADER_KEYED_HASH(f->header) ? " KEYED-HASH" : "",
                (le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_ANY) ? " ???" : "",
                le64toh(f->header->header_size),
                le64toh(f->header->arena_size),

@@ -3193,6 +3288,14 @@ void journal_file_print_header(JournalFile *f) {
         printf("Entry array objects: %"PRIu64"\n",
                le64toh(f->header->n_entry_arrays));

+        if (JOURNAL_HEADER_CONTAINS(f->header, field_hash_chain_depth))
+                printf("Deepest field hash chain: %" PRIu64"\n",
+                       f->header->field_hash_chain_depth);
+
+        if (JOURNAL_HEADER_CONTAINS(f->header, data_hash_chain_depth))
+                printf("Deepest data hash chain: %" PRIu64"\n",
+                       f->header->data_hash_chain_depth);
+
         if (fstat(f->fd, &st) >= 0)
                 printf("Disk usage: %s\n", format_bytes(bytes, sizeof(bytes), (uint64_t) st.st_blocks * 512ULL));
 }
@@ -3272,7 +3375,9 @@ int journal_file_open(
                 .prot = prot_from_flags(flags),
                 .writable = (flags & O_ACCMODE) != O_RDONLY,

-#if HAVE_LZ4
+#if HAVE_ZSTD
+                .compress_zstd = compress,
+#elif HAVE_LZ4
                 .compress_lz4 = compress,
 #elif HAVE_XZ
                 .compress_xz = compress,

@@ -3285,19 +3390,31 @@ int journal_file_open(
 #endif
         };

+        /* We turn on keyed hashes by default, but provide an environment variable to turn them off, if
+         * people really want that */
+        r = getenv_bool("SYSTEMD_JOURNAL_KEYED_HASH");
+        if (r < 0) {
+                if (r != -ENXIO)
+                        log_debug_errno(r, "Failed to parse $SYSTEMD_JOURNAL_KEYED_HASH environment variable, ignoring.");
+                f->keyed_hash = true;
+        } else
+                f->keyed_hash = r;
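Note the default-on shape of this check: `getenv_bool()` returns a negative errno when the variable is unset (`-ENXIO`) or unparsable, and both cases fall back to enabling keyed hashes; only an explicit false value opts out. The same pattern in isolation (`my_getenv_bool()` below is a simplified stand-in for systemd's helper, not its actual implementation):

```c
#include <errno.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>

/* Stand-in for systemd's getenv_bool(): 1/0 for a parsable value,
 * -ENXIO when the variable is unset, -EINVAL otherwise. */
static int my_getenv_bool(const char *name) {
        const char *e = getenv(name);

        if (!e)
                return -ENXIO;
        if (strcmp(e, "1") == 0 || strcmp(e, "yes") == 0 || strcmp(e, "true") == 0)
                return 1;
        if (strcmp(e, "0") == 0 || strcmp(e, "no") == 0 || strcmp(e, "false") == 0)
                return 0;
        return -EINVAL;
}

static bool keyed_hash_enabled(void) {
        int r = my_getenv_bool("SYSTEMD_JOURNAL_KEYED_HASH");
        return r < 0 ? true : r; /* default on; only an explicit "0" disables */
}
```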
         if (DEBUG_LOGGING) {
-                static int last_seal = -1, last_compress = -1;
+                static int last_seal = -1, last_compress = -1, last_keyed_hash = -1;
                 static uint64_t last_bytes = UINT64_MAX;
                 char bytes[FORMAT_BYTES_MAX];

                 if (last_seal != f->seal ||
+                    last_keyed_hash != f->keyed_hash ||
                     last_compress != JOURNAL_FILE_COMPRESS(f) ||
                     last_bytes != f->compress_threshold_bytes) {

-                        log_debug("Journal effective settings seal=%s compress=%s compress_threshold_bytes=%s",
-                                  yes_no(f->seal), yes_no(JOURNAL_FILE_COMPRESS(f)),
+                        log_debug("Journal effective settings seal=%s keyed_hash=%s compress=%s compress_threshold_bytes=%s",
+                                  yes_no(f->seal), yes_no(f->keyed_hash), yes_no(JOURNAL_FILE_COMPRESS(f)),
                                   format_bytes(bytes, sizeof bytes, f->compress_threshold_bytes));
                         last_seal = f->seal;
+                        last_keyed_hash = f->keyed_hash;
                         last_compress = JOURNAL_FILE_COMPRESS(f);
                         last_bytes = f->compress_threshold_bytes;
                 }
@@ -3735,7 +3852,7 @@ int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint6
                         return -E2BIG;

                 if (o->object.flags & OBJECT_COMPRESSION_MASK) {
-#if HAVE_XZ || HAVE_LZ4
+#if HAVE_COMPRESSION
                         size_t rsize = 0;

                         r = decompress_blob(o->object.flags & OBJECT_COMPRESSION_MASK,

@@ -3755,7 +3872,11 @@ int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint6
                 if (r < 0)
                         return r;

-                xor_hash ^= le64toh(u->data.hash);
+                if (JOURNAL_HEADER_KEYED_HASH(to->header))
+                        xor_hash ^= jenkins_hash64(data, l);
+                else
+                        xor_hash ^= le64toh(u->data.hash);
+
                 items[i].object_offset = htole64(h);
                 items[i].hash = u->data.hash;
@@ -3942,11 +4063,9 @@ bool journal_file_rotate_suggested(JournalFile *f, usec_t max_file_usec) {
                 return true;
         }

-        /* Let's check if the hash tables grew over a certain fill
-         * level (75%, borrowing this value from Java's hash table
-         * implementation), and if so suggest a rotation. To calculate
-         * the fill level we need the n_data field, which only exists
-         * in newer versions. */
+        /* Let's check if the hash tables grew over a certain fill level (75%, borrowing this value from
+         * Java's hash table implementation), and if so suggest a rotation. To calculate the fill level we
+         * need the n_data field, which only exists in newer versions. */

         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
                 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {

@@ -3970,6 +4089,22 @@ bool journal_file_rotate_suggested(JournalFile *f, usec_t max_file_usec) {
                 return true;
         }

+        /* If there are too many hash collisions somebody is most likely playing games with us. Hence, if our
+         * longest chain is longer than some threshold, let's suggest rotation. */
+        if (JOURNAL_HEADER_CONTAINS(f->header, data_hash_chain_depth) &&
+            le64toh(f->header->data_hash_chain_depth) > HASH_CHAIN_DEPTH_MAX) {
+                log_debug("Data hash table of %s has deepest hash chain of length %" PRIu64 ", suggesting rotation.",
+                          f->path, le64toh(f->header->data_hash_chain_depth));
+                return true;
+        }
+
+        if (JOURNAL_HEADER_CONTAINS(f->header, field_hash_chain_depth) &&
+            le64toh(f->header->field_hash_chain_depth) > HASH_CHAIN_DEPTH_MAX) {
+                log_debug("Field hash table of %s has deepest hash chain of length at %" PRIu64 ", suggesting rotation.",
+                          f->path, le64toh(f->header->field_hash_chain_depth));
+                return true;
+        }
+
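Taken together with the 75% fill check above, these depth checks close the collision-attack window: a uniformly hashing table that rotates at 75% fill essentially never grows chains of 100 links, so exceeding `HASH_CHAIN_DEPTH_MAX` almost certainly means deliberately colliding keys rather than bad luck, and for keyed-hash files rotation also swaps the siphash key (the new file's `file_id`), invalidating whatever collisions were precomputed.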
         /* Are the data objects properly indexed by field objects? */
         if (JOURNAL_HEADER_CONTAINS(f->header, n_data) &&
             JOURNAL_HEADER_CONTAINS(f->header, n_fields) &&

@@ -67,10 +67,12 @@ typedef struct JournalFile {
         bool writable:1;
         bool compress_xz:1;
         bool compress_lz4:1;
+        bool compress_zstd:1;
         bool seal:1;
         bool defrag_on_close:1;
         bool close_fd:1;
         bool archive:1;
+        bool keyed_hash:1;

         direction_t last_direction;
         LocationType location_type;
@@ -105,7 +107,7 @@ typedef struct JournalFile {
         unsigned last_seen_generation;

         uint64_t compress_threshold_bytes;
-#if HAVE_XZ || HAVE_LZ4
+#if HAVE_COMPRESSION
         void *compress_buffer;
         size_t compress_buffer_size;
 #endif

@@ -187,13 +189,19 @@ static inline bool VALID_EPOCH(uint64_t u) {
         (le64toh((h)->header_size) >= offsetof(Header, field) + sizeof((h)->field))

 #define JOURNAL_HEADER_SEALED(h) \
-        (!!(le32toh((h)->compatible_flags) & HEADER_COMPATIBLE_SEALED))
+        FLAGS_SET(le32toh((h)->compatible_flags), HEADER_COMPATIBLE_SEALED)

 #define JOURNAL_HEADER_COMPRESSED_XZ(h) \
-        (!!(le32toh((h)->incompatible_flags) & HEADER_INCOMPATIBLE_COMPRESSED_XZ))
+        FLAGS_SET(le32toh((h)->incompatible_flags), HEADER_INCOMPATIBLE_COMPRESSED_XZ)

 #define JOURNAL_HEADER_COMPRESSED_LZ4(h) \
-        (!!(le32toh((h)->incompatible_flags) & HEADER_INCOMPATIBLE_COMPRESSED_LZ4))
+        FLAGS_SET(le32toh((h)->incompatible_flags), HEADER_INCOMPATIBLE_COMPRESSED_LZ4)

+#define JOURNAL_HEADER_COMPRESSED_ZSTD(h) \
+        FLAGS_SET(le32toh((h)->incompatible_flags), HEADER_INCOMPATIBLE_COMPRESSED_ZSTD)
+
+#define JOURNAL_HEADER_KEYED_HASH(h) \
+        FLAGS_SET(le32toh((h)->incompatible_flags), HEADER_INCOMPATIBLE_KEYED_HASH)
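`FLAGS_SET(v, f)` is systemd's all-bits-set test, equivalent to `((v & (f)) == (f))`; for these single-bit flags it behaves exactly like the old `!!(v & f)` form, it just also does the right thing for multi-bit masks. A stand-in definition:

```c
/* Equivalent stand-in for systemd's FLAGS_SET() macro. */
#define MY_FLAGS_SET(v, flags) (((v) & (flags)) == (flags))
```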
 int journal_file_move_to_object(JournalFile *f, ObjectType type, uint64_t offset, Object **ret);

@@ -260,5 +268,7 @@ int journal_file_map_field_hash_table(JournalFile *f);

 static inline bool JOURNAL_FILE_COMPRESS(JournalFile *f) {
         assert(f);
-        return f->compress_xz || f->compress_lz4;
+        return f->compress_xz || f->compress_lz4 || f->compress_zstd;
 }

+uint64_t journal_file_hash_data(JournalFile *f, const void *data, size_t sz);
@@ -32,7 +32,7 @@ struct Match {
         /* For concrete matches */
         char *data;
         size_t size;
-        le64_t le_hash;
+        uint64_t hash; /* old-style jenkins hash. New-style siphash is different per file, hence won't be cached here */

         /* For terms */
         LIST_HEAD(Match, matches);

@@ -163,9 +163,9 @@ static int journal_file_object_verify(JournalFile *f, uint64_t offset, Object *o
                         return r;
                 }

-                h2 = hash64(b, b_size);
+                h2 = journal_file_hash_data(f, b, b_size);
         } else
-                h2 = hash64(o->data.payload, le64toh(o->object.size) - offsetof(Object, data.payload));
+                h2 = journal_file_hash_data(f, o->data.payload, le64toh(o->object.size) - offsetof(Object, data.payload));

         if (h1 != h2) {
                 error(offset, "Invalid hash (%08"PRIx64" vs. %08"PRIx64, h1, h2);
@@ -925,9 +925,10 @@ int journal_file_verify(
                 goto fail;
         }

-        if ((o->object.flags & OBJECT_COMPRESSED_XZ) &&
-            (o->object.flags & OBJECT_COMPRESSED_LZ4)) {
-                error(p, "Objected with double compression");
+        if (!!(o->object.flags & OBJECT_COMPRESSED_XZ) +
+            !!(o->object.flags & OBJECT_COMPRESSED_LZ4) +
+            !!(o->object.flags & OBJECT_COMPRESSED_ZSTD) > 1) {
+                error(p, "Object has multiple compression flags set");
                 r = -EINVAL;
                 goto fail;
         }
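The `!! + !! + !!` sum is a branch-free way to count set flags, so that any pairing of the now three compression types is rejected. Since all three bits live inside a single mask, the same check could also be phrased as a popcount (a sketch with hypothetical flag values, not the code actually used):

```c
#include <stdbool.h>
#include <stdint.h>

/* Hypothetical flag layout; only the counting idiom matters here. */
#define XZ_FLAG   (1u << 0)
#define LZ4_FLAG  (1u << 1)
#define ZSTD_FLAG (1u << 2)
#define COMPRESSION_MASK (XZ_FLAG | LZ4_FLAG | ZSTD_FLAG)

static bool multiple_compression_flags(uint8_t object_flags) {
        return __builtin_popcount(object_flags & COMPRESSION_MASK) > 1;
}
```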
@@ -944,6 +945,12 @@ int journal_file_verify(
                 goto fail;
         }

+        if ((o->object.flags & OBJECT_COMPRESSED_ZSTD) && !JOURNAL_HEADER_COMPRESSED_ZSTD(f->header)) {
+                error(p, "ZSTD compressed object in file without ZSTD compression");
+                r = -EBADMSG;
+                goto fail;
+        }
+
         switch (o->object.type) {

         case OBJECT_DATA:

@@ -13,7 +13,7 @@ void jenkins_hashlittle2(const void *key, size_t length, uint32_t *pc, uint32_t
 uint32_t jenkins_hashbig(const void *key, size_t length, uint32_t initval) _pure_;

-static inline uint64_t hash64(const void *data, size_t length) {
+static inline uint64_t jenkins_hash64(const void *data, size_t length) {
         uint32_t a = 0, b = 0;

         jenkins_hashlittle2(data, length, &a, &b);
@@ -240,7 +240,7 @@ static void match_free_if_empty(Match *m) {

 _public_ int sd_journal_add_match(sd_journal *j, const void *data, size_t size) {
         Match *l3, *l4, *add_here = NULL, *m;
-        le64_t le_hash;
+        uint64_t hash;

         assert_return(j, -EINVAL);
         assert_return(!journal_pid_changed(j), -ECHILD);

@@ -279,7 +279,9 @@ _public_ int sd_journal_add_match(sd_journal *j, const void *data, size_t size)
         assert(j->level1->type == MATCH_OR_TERM);
         assert(j->level2->type == MATCH_AND_TERM);

-        le_hash = htole64(hash64(data, size));
+        /* Old-style Jenkins (unkeyed) hashing only here. We do not cover new-style siphash (keyed) hashing
+         * here, since it's different for each file, and thus can't be pre-calculated in the Match object. */
+        hash = jenkins_hash64(data, size);

         LIST_FOREACH(matches, l3, j->level2->matches) {
                 assert(l3->type == MATCH_OR_TERM);

@@ -289,7 +291,7 @@ _public_ int sd_journal_add_match(sd_journal *j, const void *data, size_t size)

                         /* Exactly the same match already? Then ignore
                          * this addition */
-                        if (l4->le_hash == le_hash &&
+                        if (l4->hash == hash &&
                             l4->size == size &&
                             memcmp(l4->data, data, size) == 0)
                                 return 0;

@@ -315,7 +317,7 @@ _public_ int sd_journal_add_match(sd_journal *j, const void *data, size_t size)
         if (!m)
                 goto fail;

-        m->le_hash = le_hash;
+        m->hash = hash;
         m->size = size;
         m->data = memdup(data, size);
         if (!m->data)
@@ -501,9 +503,16 @@ static int next_for_match(
         assert(f);

         if (m->type == MATCH_DISCRETE) {
-                uint64_t dp;
+                uint64_t dp, hash;

-                r = journal_file_find_data_object_with_hash(f, m->data, m->size, le64toh(m->le_hash), NULL, &dp);
+                /* If the keyed hash logic is used, we need to calculate the hash fresh per file. Otherwise
+                 * we can use what we pre-calculated. */
+                if (JOURNAL_HEADER_KEYED_HASH(f->header))
+                        hash = journal_file_hash_data(f, m->data, m->size);
+                else
+                        hash = m->hash;
+
+                r = journal_file_find_data_object_with_hash(f, m->data, m->size, hash, NULL, &dp);
                 if (r <= 0)
                         return r;

@@ -590,9 +599,14 @@ static int find_location_for_match(
         assert(f);

         if (m->type == MATCH_DISCRETE) {
-                uint64_t dp;
+                uint64_t dp, hash;

-                r = journal_file_find_data_object_with_hash(f, m->data, m->size, le64toh(m->le_hash), NULL, &dp);
+                if (JOURNAL_HEADER_KEYED_HASH(f->header))
+                        hash = journal_file_hash_data(f, m->data, m->size);
+                else
+                        hash = m->hash;
+
+                r = journal_file_find_data_object_with_hash(f, m->data, m->size, hash, NULL, &dp);
                 if (r <= 0)
                         return r;

@@ -2313,7 +2327,7 @@ _public_ int sd_journal_get_data(sd_journal *j, const char *field, const void **
         compression = o->object.flags & OBJECT_COMPRESSION_MASK;
         if (compression) {
-#if HAVE_XZ || HAVE_LZ4
+#if HAVE_COMPRESSION
                 r = decompress_startswith(compression,
                                           o->data.payload, l,
                                           &f->compress_buffer, &f->compress_buffer_size,

@@ -2380,7 +2394,7 @@ static int return_data(sd_journal *j, JournalFile *f, Object *o, const void **da
         compression = o->object.flags & OBJECT_COMPRESSION_MASK;
         if (compression) {
-#if HAVE_XZ || HAVE_LZ4
+#if HAVE_COMPRESSION
                 size_t rsize;
                 int r;
@@ -17,7 +17,7 @@ typedef int (compress_t)(const void *src, uint64_t src_size, void *dst,
 typedef int (decompress_t)(const void *src, uint64_t src_size,
                            void **dst, size_t *dst_alloc_size, size_t* dst_size, size_t dst_max);

-#if HAVE_XZ || HAVE_LZ4
+#if HAVE_COMPRESSION

 static usec_t arg_duration;
 static size_t arg_start;

@@ -143,7 +143,7 @@ static void test_compress_decompress(const char* label, const char* type,
 #endif

 int main(int argc, char *argv[]) {
-#if HAVE_XZ || HAVE_LZ4
+#if HAVE_COMPRESSION
         test_setup_logging(LOG_INFO);

         if (argc >= 2) {

@@ -167,6 +167,9 @@ int main(int argc, char *argv[]) {
 #endif
 #if HAVE_LZ4
                 test_compress_decompress("LZ4", i, compress_blob_lz4, decompress_blob_lz4);
+#endif
+#if HAVE_ZSTD
+                test_compress_decompress("ZSTD", i, compress_blob_zstd, decompress_blob_zstd);
 #endif
         }
         return 0;
@@ -44,7 +44,7 @@ typedef int (decompress_sw_t)(const void *src, uint64_t src_size,
 typedef int (compress_stream_t)(int fdf, int fdt, uint64_t max_bytes);
 typedef int (decompress_stream_t)(int fdf, int fdt, uint64_t max_size);

-#if HAVE_XZ || HAVE_LZ4 || HAVE_ZSTD
+#if HAVE_COMPRESSION
 _unused_ static void test_compress_decompress(const char *compression,
                                               compress_blob_t compress,
                                               decompress_blob_t decompress,

@@ -265,7 +265,7 @@ static void test_lz4_decompress_partial(void) {
 #endif

 int main(int argc, char *argv[]) {
-#if HAVE_XZ || HAVE_LZ4 || HAVE_ZSTD
+#if HAVE_COMPRESSION
         _unused_ const char text[] =
                 "text\0foofoofoofoo AAAA aaaaaaaaa ghost busters barbarbar FFF"
                 "foofoofoofoo AAAA aaaaaaaaa ghost busters barbarbar FFF";

@@ -339,8 +339,25 @@ int main(int argc, char *argv[]) {
 #endif

 #if HAVE_ZSTD
         test_compress_decompress("ZSTD", compress_blob_zstd, decompress_blob_zstd,
                                  text, sizeof(text), false);
         test_compress_decompress("ZSTD", compress_blob_zstd, decompress_blob_zstd,
                                  data, sizeof(data), true);
+
+        test_decompress_startswith("ZSTD",
+                                   compress_blob_zstd, decompress_startswith_zstd,
+                                   text, sizeof(text), false);
+        test_decompress_startswith("ZSTD",
+                                   compress_blob_zstd, decompress_startswith_zstd,
+                                   data, sizeof(data), true);
+        test_decompress_startswith("ZSTD",
+                                   compress_blob_zstd, decompress_startswith_zstd,
+                                   huge, HUGE_SIZE, true);
+
+        test_compress_stream("ZSTD", "zstdcat",
+                             compress_stream_zstd, decompress_stream_zstd, srcfile);
+
+        test_decompress_startswith_short("ZSTD", compress_blob_zstd, decompress_startswith_zstd);
 #else
         log_info("/* ZSTD test skipped */");
 #endif
@@ -58,7 +58,7 @@ static void verify_contents(sd_journal *j, unsigned skip) {
         assert_se(i == N_ENTRIES);
 }

-int main(int argc, char *argv[]) {
+static void run_test(void) {
         JournalFile *one, *two, *three;
         char t[] = "/var/tmp/journal-stream-XXXXXX";
         unsigned i;

@@ -68,12 +68,6 @@ int main(int argc, char *argv[]) {
         size_t l;
         dual_timestamp previous_ts = DUAL_TIMESTAMP_NULL;

-        /* journal_file_open requires a valid machine id */
-        if (access("/etc/machine-id", F_OK) != 0)
-                return log_tests_skipped("/etc/machine-id not found");
-
-        test_setup_logging(LOG_DEBUG);
-
         assert_se(mkdtemp(t));
         assert_se(chdir(t) >= 0);
         (void) chattr_path(t, FS_NOCOW_FL, FS_NOCOW_FL, NULL);

@@ -177,6 +171,22 @@ int main(int argc, char *argv[]) {
         printf("%.*s\n", (int) l, (const char*) data);

         assert_se(rm_rf(t, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0);
 }

+int main(int argc, char *argv[]) {
+
+        /* journal_file_open requires a valid machine id */
+        if (access("/etc/machine-id", F_OK) != 0)
+                return log_tests_skipped("/etc/machine-id not found");
+
+        test_setup_logging(LOG_DEBUG);
+
+        /* Run this test twice. Once with old hashing and once with new hashing */
+        assert_se(setenv("SYSTEMD_JOURNAL_KEYED_HASH", "1", 1) >= 0);
+        run_test();
+
+        assert_se(setenv("SYSTEMD_JOURNAL_KEYED_HASH", "0", 1) >= 0);
+        run_test();
+
+        return 0;
+}
@@ -157,7 +157,7 @@ static void test_empty(void) {
         (void) journal_file_close(f4);
 }

-#if HAVE_XZ || HAVE_LZ4
+#if HAVE_COMPRESSION
 static bool check_compressed(uint64_t compress_threshold, uint64_t data_size) {
         dual_timestamp ts;
         JournalFile *f;

@@ -251,7 +251,7 @@ int main(int argc, char *argv[]) {
         test_non_empty();
         test_empty();
-#if HAVE_XZ || HAVE_LZ4
+#if HAVE_COMPRESSION
         test_min_compress_size();
 #endif