BinaryLogReader¶
BinaryLogReader provides sequential access to market data stored in the binary log format. It handles segment discovery, time filtering, symbol filtering, and CRC verification.
struct ReaderConfig
{
std::filesystem::path data_dir;
std::optional<int64_t> from_ns;
std::optional<int64_t> to_ns;
std::set<uint32_t> symbols;
bool verify_crc{true};
};
class BinaryLogReader
{
public:
explicit BinaryLogReader(ReaderConfig config);
// Static inspection (no event reading)
static DatasetSummary inspect(const std::filesystem::path& data_dir);
static DatasetSummary inspectWithSymbols(const std::filesystem::path& data_dir);
// Instance methods
DatasetSummary summary();
uint64_t count();
std::set<uint32_t> availableSymbols();
// Iteration
using EventCallback = std::function<bool(const ReplayEvent&)>;
bool forEach(EventCallback callback);
bool forEachFrom(int64_t start_ts_ns, EventCallback callback);
// Metadata
std::optional<std::pair<int64_t, int64_t>> timeRange() const;
ReaderStats stats() const;
std::vector<std::filesystem::path> segmentFiles() const;
const std::vector<SegmentInfo>& segments() const;
};
Purpose¶
- Read market data from binary log segments in chronological order.
- Filter events by time range and symbol set.
- Support both full scans and timestamp-based seeks.
Configuration¶
| Field | Type | Description |
|---|---|---|
| data_dir | filesystem::path |
Directory containing .floxlog files |
| from_ns | optional<int64_t> |
Start timestamp filter (inclusive) |
| to_ns | optional<int64_t> |
End timestamp filter (inclusive) |
| symbols | set<uint32_t> |
Symbol IDs to include (empty = all) |
| verify_crc | bool |
Verify CRC32 checksums (default: true) |
Core Methods¶
| Method | Description |
|---|---|
inspect() |
Static scan of directory, returns metadata without reading events |
inspectWithSymbols() |
Like inspect() but also collects symbol IDs |
summary() |
Returns dataset metadata after scanning |
count() |
Returns total event count across all segments |
forEach() |
Iterate all events matching filters |
forEachFrom() |
Iterate events starting from a timestamp |
timeRange() |
Returns (first_event_ns, last_event_ns) pair |
stats() |
Returns read statistics (events, bytes, errors) |
segmentFiles() |
Returns list of segment file paths |
segments() |
Returns detailed segment information |
Data Structures¶
DatasetSummary¶
struct DatasetSummary
{
std::filesystem::path data_dir;
int64_t first_event_ns{0};
int64_t last_event_ns{0};
uint64_t total_events{0};
uint32_t segment_count{0};
uint64_t total_bytes{0};
std::set<uint32_t> symbols;
uint32_t segments_with_index{0};
uint32_t segments_without_index{0};
// Helper methods
bool empty() const;
std::chrono::nanoseconds duration() const;
double durationSeconds() const;
double durationMinutes() const;
double durationHours() const;
bool fullyIndexed() const;
};
ReplayEvent¶
struct ReplayEvent
{
EventType type; // Trade, BookSnapshot, or BookDelta
int64_t timestamp_ns; // Event timestamp
TradeRecord trade; // Populated for Trade events
BookRecordHeader book_header; // Populated for Book events
std::vector<BookLevel> bids;
std::vector<BookLevel> asks;
};
ReaderStats¶
struct ReaderStats
{
uint64_t files_read{0};
uint64_t events_read{0};
uint64_t trades_read{0};
uint64_t book_updates_read{0};
uint64_t bytes_read{0};
uint64_t crc_errors{0};
};
SegmentInfo¶
struct SegmentInfo
{
std::filesystem::path path;
int64_t first_event_ns{0};
int64_t last_event_ns{0};
uint32_t event_count{0};
bool has_index{false};
uint64_t index_offset{0};
};
Usage¶
replay::ReaderConfig config{
.data_dir = "/data/market",
.from_ns = start_timestamp,
.to_ns = end_timestamp,
.symbols = {1, 2, 3}
};
replay::BinaryLogReader reader(config);
reader.forEach([](const replay::ReplayEvent& event) {
if (event.type == replay::EventType::Trade) {
// Process trade
} else {
// Process book update
}
return true; // Continue iteration
});
Time Utilities¶
The time_utils namespace provides helper functions:
namespace replay::time_utils
{
int64_t toNanos(std::chrono::system_clock::time_point tp);
std::chrono::system_clock::time_point fromNanos(int64_t ns);
int64_t nowNanos();
int64_t secondsToNanos(int64_t seconds);
int64_t millisToNanos(int64_t millis);
int64_t microsToNanos(int64_t micros);
double nanosToSeconds(int64_t ns);
}
BinaryLogIterator¶
Low-level iterator for reading a single segment file:
class BinaryLogIterator
{
public:
explicit BinaryLogIterator(const std::filesystem::path& segment_path);
bool next(ReplayEvent& out);
bool seekToTimestamp(int64_t target_ts_ns);
bool loadIndex();
const SegmentHeader& header() const;
bool isValid() const;
bool isCompressed() const;
bool hasIndex() const;
};
Ordering guarantee¶
forEach and forEachFrom always deliver events in monotonically non-decreasing timestamp_ns order, regardless of how the underlying segment was recorded.
For segments written with the current writer (which sets SegmentFlags::Sorted), events are streamed directly — O(1) memory, early exit on false return is immediate.
For legacy segments without the flag, all events in the segment are buffered and sorted before delivery — O(segment events) memory, early exit stops delivery but the segment has already been read in full.
BinaryLogIterator is a low-level block-by-block streaming API with no ordering guarantee. Use BinaryLogReader when ordered output is required.
Notes¶
- Segments are automatically discovered and sorted by filename.
- Compressed segments (LZ4) are transparently decompressed.
- Seeking uses segment indexes when available for O(log n) lookup.
- The callback returning
falsestops iteration early. - File extension is
.floxlog.
Live-tail safety¶
Active segments — those whose writer is still appending — are safe to read. The writer fflushes after each compressed block and refreshes the segment header, so a snapshot of the file taken mid-run (e.g. via rsync) contains complete blocks and a non-zero SegmentHeader.
If the header is still zero-initialized at the time of read (event_count == 0 on a compressed segment), scanSegments() and inspect() recover the metadata: every CompressedBlockHeader is walked to sum event_count, and the first / last viable blocks are decompressed for first_event_ns / last_event_ns. The very last block is often truncated because the writer flushes its block header before the compressed payload is fully written, so the scan iterates backwards until one block decompresses successfully.