Skip to content

Validator

SegmentValidator and DatasetValidator verify integrity of binary log files, detecting corruption, CRC mismatches, timestamp anomalies, and structural issues.

// Options passed to SegmentValidator / DatasetValidator at construction.
// With the defaults shown, every check is enabled and validation does not
// stop at the first error.
struct ValidatorConfig
{
  // CRC verification; can be disabled for faster validation.
  bool verify_crc{true};
  // Presumably gates the timestamp checks (TimestampOutOfOrder,
  // TimestampJumpTooLarge) — confirm against the implementation.
  bool verify_timestamps{true};
  // Presumably gates the index checks (IndexCrcMismatch, IndexNotSorted)
  // — confirm against the implementation.
  bool verify_index{true};
  // Validation scans the entire file by default for a complete integrity check.
  bool scan_all_events{true};
  // Off by default. Presumably ends validation at the first issue of Error
  // severity or worse — TODO confirm the exact threshold.
  bool stop_on_first_error{false};
  // Threshold for a "suspicious timestamp gap", in nanoseconds
  // (3600 s * 1e9 ns/s).
  int64_t max_timestamp_jump_ns{3600LL * 1000000000LL};  // 1 hour
};

// Verifies the integrity of a single binary log segment file.
class SegmentValidator
{
public:
  // Builds a validator with the given options; a default-constructed
  // ValidatorConfig is used when none is supplied.
  explicit SegmentValidator(ValidatorConfig config = {});

  // Validates the segment at `segment_path` and returns the findings.
  SegmentValidationResult validate(const std::filesystem::path& segment_path);

  // Callback receiving the number of bytes processed so far and the total
  // number of bytes. How often it is invoked is not specified here.
  using ProgressCallback = std::function<void(uint64_t bytes_processed,
                                               uint64_t total_bytes)>;
  // Same as the single-argument overload, additionally reporting progress
  // through `progress`.
  // NOTE(review): behavior with an empty std::function is not documented
  // here — confirm before passing one.
  SegmentValidationResult validate(const std::filesystem::path& segment_path,
                                   ProgressCallback progress);
};

// Verifies the integrity of a whole dataset, i.e. the segments found under
// a data directory.
class DatasetValidator
{
public:
  // Builds a validator with the given options; a default-constructed
  // ValidatorConfig is used when none is supplied.
  explicit DatasetValidator(ValidatorConfig config = {});

  // Validates the dataset rooted at `data_dir` and returns aggregate and
  // per-segment findings.
  DatasetValidationResult validate(const std::filesystem::path& data_dir);

  // Callback receiving the index of the segment being processed, the total
  // number of segments, and the path of the current file.
  // NOTE(review): whether segment_index is 0- or 1-based is not stated here
  // — confirm before printing it as "current/total".
  using ProgressCallback = std::function<void(uint32_t segment_index,
                                               uint32_t total_segments,
                                               const std::filesystem::path& current_file)>;
  // Same as the single-argument overload, additionally reporting progress
  // through `progress`.
  DatasetValidationResult validate(const std::filesystem::path& data_dir,
                                   ProgressCallback progress);
};

Issue Types

Type Description
InvalidMagic File magic number doesn't match
InvalidVersion Unsupported format version
HeaderCorrupted Segment header unreadable
FrameCrcMismatch Frame CRC32 check failed
FrameSizeTooLarge Frame size exceeds limits
FrameTypeUnknown Unknown event type
FrameTruncated Incomplete frame data
BlockMagicInvalid Compressed block magic mismatch
BlockDecompressionFailed LZ4 decompression error
IndexCrcMismatch Index CRC32 check failed
IndexNotSorted Index entries not in order
TimestampOutOfOrder Events not chronologically sorted
TimestampJumpTooLarge Suspicious timestamp gap
EventCountMismatch Event count in the header doesn't match the actual number of events found
FileTruncated File ends unexpectedly

Severity Levels

Level Meaning
Info Informational, not a problem
Warning Potential issue, data still readable
Error Definite problem, some data may be lost
Critical File unusable

Validation Results

SegmentValidationResult

// Outcome of validating one segment file.
// NOTE(review): no default member initializers are shown in this excerpt; if
// the real header has none, scalar fields of a default-constructed instance
// are indeterminate — confirm against the header.
struct SegmentValidationResult
{
  // Path of the segment this result describes (presumably the path passed
  // to validate() — confirm).
  std::filesystem::path path;
  // Overall verdict for the segment.
  bool valid;
  // Individual findings; each issue exposes at least `file_offset` and
  // `message`.
  std::vector<ValidationIssue> issues;

  // Header-derived values: the "reported_*" fields are presumably what the
  // segment header claims, to be compared with the "actual_*" fields below.
  bool header_valid;
  uint32_t reported_event_count;
  int64_t reported_first_ts;
  int64_t reported_last_ts;
  bool is_compressed;
  CompressionType compression_type;

  // Values presumably observed while scanning the file contents — confirm.
  uint32_t actual_event_count;
  int64_t actual_first_ts;
  int64_t actual_last_ts;
  uint64_t bytes_scanned;

  // Index presence, validity, and size.
  bool has_index;
  bool index_valid;
  uint32_t index_entry_count;

  // Per-category counters gathered during validation.
  uint32_t trades_found;
  uint32_t book_updates_found;
  uint32_t crc_errors;
  uint32_t timestamp_anomalies;

  // Presumably true when `issues` contains an entry of Error (or higher)
  // severity — confirm the exact threshold.
  bool hasErrors() const;
  // Presumably true when `issues` contains an entry of Critical severity
  // ("file unusable") — confirm.
  bool hasCritical() const;
};

DatasetValidationResult

// Outcome of validating a whole dataset directory: per-segment results plus
// aggregate totals.
// NOTE(review): no default member initializers are shown in this excerpt —
// confirm against the header whether scalar fields are initialized.
struct DatasetValidationResult
{
  // Directory this result describes (presumably the path passed to
  // validate() — confirm).
  std::filesystem::path data_dir;
  // Overall verdict for the dataset.
  bool valid;
  // One result per validated segment.
  std::vector<SegmentValidationResult> segments;

  // Segment and volume totals.
  uint32_t total_segments;
  uint32_t valid_segments;
  uint32_t corrupted_segments;
  uint64_t total_events;
  uint64_t total_bytes;

  // Timestamp range of the dataset.
  // NOTE(review): presumably nanoseconds, matching max_timestamp_jump_ns in
  // ValidatorConfig — confirm.
  int64_t first_timestamp;
  int64_t last_timestamp;

  // Issue totals; presumably counted by severity across all segments.
  uint32_t total_errors;
  uint32_t total_warnings;
};

Usage

Single Segment

SegmentValidator validator;
auto result = validator.validate("/data/market.floxseg");

if (result.hasErrors()) {
    for (const auto& issue : result.issues) {
        std::cerr << "Issue at offset " << issue.file_offset
                  << ": " << issue.message << "\n";
    }
}

Entire Dataset

DatasetValidator validator;
auto result = validator.validate("/data/market");

std::cout << "Valid: " << result.valid_segments
          << "/" << result.total_segments << " segments\n";
std::cout << "Events: " << result.total_events << "\n";
std::cout << "Errors: " << result.total_errors << "\n";

With Progress

// Validate a dataset while logging each segment the validator reports
// through the progress callback.
auto report_progress = [](uint32_t current, uint32_t total, const auto& path) {
    std::cout << "Validating " << current << "/" << total
              << ": " << path.filename() << "\n";
};

DatasetValidator validator;
auto result = validator.validate("/data/market", report_progress);

Repair

SegmentRepairer can fix common issues in corrupted segments.

// Options passed to SegmentRepairer at construction. With the defaults
// shown, a backup is made and only the header timestamps, event count, and
// index are fixed; the two data-removing options are off.
struct RepairConfig
{
  // Create a backup before repairing; on by default.
  bool backup_before_repair{true};
  // Suffix used for the backup file (presumably appended to the segment's
  // file name — confirm).
  std::string backup_suffix{".backup"};
  // Presumably rewrites the header's first/last timestamps — confirm.
  bool fix_header_timestamps{true};
  // Presumably rewrites the header's event count — confirm.
  bool fix_event_count{true};
  // Rebuild the segment index.
  bool rebuild_index{true};
  // Off by default. Presumably drops frames that fail validation — confirm.
  bool remove_corrupted_frames{false};
  // Off by default. Presumably cuts the file at the first corrupted
  // position — confirm.
  bool truncate_at_corruption{false};
};

// Fixes common issues in corrupted segment files.
class SegmentRepairer
{
public:
  // Builds a repairer with the given options; a default-constructed
  // RepairConfig is used when none is supplied.
  explicit SegmentRepairer(RepairConfig config = {});

  // Repairs the segment at `segment_path`.
  // NOTE(review): presumably validates the segment internally first —
  // confirm against the implementation.
  RepairResult repair(const std::filesystem::path& segment_path);
  // Repairs the segment at `segment_path` using a validation result the
  // caller already obtained from SegmentValidator.
  RepairResult repair(const std::filesystem::path& segment_path,
                      const SegmentValidationResult& validation);
};

Repair Usage

SegmentValidator validator;
auto validation = validator.validate("/data/corrupted.floxseg");

if (validation.hasErrors()) {
    SegmentRepairer repairer;
    auto repair = repairer.repair("/data/corrupted.floxseg", validation);

    if (repair.success) {
        for (const auto& action : repair.actions_taken) {
            std::cout << "Fixed: " << action << "\n";
        }
    }
}

Convenience Functions

// Quick validation: one-call helpers that return a pass/fail flag.
// The two results use distinct names so the snippet compiles when pasted
// into a single scope (declaring `ok` twice is a redefinition error).
bool segment_ok = replay::isValidSegment("/data/market.floxseg");
bool dataset_ok = replay::isValidDataset("/data/market");

Notes

  • Validation scans the entire file by default for a complete integrity check.
  • CRC verification can be disabled for faster validation.
  • Timestamp jump detection catches clock sync issues in recorded data.
  • Repair creates a backup by default (backup_before_repair is true); no backup is made only when that option is explicitly disabled.