Meta gRPC API
Catalog management, ingestion, merge tasks, and cluster operations
The meta service exposes a gRPC API on port 9500 (configurable via META_BIND). It manages all cluster metadata: datasets, schemas, views, projections, ingestion streams, merge tasks, and segment lifecycle. gRPC reflection is enabled for tooling support.
Services Overview
| Service | Description |
|---|---|
| CatalogService | Manages the data catalog: datasets, schemas, views, projections, and their relationships. |
| IngestionService | Authenticates ingest tokens and manages collector streams. |
| IngestTokenService | Manages ingest tokens for authentication and signal routing. |
| NurseryService | Manages ingest stream lifecycle, merged segment registration, and offset tracking. |
| MergeTaskService | Coordinates segment merge tasks between the janitor and merge workers. |
| SegmentDeletionService | Manages cleanup of tombstoned segments from storage. |
| SegmentLookupService | Discovers segments matching a time range and dataset filter. |
| JanitorService | Provides cluster statistics and merge task management for maintenance operations. |
CatalogService
Manages the data catalog: datasets, schemas, views, projections, and their relationships.
RPCs
| RPC | Description |
|---|---|
| ListDatasources | List all datasources (legacy aggregated view) |
| CreateDataset | Create a new dataset |
| GetDataset | Get dataset by ID or name |
| ListDatasets | List datasets with pagination |
| DeleteDataset | Delete a dataset |
| SetShardingFields | Configure dataset sharding fields |
| GetShardingFields | Get dataset sharding configuration |
| SetDatasetRetention | Set per-dataset retention; an unset `retention_ns` clears the setting |
| SetDatasetMaxMergeTimeSpan | Set the per-dataset merge-span override; an unset `max_merge_time_span_ns` clears the override |
Proto Definition
// Catalog RPCs for datasets: CRUD, sharding fields, retention and merge span,
// plus a datasource listing.
service CatalogService {
  // Lists datasources (read-only summary across datasets).
  rpc ListDatasources(ListDatasourcesRequest)
      returns (ListDatasourcesResponse);

  // Dataset CRUD.
  rpc CreateDataset(CreateDatasetRequest) returns (CreateDatasetResponse);
  rpc GetDataset(GetDatasetRequest) returns (GetDatasetResponse);
  rpc ListDatasets(ListDatasetsRequest) returns (ListDatasetsResponse);
  rpc DeleteDataset(DeleteDatasetRequest) returns (DeleteDatasetResponse);

  // Dataset sharding fields.
  rpc SetShardingFields(SetShardingFieldsRequest)
      returns (SetShardingFieldsResponse);
  rpc GetShardingFields(GetShardingFieldsRequest)
      returns (GetShardingFieldsResponse);

  // Dataset retention and merge span.
  rpc SetDatasetRetention(SetDatasetRetentionRequest)
      returns (SetDatasetRetentionResponse);
  rpc SetDatasetMaxMergeTimeSpan(SetDatasetMaxMergeTimeSpanRequest)
      returns (SetDatasetMaxMergeTimeSpanResponse);
}
// region: Common Types
// Value type of a Column.
// NOTE(review): the zero value is BQL_TYPE_BOOL rather than an UNSPECIFIED
// sentinel, so a type left at its proto3 default reads as BOOL.
enum BqlType {
  BQL_TYPE_BOOL = 0;
  BQL_TYPE_INT = 1;
  BQL_TYPE_LONG = 2;
  BQL_TYPE_REAL = 3;
  BQL_TYPE_STRING = 4;
  BQL_TYPE_DATETIME = 5;
  BQL_TYPE_TIMESPAN = 6;
  BQL_TYPE_GUID = 7;
  BQL_TYPE_DYNAMIC = 8;
}
// Paging parameters embedded in list requests (see ListDatasetsRequest).
// All three fields have explicit presence.
message PaginationRequest {
  // Page size limit.
  optional uint32 limit = 1;
  // Presumably a cursor to page forward from — TODO confirm.
  optional string after = 2;
  // Presumably a cursor to page backward from — TODO confirm.
  optional string before = 3;
}
// Paging metadata returned alongside a page of items.
message PaginationResponse {
  // Cursor for the next page, when present.
  optional string next_cursor = 1;
  // Cursor for the previous page, when present.
  optional string prev_cursor = 2;
  // Total count (presumably across all pages — TODO confirm).
  uint64 total = 3;
}
// A named, typed column.
message Column {
  // Column name.
  string name = 1;
  // Column value type.
  BqlType type = 2;
}
// endregion
// region: Dataset Messages
// Request for CatalogService.CreateDataset.
message CreateDatasetRequest {
  // Name of the dataset.
  string name = 1;
}
// Response for CatalogService.CreateDataset.
message CreateDatasetResponse {
  // Dataset ID.
  string id = 1;
  // Dataset name.
  string name = 2;
}
// Request for CatalogService.GetDataset.
message GetDatasetRequest {
  // The dataset is addressed either by ID or by name (mutually exclusive).
  oneof identifier {
    string id = 1;
    string name = 2;
  }
}
// Response for CatalogService.GetDataset.
message GetDatasetResponse {
  // Dataset ID.
  string id = 1;
  // Dataset name.
  string name = 2;

  // Retention in nanoseconds. Unset = no retention configured.
  optional int64 retention_ns = 3;

  // Explicit per-dataset merge span cap in nanoseconds. Unset = derive from
  // retention (or fall back to the global cap).
  optional int64 max_merge_time_span_ns = 4;
}
// Request for CatalogService.ListDatasets.
message ListDatasetsRequest {
  // Paging parameters.
  PaginationRequest pagination = 1;
}
// Response for CatalogService.ListDatasets: one page of datasets.
message ListDatasetsResponse {
  // Datasets on this page.
  repeated Dataset items = 1;
  // Paging metadata for this page.
  PaginationResponse pagination = 2;
}
// A dataset entry as returned by ListDatasets. Carries the same four fields
// as GetDatasetResponse.
message Dataset {
  // Dataset ID.
  string id = 1;
  // Dataset name.
  string name = 2;
  // Retention in nanoseconds, when set.
  optional int64 retention_ns = 3;
  // Per-dataset merge span cap in nanoseconds, when set.
  optional int64 max_merge_time_span_ns = 4;
}
// Request for CatalogService.DeleteDataset.
message DeleteDatasetRequest {
  // The dataset is addressed either by ID or by name (mutually exclusive).
  oneof identifier {
    string id = 1;
    string name = 2;
  }
}
// Response for CatalogService.DeleteDataset.
message DeleteDatasetResponse {
  // Success flag for the delete.
  bool success = 1;
}
// Request for CatalogService.SetShardingFields.
message SetShardingFieldsRequest {
  // The dataset is addressed either by ID or by name (mutually exclusive).
  oneof identifier {
    string id = 1;
    string name = 2;
  }

  // Sharding fields to configure for the dataset.
  repeated ShardingField fields = 3;
}
// Response for CatalogService.SetShardingFields.
message SetShardingFieldsResponse {
  // Sharding fields (presumably the configuration now in effect — confirm).
  repeated ShardingField fields = 1;
}
// Request for CatalogService.GetShardingFields.
message GetShardingFieldsRequest {
  // The dataset is addressed either by ID or by name (mutually exclusive).
  oneof identifier {
    string id = 1;
    string name = 2;
  }
}
// Response for CatalogService.GetShardingFields.
message GetShardingFieldsResponse {
  // The dataset's sharding fields.
  repeated ShardingField fields = 1;
}
// Request for CatalogService.SetDatasetRetention: sets per-dataset retention.
// Leaving `retention_ns` unset clears the setting (reverts to "no retention
// configured").
message SetDatasetRetentionRequest {
  // The dataset is addressed either by ID or by name (mutually exclusive).
  oneof identifier {
    string id = 1;
    string name = 2;
  }

  // New retention in nanoseconds; unset clears it.
  optional int64 retention_ns = 3;
}
// Response for CatalogService.SetDatasetRetention.
message SetDatasetRetentionResponse {
  // Retention in nanoseconds (presumably the value now stored — confirm).
  optional int64 retention_ns = 1;
}
// Request for CatalogService.SetDatasetMaxMergeTimeSpan: sets the per-dataset
// merge-span override. Leaving `max_merge_time_span_ns` unset clears the
// override (planner falls back to the retention formula).
message SetDatasetMaxMergeTimeSpanRequest {
  // The dataset is addressed either by ID or by name (mutually exclusive).
  oneof identifier {
    string id = 1;
    string name = 2;
  }

  // New merge-span override in nanoseconds; unset clears it.
  optional int64 max_merge_time_span_ns = 3;
}
// Response for CatalogService.SetDatasetMaxMergeTimeSpan.
message SetDatasetMaxMergeTimeSpanResponse {
  // Merge-span override in nanoseconds (presumably the value now stored —
  // confirm).
  optional int64 max_merge_time_span_ns = 1;
}
// endregion
// region: Datasource Listing (read-only summary across datasets)
// Request for CatalogService.ListDatasources. Carries the same three paging
// fields as PaginationRequest, declared inline.
message ListDatasourcesRequest {
  // Page size limit.
  optional uint32 limit = 1;
  // Presumably a cursor to page forward from — TODO confirm.
  optional string after = 2;
  // Presumably a cursor to page backward from — TODO confirm.
  optional string before = 3;
}
// Response for CatalogService.ListDatasources: one page of summaries with
// paging metadata declared inline.
message ListDatasourcesResponse {
  // Datasource summaries on this page.
  repeated DatasourceSummary items = 1;
  // Cursor for the next page, when present.
  optional string next_cursor = 2;
  // Cursor for the previous page, when present.
  optional string prev_cursor = 3;
  // Total count (presumably across all pages — TODO confirm).
  uint64 total = 4;
}
// One entry in the datasource listing.
message DatasourceSummary {
  // Datasource ID.
  string id = 1;
  // Datasource name.
  string name = 2;
}
// endregion
IngestionService
Authenticates ingest tokens and manages collector streams.
RPCs
| RPC | Description |
|---|---|
| RegisterForStream | Authenticate an ingest token and get/create a stream for a collector |
Proto Definition
// Entry point for collectors that want to ingest data.
service IngestionService {
  // RegisterForStream authenticates an ingest token and returns a stream for
  // this collector: the existing active stream for the collector_id if there
  // is one, otherwise a newly created stream.
  rpc RegisterForStream(RegisterForStreamRequest)
      returns (RegisterForStreamResponse);
}
// Request for IngestionService.RegisterForStream.
message RegisterForStreamRequest {
  // Field number 1 is retired and must not be reused.
  reserved 1;

  // NOTE(review): purpose not documented here — TODO confirm.
  string collector_string = 2;
  // Collector identity; the active stream is looked up by this ID.
  string collector_id = 3;
  // Ingest token to authenticate.
  string ingest_token = 4;
}
// Stream configuration returned by RegisterForStream.
message StreamConfig {
  // Cloud object prefix for the stream.
  CloudObjectPrefix cloud_object_prefix = 1;
  // Flush timeout, in milliseconds.
  uint64 flush_timeout_ms = 2;
  // Maximum segment size, in bytes.
  uint64 max_segment_size_bytes = 3;
}
// Response for IngestionService.RegisterForStream.
message RegisterForStreamResponse {
  // ID of the stream returned for the collector.
  string stream_id = 1;
  // Configuration for that stream.
  StreamConfig config = 2;
  // UUID of the ingest token (from the meta database).
  // Tjalfe uses this as the key in CombinedSignals.token_signals
  // so the plaintext token is never persisted to S3.
  string token_id = 3;
}
IngestTokenService
Manages ingest tokens for authentication and signal routing.
RPCs
| RPC | Description |
|---|---|
| CreateIngestToken | Create a new token (plaintext shown only once) |
| ValidateIngestToken | Validate a token and return its routing |
| RevokeIngestToken | Revoke a token by ID |
| DeleteIngestToken | Delete a token by ID |
| ListIngestTokens | List all tokens |
| GetIngestToken | Get token details by ID |
| UpdateIngestTokenRouting | Update signal routing for a token |
Proto Definition
// Management of ingest tokens and their signal routing.
service IngestTokenService {
  // Creates a token; the plaintext is returned only in this response.
  rpc CreateIngestToken(CreateIngestTokenRequest)
      returns (CreateIngestTokenResponse);
  // Validates a plaintext token and returns its ID and routing.
  rpc ValidateIngestToken(ValidateIngestTokenRequest)
      returns (ValidateIngestTokenResponse);
  // Revokes the token identified by token_id.
  rpc RevokeIngestToken(RevokeIngestTokenRequest)
      returns (RevokeIngestTokenResponse);
  // Deletes the token identified by token_id.
  rpc DeleteIngestToken(DeleteIngestTokenRequest)
      returns (DeleteIngestTokenResponse);
  // Lists tokens.
  rpc ListIngestTokens(ListIngestTokensRequest)
      returns (ListIngestTokensResponse);
  // Returns the details of the token identified by token_id.
  rpc GetIngestToken(GetIngestTokenRequest) returns (GetIngestTokenResponse);
  // Replaces the signal routing of a token.
  rpc UpdateIngestTokenRouting(UpdateIngestTokenRoutingRequest)
      returns (UpdateIngestTokenRoutingResponse);
}
// Per-signal routing for an ingest token: one target dataset ID per signal
// type.
message IngestTokenRouting {
  // Dataset ID for traces.
  string traces_dataset_id = 1;
  // Dataset ID for logs.
  string logs_dataset_id = 2;
  // Dataset ID for metrics.
  string metrics_dataset_id = 3;
}
// Metadata describing an ingest token.
message IngestTokenInfo {
  // Token ID.
  string id = 1;
  // Token name.
  string name = 2;
  // Creation time as a string (format not specified here — TODO confirm).
  string created_at = 3;
  // Revocation time; empty if not revoked.
  string revoked_at = 4;
  // Signal routing for this token.
  IngestTokenRouting routing = 5;
  // Last 4 chars of the plaintext token.
  string token_hint = 6;
}
// Request for IngestTokenService.CreateIngestToken.
message CreateIngestTokenRequest {
  // Name for the token.
  string name = 1;
  // Dataset name (presumably the dataset the token routes to — TODO confirm).
  string dataset_name = 2;
}
// Response for IngestTokenService.CreateIngestToken.
message CreateIngestTokenResponse {
  // The plaintext token; shown only once.
  string plaintext_token = 1;
  // Metadata of the created token.
  IngestTokenInfo token = 2;
}
// Request for IngestTokenService.ValidateIngestToken.
message ValidateIngestTokenRequest {
  // Plaintext token to validate.
  string plaintext_token = 1;
}
// Response for IngestTokenService.ValidateIngestToken.
message ValidateIngestTokenResponse {
  // ID of the validated token.
  string token_id = 1;
  // Signal routing of the validated token.
  IngestTokenRouting routing = 2;
}
// Request for IngestTokenService.RevokeIngestToken.
message RevokeIngestTokenRequest {
  // ID of the token to revoke.
  string token_id = 1;
}
// Response for IngestTokenService.RevokeIngestToken.
message RevokeIngestTokenResponse {
  // Intentionally empty: an empty message signals success.
}
// Request for IngestTokenService.DeleteIngestToken.
message DeleteIngestTokenRequest {
  // ID of the token to delete.
  string token_id = 1;
}
// Response for IngestTokenService.DeleteIngestToken.
message DeleteIngestTokenResponse {
  // Intentionally empty: an empty message signals success.
}
// Request for IngestTokenService.ListIngestTokens.
message ListIngestTokensRequest {
  // Intentionally empty: the call takes no parameters.
}
// Response for IngestTokenService.ListIngestTokens.
message ListIngestTokensResponse {
  // Metadata for each listed token.
  repeated IngestTokenInfo tokens = 1;
}
// Request for IngestTokenService.GetIngestToken.
message GetIngestTokenRequest {
  // ID of the token to fetch.
  string token_id = 1;
}
// Response for IngestTokenService.GetIngestToken.
message GetIngestTokenResponse {
  // Metadata of the requested token.
  IngestTokenInfo token = 1;
}
// Request for IngestTokenService.UpdateIngestTokenRouting.
message UpdateIngestTokenRoutingRequest {
  // ID of the token whose routing is updated.
  string token_id = 1;
  // New signal routing for the token.
  IngestTokenRouting routing = 2;
}
// Response for IngestTokenService.UpdateIngestTokenRouting.
message UpdateIngestTokenRoutingResponse {
  // Intentionally empty: an empty message signals success.
}
NurseryService
Manages ingest stream lifecycle, merged segment registration, and offset tracking.
RPCs
| RPC | Description |
|---|---|
| GetIngestStreamsForNursery | Get all active streams and dataset routing info |
| MarkStreamStopped | Mark a stream as stopped (client disconnect or timeout) |
| RegisterMergedSegments | Atomically register one or more merged segments for a single dataset and advance the contributing streams' merged offsets |
| UpdateDeletedOffset | Update the deleted offset after cleaning baby segments |
| DeleteIngestStreamDataset | Remove a fully-processed dataset entry from a stream |
Proto Definition
// RPCs used by the nursery: stream lifecycle, merged segment registration,
// and offset tracking.
service NurseryService {
  // Returns ingest streams together with dataset and token routing info.
  rpc GetIngestStreamsForNursery(GetIngestStreamsRequest)
      returns (GetIngestStreamsResponse);

  // Marks a stream as stopped (see StopReason for the possible reasons).
  rpc MarkStreamStopped(MarkStreamStoppedRequest)
      returns (MarkStreamStoppedResponse);

  // Registers one or more merged segments for a single dataset, atomically.
  rpc RegisterMergedSegments(RegisterMergedSegmentsRequest)
      returns (RegisterMergedSegmentsResponse);

  // Updates the deleted offset after baby segments have been cleaned up.
  rpc UpdateDeletedOffset(UpdateDeletedOffsetRequest)
      returns (UpdateDeletedOffsetResponse);

  // Removes a fully-processed dataset entry from a stream.
  rpc DeleteIngestStreamDataset(DeleteIngestStreamDatasetRequest)
      returns (DeleteIngestStreamDatasetResponse);
}
// Request for NurseryService.GetIngestStreamsForNursery; carries no fields.
message GetIngestStreamsRequest {}
// A checkpoint that ties a merged segment offset to the stream offset it was
// produced at.
message OffsetCheckpoint {
  // The MVCC version of the merged segment.
  int64 segment_offset = 1;

  // The stream offset at the time of this merge.
  int64 stream_offset = 2;

  // Ingest timestamp of the baby segment (nanoseconds since unix epoch).
  int64 ingest_timestamp_nanos = 3;
}
// Per-dataset routing and progress information within a stream.
message IngestStreamDatasetInfo {
  // Dataset this entry belongs to.
  string dataset_id = 1;

  // Last merged offset. Nursery should fetch from merged_offset + 1.
  int64 merged_offset = 2;

  // Ingest timestamp of the last merged baby segment (nanoseconds since unix
  // epoch). Used by the query engine to determine which segments to query.
  // NOTE: Source precision is seconds (from S3 last_modified); the sub-second
  // portion is always zero.
  int64 merged_ingest_time_nanos = 3;

  // Highest offset that has been deleted from S3 (default -1 = no data
  // deleted yet). Nursery should only delete offsets <= merged_offset.
  // NOTE(review): -1 is an application-level default; the proto3 implicit
  // default for int64 is 0, so -1 has to be written explicitly.
  int64 deleted_offset = 4;

  // Ingest timestamp of the baby segment at deleted_offset (nanoseconds since
  // unix epoch).
  // NOTE: Source precision is seconds (from S3 last_modified); the sub-second
  // portion is always zero.
  int64 deleted_ingest_time_nanos = 5;

  // Dataset stream creation time in nanoseconds since unix epoch.
  int64 created_at_nanos = 6;

  // S3 location for uploading merged segments: endpoint, bucket, path prefix.
  // Path format: dataset/<dataset_id>/merged/
  CloudObjectPrefix cloud_merged_segments_prefix = 7;

  // Checkpoints tracking recent merge points (segment_offset, stream_offset,
  // ingest_timestamp). Pruned to keep only checkpoints from the last 30
  // minutes.
  repeated OffsetCheckpoint offset_checkpoints = 8;
}
// External view of an ingest stream. Mirrors the two-table design
// (ingest_streams + ingest_stream_datasets).
message IngestStreamInfo {
  // Stream ID.
  string stream_id = 1;

  // S3 location for baby segments: endpoint, bucket, path prefix.
  // Path format: ingest-stream/<stream_id>/
  CloudObjectPrefix cloud_stream_prefix = 2;

  // If set, the stream is stopped (no more baby segments will arrive).
  // Nursery can release stopped streams after processing remaining segments.
  optional int64 stopped_at_nanos = 3;

  // Offset of the stop message (only set if stopped_at_nanos is set).
  // A fully-merged stream has merged_offset == stopped_offset.
  optional int64 stopped_offset = 4;

  // Dataset-specific routing information for this stream.
  repeated IngestStreamDatasetInfo datasets = 5;
}
// Routing for one ingest token: maps each signal type to a target dataset.
// Keyed by token_id (UUID), matching the key in CombinedSignals.token_signals.
message IngestTokenRoutingInfo {
  // UUID of the ingest token (from the meta database).
  // Matches the key used in CombinedSignals.token_signals.
  string token_id = 1;

  // Target dataset ID for traces.
  string traces_dataset_id = 2;
  // Target dataset ID for logs.
  string logs_dataset_id = 3;
  // Target dataset ID for metrics.
  string metrics_dataset_id = 4;
}
// Response for NurseryService.GetIngestStreamsForNursery.
message GetIngestStreamsResponse {
  // Ingest streams.
  repeated IngestStreamInfo streams = 1;

  // All known datasets (name → id mapping for routing resolution).
  repeated DatasetInfo datasets = 2;

  // Monotonic sequence number for staleness detection.
  // Nursery should discard responses with a lower sequence than already seen.
  int64 response_seq = 3;

  // Active ingest token routing for token-based signal routing.
  repeated IngestTokenRoutingInfo token_routing = 4;
}
// Why a stream was stopped.
enum StopReason {
  // Default when no reason is given.
  STOP_REASON_UNSPECIFIED = 0;

  // Client (ingester) wrote a stop message.
  STOP_REASON_CLIENT = 1;

  // Nursery detected inactivity timeout.
  STOP_REASON_TIMEOUT = 2;
}
// Request for NurseryService.MarkStreamStopped.
message MarkStreamStoppedRequest {
  // Stream to mark as stopped.
  string stream_id = 1;

  // When the stream was stopped (nanoseconds since unix epoch).
  int64 stopped_at_nanos = 2;

  // Final processed offset in the stream.
  int64 final_offset = 3;

  // Reason for stopping.
  StopReason reason = 4;
}
// Response for NurseryService.MarkStreamStopped; carries no fields.
message MarkStreamStoppedResponse {}
// Offset advance for one (stream, dataset) pair, used when registering merged
// segments.
message StreamOffsetInfo {
  // Stream whose offset advances.
  string stream_id = 1;
  // Dataset the advance applies to.
  string dataset_id = 2;

  // New merged_offset after this merge (max baby segment offset included).
  int64 new_merged_offset = 3;

  // Ingest timestamp (S3 last_modified) of the highest-offset baby segment.
  // NOTE: S3 provides second-level precision; the sub-second portion is
  // always zero.
  int64 ingest_time_nanos = 4;

  // What merged_offset was when nursery selected segments for merge.
  // Meta validates this matches the current value to detect conflicts.
  int64 start_merged_offset = 5;
}
// A single merged segment registration: the new segment plus the per-stream
// offset advances for the streams that contributed data to it.
message MergeRegistration {
  // The new merged segment.
  CreateSegment segment = 1;

  // Per-stream offset advance for streams that contributed segments to this
  // merge. Each item's start_merged_offset must equal the current
  // merged_offset for that (stream, dataset). Within a batch, items are
  // applied in order, so item N+1's start_merged_offset must equal item N's
  // new_merged_offset (for the same stream).
  repeated StreamOffsetInfo stream_offsets = 2;
}
// Request for NurseryService.RegisterMergedSegments: registers one or more
// merged segments for a single dataset, atomically.
//
// All merges in the batch must share the same dataset_id (server enforced).
// `merges` are applied in order in one Postgres transaction; on any failure
// the whole transaction rolls back. After all merges, `idle_advances` are
// applied — these advance merged_offset for streams that didn't contribute
// segments to any merge in this batch (e.g. streams whose follower processed
// keep-alives or a stop message). Idle-advance checkpoints reuse the last
// allocated segment version in the batch.
message RegisterMergedSegmentsRequest {
  // Must be non-empty.
  repeated MergeRegistration merges = 1;

  // Optional. Applied last in the same transaction.
  repeated StreamOffsetInfo idle_advances = 2;
}
// Response for NurseryService.RegisterMergedSegments.
message RegisterMergedSegmentsResponse {
  // segment_versions[i] is the MVCC version assigned to merges[i].
  repeated int64 segment_versions = 1;

  // Updated stream info for nursery to cache (avoids separate poll).
  repeated IngestStreamInfo streams = 2;

  // Monotonic sequence number for staleness detection (same counter as
  // GetIngestStreamsResponse).
  int64 response_seq = 3;
}
// Request for NurseryService.UpdateDeletedOffset, sent after baby segments
// have been cleaned up.
message UpdateDeletedOffsetRequest {
  // Field number 2 is not used by any current field. Reserve it so it can
  // never be reused with a different meaning.
  reserved 2;

  // Stream whose deleted offset is updated.
  string stream_id = 1;

  // New deleted offset.
  int64 deleted_offset = 3;

  // Ingest timestamp of the baby segment at deleted_offset (nanoseconds since
  // unix epoch).
  int64 deleted_ingest_time_nanos = 4;
}
// Response for NurseryService.UpdateDeletedOffset; carries no fields.
message UpdateDeletedOffsetResponse {}
// Request for NurseryService.DeleteIngestStreamDataset.
message DeleteIngestStreamDatasetRequest {
  // Stream that holds the dataset entry.
  string stream_id = 1;
  // Dataset entry to remove.
  string dataset_id = 2;

  // The current (highest) offset nursery is processing for this stream.
  // Meta validates this is >= the dataset's merged_offset as a sanity check.
  int64 current_offset = 3;

  // Nursery's view of the dataset's merged_offset.
  // Meta validates this matches its own merged_offset and deleted_offset
  // to catch stale nursery state.
  int64 merged_offset = 4;
}
// Response for NurseryService.DeleteIngestStreamDataset; carries no fields.
message DeleteIngestStreamDatasetResponse {}
MergeTaskService
Coordinates segment merge tasks between the janitor and merge workers.
RPCs
| RPC | Description |
|---|---|
| PollForSegmentMergeTask | Worker polls for an available merge task |
| CompleteSegmentMergeTask | Worker reports successful merge with the new segment |
| FailSegmentMergeTask | Worker reports merge failure |
Proto Definition
// RPCs used by merge workers to obtain and report on segment merge tasks.
service MergeTaskService {
  // A worker polls for an available merge task.
  rpc PollForSegmentMergeTask(PollForSegmentMergeTaskRequest)
      returns (PollForSegmentMergeTaskResponse);

  // A worker reports a successful merge together with the new segment.
  rpc CompleteSegmentMergeTask(CompleteSegmentMergeTaskRequest)
      returns (CompleteSegmentMergeTaskResponse);

  // A worker reports that a merge failed.
  rpc FailSegmentMergeTask(FailSegmentMergeTaskRequest)
      returns (FailSegmentMergeTaskResponse);
}
// Request for MergeTaskService.PollForSegmentMergeTask.
message PollForSegmentMergeTaskRequest {
  // ID of the polling worker.
  string worker_id = 1;
}
// Response for MergeTaskService.PollForSegmentMergeTask.
message PollForSegmentMergeTaskResponse {
  // The task handed to the worker; presumably absent when no task is
  // available — TODO confirm.
  optional SegmentMergeTaskInfo task = 1;
}
// Request for MergeTaskService.CompleteSegmentMergeTask.
message CompleteSegmentMergeTaskRequest {
  // ID of the completed task.
  string task_id = 1;
  // The segment produced by the merge.
  CreateSegment merged_segment = 2;
  // Relative path of the rewrite journal (base location not specified here —
  // TODO confirm).
  string rewrite_journal_relative_path = 3;
}
// Response for MergeTaskService.CompleteSegmentMergeTask: either a success
// marker or an error.
message CompleteSegmentMergeTaskResponse {
  oneof result {
    CompleteSegmentMergeTaskSuccess success = 1;
    CompleteSegmentMergeTaskError error = 2;
  }
}
// Success arm of CompleteSegmentMergeTaskResponse.result; carries no fields.
message CompleteSegmentMergeTaskSuccess {}
// Error arm of CompleteSegmentMergeTaskResponse.result.
message CompleteSegmentMergeTaskError {
  // Error message.
  string message = 1;
}
// Request for MergeTaskService.FailSegmentMergeTask.
message FailSegmentMergeTaskRequest {
  // ID of the failed task.
  string task_id = 1;
  // Error message describing the failure.
  string error_message = 2;
}
// Response for MergeTaskService.FailSegmentMergeTask; carries no fields.
message FailSegmentMergeTaskResponse {}
SegmentDeletionService
Manages cleanup of tombstoned segments from storage.
RPCs
| RPC | Description |
|---|---|
| GetTombstonedSegmentsForDeletion | Get segments marked for deletion |
| ConfirmTombstoneDeletion | Confirm segments have been deleted from storage |
| ConfirmRewriteJournalDeletion | Confirm rewrite journal deletion for the given output segment IDs |
Proto Definition
// RPCs for cleaning up tombstoned segments and their rewrite journals.
service SegmentDeletionService {
  // Returns segments marked for deletion.
  rpc GetTombstonedSegmentsForDeletion(GetTombstonedSegmentsForDeletionRequest)
      returns (GetTombstonedSegmentsForDeletionResponse);

  // Confirms that segments have been deleted from storage.
  rpc ConfirmTombstoneDeletion(ConfirmTombstoneDeletionRequest)
      returns (ConfirmTombstoneDeletionResponse);

  // Confirms rewrite journal deletion for the given output segment IDs.
  rpc ConfirmRewriteJournalDeletion(ConfirmRewriteJournalDeletionRequest)
      returns (ConfirmRewriteJournalDeletionResponse);
}
// Request for SegmentDeletionService.GetTombstonedSegmentsForDeletion.
message GetTombstonedSegmentsForDeletionRequest {
  // Intentionally empty: the service uses internal configuration.
}
// Response for SegmentDeletionService.GetTombstonedSegmentsForDeletion.
message GetTombstonedSegmentsForDeletionResponse {
  // Tombstoned segments marked for deletion.
  repeated TombstonedSegmentInfo segments = 1;
}
// A tombstoned segment and where its object lives.
message TombstonedSegmentInfo {
  // Segment ID.
  string segment_id = 1;
  // Dataset ID.
  string dataset_id = 2;
  // Cloud object key of the segment.
  CloudObjectKey cloud_object_key = 3;
  // Tombstone time in nanoseconds (presumably since unix epoch — confirm).
  int64 tombstone_time_nanos = 4;
  // Version associated with the tombstone.
  int64 tombstone_version = 5;
}
// Request for SegmentDeletionService.ConfirmTombstoneDeletion.
message ConfirmTombstoneDeletionRequest {
  // IDs of the segments being confirmed.
  repeated string segment_ids = 1;
}
// Response for SegmentDeletionService.ConfirmTombstoneDeletion: either a
// success payload or an error.
message ConfirmTombstoneDeletionResponse {
  oneof result {
    ConfirmTombstoneDeletionSuccess success = 1;
    ConfirmTombstoneDeletionError error = 2;
  }
}
// Success arm of ConfirmTombstoneDeletionResponse.result.
message ConfirmTombstoneDeletionSuccess {
  // Deleted count.
  int32 deleted_count = 1;
  // Rewrite journals that are ready for deletion.
  repeated RewriteJournalReadyForDeletion journals_ready = 2;
}
// A rewrite journal that is ready to be deleted.
message RewriteJournalReadyForDeletion {
  // ID of the output segment the journal is associated with.
  string output_segment_id = 1;
  // Cloud object key of the journal.
  CloudObjectKey journal_key = 2;
}
// Error arm of ConfirmTombstoneDeletionResponse.result.
message ConfirmTombstoneDeletionError {
  // Error message.
  string message = 1;
}
// Request for SegmentDeletionService.ConfirmRewriteJournalDeletion.
message ConfirmRewriteJournalDeletionRequest {
  // Output segment IDs whose rewrite journals are being confirmed.
  repeated string output_segment_ids = 1;
}
// Response for SegmentDeletionService.ConfirmRewriteJournalDeletion.
message ConfirmRewriteJournalDeletionResponse {
  // Cleared count.
  int32 cleared_count = 1;
}
SegmentLookupService
Discovers segments matching a time range and dataset filter.
RPCs
| RPC | Description |
|---|---|
| FindSegments | Find segments by time range and dataset names (paginated) |
| GetSegmentsByIds | Get specific segments by their IDs |
| ListWarmCandidates | Returns segments ordered for cache warming priority (newest-first by time_range.end_time, globally across all datasets). Paginated. |
Proto Definition
// RPCs for discovering segments.
service SegmentLookupService {
  // Finds segments by time range and dataset names (paginated).
  rpc FindSegments(FindSegmentsRequest) returns (FindSegmentsResponse);

  // Fetches specific segments by their IDs.
  rpc GetSegmentsByIds(GetSegmentsByIdsRequest)
      returns (GetSegmentsByIdsResponse);

  // Returns segments ordered for cache warming priority (newest-first by
  // time_range.end_time, globally across all datasets). Paginated.
  rpc ListWarmCandidates(ListWarmCandidatesRequest)
      returns (ListWarmCandidatesResponse);
}
// Request for SegmentLookupService.FindSegments.
message FindSegmentsRequest {
  // Field number 2 is not used by any current field. Reserve it so it can
  // never be reused with a different meaning.
  reserved 2;

  // Time range to search.
  TimeRange time_range = 1;
  // Dataset names to search.
  repeated string dataset_names = 3;
  // Page size limit.
  int32 limit = 4;
  // Paging cursor (presumably PagedResponse.next_cursor from the previous
  // page — TODO confirm).
  optional string cursor = 5;
}
// Paging metadata for segment lookup responses.
message PagedResponse {
  // Cursor for the next page, when present.
  optional string next_cursor = 1;
}
// Response for SegmentLookupService.FindSegments.
message FindSegmentsResponse {
  // Matching segments on this page.
  repeated SegmentInfo segments = 1;
  // Paging metadata.
  PagedResponse page = 2;

  // Snapshot of meta's monotonic segment-version counter
  // (`counters.version`), read in the same transaction as the segment list.
  // QC pins this as the watermark `V` for the query: the cloud QWS lane sees
  // only segments with `version <= V`; a future nursery lane (Phase 4-5 of
  // docs/dev/query-exec-in-nursery.md) fills the gap with locally-staged
  // segments that are either unregistered or registered at `version > V`.
  // The two sets are disjoint by construction, so the merged result has no
  // double-count.
  uint64 current_version = 3;
}
// Request for SegmentLookupService.GetSegmentsByIds.
message GetSegmentsByIdsRequest {
  // IDs of the segments to fetch.
  repeated string segment_ids = 1;
}
// Response for SegmentLookupService.GetSegmentsByIds.
message GetSegmentsByIdsResponse {
  // The returned segments.
  repeated SegmentInfo segments = 1;
}
// Request for SegmentLookupService.ListWarmCandidates.
message ListWarmCandidatesRequest {
  // Max segments per page (capped server-side).
  int32 limit = 1;

  // Opaque cursor from previous page; empty on first call.
  optional string cursor = 2;

  // Shard-filter fields will be added later as part of the sharding design.
}
// Response for SegmentLookupService.ListWarmCandidates.
message ListWarmCandidatesResponse {
  // Ordered newest-first by time_range.end_time, then by id descending.
  repeated SegmentInfo segments = 1;
  // Paging metadata.
  PagedResponse page = 2;
}
JanitorService
Provides cluster statistics and merge task management for maintenance operations.
RPCs
| RPC | Description |
|---|---|
| GetJanitorStats | Get segment count, sizes, and outstanding merge tasks |
| GetSegmentSizeStats | Get segment size distribution by tier |
| ListMergeTasks | List active merge tasks |
| GetMergeTask | Get detailed info about a specific merge task |
| UnclaimMergeTask | Release a stale merge task claim |
| CreateMergeTasks | Create new merge tasks for eligible segments |
| RewriteAllSegments | Create one single-segment merge task per existing segment not already in a merge task. This forces every segment through the rewrite pipeline to pick up index fixes. |
| DeleteMergeTask | Delete a merge task by ID |
Proto Definition
// Cluster statistics and merge task management for maintenance operations.
service JanitorService {
  // Returns segment count, sizes, and outstanding merge tasks.
  rpc GetJanitorStats(GetJanitorStatsRequest)
      returns (GetJanitorStatsResponse);

  // Returns the segment size distribution by tier.
  rpc GetSegmentSizeStats(GetSegmentSizeStatsRequest)
      returns (GetSegmentSizeStatsResponse);

  // Lists merge tasks.
  rpc ListMergeTasks(ListMergeTasksRequest) returns (ListMergeTasksResponse);

  // Returns detailed info about a specific merge task.
  rpc GetMergeTask(GetMergeTaskRequest) returns (GetMergeTaskResponse);

  // Releases a merge task claim that is old enough to be unclaimed.
  rpc UnclaimMergeTask(UnclaimMergeTaskRequest)
      returns (UnclaimMergeTaskResponse);

  // Creates new merge tasks for segments matching the request filters.
  rpc CreateMergeTasks(CreateMergeTasksRequest)
      returns (CreateMergeTasksResponse);

  // Create one single-segment merge task per existing segment not already in
  // a merge task. This forces every segment through the rewrite pipeline to
  // pick up index fixes.
  rpc RewriteAllSegments(RewriteAllSegmentsRequest)
      returns (RewriteAllSegmentsResponse);

  // Deletes the merge task identified by task_id.
  rpc DeleteMergeTask(DeleteMergeTaskRequest)
      returns (DeleteMergeTaskResponse);
}
// Filters for JanitorService.GetSegmentSizeStats. Every field has explicit
// presence and may be left unset.
message GetSegmentSizeStatsRequest {
  // Optional maximum compressed size in bytes (e.g., 10485760 for 10MB).
  // Only segments with compressed_size <= this value are included.
  optional int64 max_compressed_size = 1;

  // Optional minimum age in seconds. Only segments older than
  // now - min_age (i.e. created before that instant) are included.
  // NOTE(review): the previous wording said "created after", which
  // contradicts the field name and CreateMergeTasksRequest — confirm against
  // the implementation.
  optional int64 min_age_secs = 2;

  // Optional maximum age in seconds. Only segments newer than
  // now - max_age (i.e. created after that instant) are included.
  optional int64 max_age_secs = 3;

  // Optional dataset name filter.
  optional string dataset_name = 4;
}
// Statistics for one size tier in GetSegmentSizeStatsResponse.
// NOTE(review): size units and the string formats below are not specified
// here — TODO confirm.
message SegmentSizeTier {
  // Tier number.
  int32 tier = 1;
  // Lower bound of the tier, as a string.
  string tier_min = 2;
  // Upper bound of the tier, as a string.
  string tier_max = 3;
  // Number of segments in the tier.
  int64 segment_count = 4;
  // Total size of the tier's segments.
  int64 total_size = 5;
  // Smallest segment size in the tier.
  int64 smallest = 6;
  // Largest segment size in the tier.
  int64 largest = 7;
  // Average segment size in the tier.
  int64 avg_size = 8;
  // Earliest time in the tier, as a string.
  string earliest_time = 9;
  // Latest time in the tier, as a string.
  string latest_time = 10;
}
// Response for JanitorService.GetSegmentSizeStats.
message GetSegmentSizeStatsResponse {
  // One entry per size tier.
  repeated SegmentSizeTier tiers = 1;
}
// Request for JanitorService.GetJanitorStats.
message GetJanitorStatsRequest {
  // Optional dataset name filter.
  optional string dataset_name = 1;
}
// Response for JanitorService.GetJanitorStats.
message GetJanitorStatsResponse {
  // Segment count.
  int64 segment_count = 1;
  // Number of outstanding merge tasks.
  int64 outstanding_merge_tasks = 2;
  // Total size in bytes.
  int64 total_size_bytes = 3;
  // Total compressed size in bytes.
  int64 total_compressed_size_bytes = 4;
  // Segment count including tombstoned segments.
  int64 segments_incl_tombstoned = 5;
}
// Request for JanitorService.ListMergeTasks.
message ListMergeTasksRequest {
  // Optional dataset name filter.
  optional string dataset_name = 1;
}
// Response for JanitorService.ListMergeTasks.
message ListMergeTasksResponse {
  // One summary per merge task.
  repeated MergeTaskSummary tasks = 1;
}
// Summary of a merge task, as returned by ListMergeTasks.
message MergeTaskSummary {
  // Task ID.
  string task_id = 1;
  // Dataset ID.
  string dataset_id = 2;
  // Number of segments in the task.
  int32 segment_count = 3;
  // Worker the task is assigned to.
  string assigned_to_worker = 4;
  // Creation time as a string (format not specified here — TODO confirm).
  string created_at = 5;
  // Assignment time as a string (format not specified here — TODO confirm).
  string assigned_at = 6;
}
// Request for JanitorService.GetMergeTask.
message GetMergeTaskRequest {
  // ID of the task to fetch.
  string task_id = 1;
}
// Request for JanitorService.UnclaimMergeTask.
message UnclaimMergeTaskRequest {
  // ID of the task to unclaim.
  string task_id = 1;

  // Minimum duration in seconds the task must have been claimed before it can
  // be unclaimed. Defaults to 3600 (1 hour) if not set.
  // NOTE(review): the field has no explicit presence, so 0 and "not set" are
  // indistinguishable on the wire.
  int64 min_claimed_secs = 2;
}
// Response for JanitorService.UnclaimMergeTask.
message UnclaimMergeTaskResponse {
  // True if the task was unclaimed, false if it was not found or not yet old
  // enough.
  bool unclaimed = 1;
}
// Filters for JanitorService.CreateMergeTasks. Every field has explicit
// presence and may be left unset.
message CreateMergeTasksRequest {
  // Optional maximum compressed size in bytes. Only segments with
  // compressed_size <= this value.
  optional int64 max_compressed_size = 1;

  // Optional minimum age in seconds. Only segments older than now - min_age.
  optional int64 min_age_secs = 2;

  // Optional maximum age in seconds. Only segments newer than now - max_age.
  optional int64 max_age_secs = 3;

  // Optional dataset name filter.
  optional string dataset_name = 4;
}
// Response for JanitorService.CreateMergeTasks.
message CreateMergeTasksResponse {
  // Number of merge tasks created.
  int32 tasks_created = 1;

  // Total number of segments included across all tasks.
  int64 segments_included = 2;
}
// Response for JanitorService.GetMergeTask: detailed info about one task.
// NOTE(review): size units and the string time formats are not specified
// here — TODO confirm.
message GetMergeTaskResponse {
  // Task ID.
  string task_id = 1;
  // Dataset ID.
  string dataset_id = 2;
  // Number of segments in the task.
  int32 segment_count = 3;
  // Worker the task is assigned to.
  string assigned_to_worker = 4;
  // Creation time, as a string.
  string created_at = 5;

  // Smallest and largest segment size in the task.
  int64 smallest_segment_size = 6;
  int64 largest_segment_size = 7;

  // Smallest and largest compressed segment size in the task.
  int64 smallest_segment_compressed_size = 8;
  int64 largest_segment_compressed_size = 9;

  // Earliest and latest time, as strings.
  string earliest_time = 10;
  string latest_time = 11;

  // Total size and total compressed size.
  int64 total_size = 12;
  int64 total_compressed_size = 13;

  // Assignment time, as a string.
  string assigned_at = 14;
}
// Request for JanitorService.RewriteAllSegments.
message RewriteAllSegmentsRequest {
  // Optional: limit to a specific dataset.
  optional string dataset_name = 1;
}
// Response for JanitorService.RewriteAllSegments.
message RewriteAllSegmentsResponse {
  // Number of single-segment merge tasks created.
  int32 tasks_created = 1;

  // Total number of segments that will be rewritten.
  int64 segments_included = 2;
}
// Request for JanitorService.DeleteMergeTask.
message DeleteMergeTaskRequest {
  // ID of the task to delete.
  string task_id = 1;
}
// Response for JanitorService.DeleteMergeTask.
message DeleteMergeTaskResponse {
  // True if the task was found and deleted.
  bool deleted = 1;
}
Common Types
Shared message types used across multiple services.
// A cloud storage bucket and how to reach it.
message CloudBucket {
  // Storage endpoint.
  string endpoint = 1;
  // Bucket name.
  string bucket = 2;
  // Region; empty = provider default.
  string region = 3;
  // Customer-configured base path within bucket (can be empty).
  string path = 4;
  // Storage provider: "s3" (default) or "gcs".
  string provider = 5;
}
// A prefix inside a cloud bucket.
message CloudObjectPrefix {
  // Bucket the prefix lives in.
  CloudBucket bucket = 1;
  // Prefix, e.g. "datasets/ds-123/segments".
  string prefix = 2;
}
// A single cloud object: a prefix plus a key.
message CloudObjectKey {
  // Prefix the key is relative to.
  CloudObjectPrefix prefix = 1;
  // Object key, e.g. "seg-456.ttseg".
  string key = 2;
}
// A time range expressed as two unix-epoch nanosecond timestamps.
message TimeRange {
  // Start, in nanoseconds since the unix epoch.
  int64 start_time = 1;
  // End, in nanoseconds since the unix epoch.
  int64 end_time = 2;
}
// Description of a new segment to register (used by MergeRegistration and
// CompleteSegmentMergeTaskRequest).
message CreateSegment {
  // Dataset the segment belongs to.
  string dataset_id = 1;
  // Time range of the segment.
  TimeRange time_range = 2;
  // Size in bytes.
  int64 size_bytes = 3;
  // Compressed size in bytes.
  int64 compressed_size_bytes = 4;
  // Number of events in the segment.
  int32 number_of_events = 5;
  // Cloud object key of the segment.
  CloudObjectKey cloud_object_key = 6;
}
// A registered segment as returned by lookup and merge task RPCs.
message SegmentInfo {
  // Segment ID.
  string id = 1;
  // Time range of the segment.
  TimeRange time_range = 2;
  // Size in bytes.
  int64 size_bytes = 3;
  // Compressed size in bytes.
  int64 compressed_size_bytes = 4;
  // Number of events in the segment.
  int32 number_of_events = 5;
  // Dataset the segment belongs to.
  string dataset_id = 6;
  // Cloud object key of the segment.
  CloudObjectKey cloud_object_key = 7;
}
// One sharding field of a dataset.
message ShardingField {
  // Name of the field.
  string field_name = 1;
  // Weight of the field (interpretation not specified here — TODO confirm).
  double weight = 2;
}
// A merge task handed to a worker by PollForSegmentMergeTask.
message SegmentMergeTaskInfo {
  // Field number 3 is not used by any current field. Reserve it so it can
  // never be reused with a different meaning.
  reserved 3;

  // Task ID.
  string task_id = 1;
  // Dataset ID.
  string dataset_id = 2;
  // Segments in the task.
  repeated SegmentInfo segments = 4;
  // Cloud object prefix for the task (presumably where the merged output is
  // written — TODO confirm).
  CloudObjectPrefix cloud_object_prefix = 5;
  // Sharding fields for the task.
  repeated ShardingField sharding_fields = 6;
}
// A dataset's identity and storage location, as listed in
// GetIngestStreamsResponse.
message DatasetInfo {
  // Dataset ID.
  string id = 1;
  // Dataset name.
  string name = 2;
  // Cloud object prefix of the dataset.
  CloudObjectPrefix cloud_object_prefix = 3;
}