Skip to content

Commit

Permalink
storage: Add post challenge reading metrics (#228)
Browse files Browse the repository at this point in the history
* storage: Add post challenge reading metrics

* make gen

* make lint happy
  • Loading branch information
magik6k authored Dec 6, 2024
1 parent fd9f8c5 commit 3ca4d0c
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 6 deletions.
9 changes: 5 additions & 4 deletions harmony/harmonydb/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@ import (
"go.opencensus.io/stats"
"go.opencensus.io/stats/view"
"go.opencensus.io/tag"

"github.com/filecoin-project/lotus/metrics"
)

var (
Expand Down Expand Up @@ -43,7 +41,7 @@ var DBMeasures = struct {

// CacheViews groups all cache-related default views.
func init() {
metrics.RegisterViews(
err := view.Register(
&view.View{
Measure: DBMeasures.Hits,
Aggregation: view.Sum(),
Expand All @@ -65,7 +63,10 @@ func init() {
TagKeys: []tag.Key{dbTag},
},
)
err := prometheus.Register(DBMeasures.Waits)
if err != nil {
panic(err)
}
err = prometheus.Register(DBMeasures.Waits)
if err != nil {
panic(err)
}
Expand Down
56 changes: 54 additions & 2 deletions lib/paths/local.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"bytes"
"context"
"encoding/json"
"fmt"
"math/bits"
"math/rand"
"os"
Expand All @@ -15,6 +16,8 @@ import (
"time"

"github.com/ipfs/go-cid"
"go.opencensus.io/stats"
"go.opencensus.io/tag"
"golang.org/x/xerrors"

ffi "github.com/filecoin-project/filecoin-ffi"
Expand All @@ -30,6 +33,9 @@ import (
"github.com/filecoin-project/lotus/storage/sealer/fsutil"
)

// time abow which a warn log will be emitted for slow PoSt reads
var SlowPoStCheckThreshold = 45 * time.Second

type LocalStorage interface {
GetStorage() (storiface.StorageConfig, error)
SetStorage(func(*storiface.StorageConfig)) error
Expand Down Expand Up @@ -959,23 +965,57 @@ func (st *Local) GenerateSingleVanillaProof(ctx context.Context, minerID abi.Act
if si.Update {
src, si, err := st.AcquireSector(ctx, sr, storiface.FTUpdate|storiface.FTUpdateCache, storiface.FTNone, storiface.PathStorage, storiface.AcquireMove)
if err != nil {
// Record the error with tags
ctx, _ = tag.New(ctx,
tag.Upsert(updateTagKey, fmt.Sprintf("%t", si.Update != "")),
tag.Upsert(cacheIDTagKey, ""),
tag.Upsert(sealedIDTagKey, ""),
)
stats.Record(ctx, GenerateSingleVanillaProofErrors.M(1))
return nil, xerrors.Errorf("acquire sector: %w", err)
}
cache, sealed = src.UpdateCache, src.Update
cacheID, sealedID = si.UpdateCache, si.Update
} else {
src, si, err := st.AcquireSector(ctx, sr, storiface.FTSealed|storiface.FTCache, storiface.FTNone, storiface.PathStorage, storiface.AcquireMove)
if err != nil {
// Record the error with tags
ctx, _ = tag.New(ctx,
tag.Upsert(updateTagKey, fmt.Sprintf("%t", si.Update != "")),
tag.Upsert(cacheIDTagKey, ""),
tag.Upsert(sealedIDTagKey, ""),
)
stats.Record(ctx, GenerateSingleVanillaProofErrors.M(1))
return nil, xerrors.Errorf("acquire sector: %w", err)
}
cache, sealed = src.Cache, src.Sealed
cacheID, sealedID = si.Cache, si.Sealed
}

if sealed == "" || cache == "" {
// Record the error with tags
ctx, _ = tag.New(ctx,
tag.Upsert(updateTagKey, fmt.Sprintf("%t", si.Update)),
tag.Upsert(cacheIDTagKey, cacheID),
tag.Upsert(sealedIDTagKey, sealedID),
)
stats.Record(ctx, GenerateSingleVanillaProofErrors.M(1))
return nil, errPathNotFound
}

// Add metrics context with tags
ctx, err := tag.New(ctx,
tag.Upsert(updateTagKey, fmt.Sprintf("%t", si.Update)),
tag.Upsert(cacheIDTagKey, cacheID),
tag.Upsert(sealedIDTagKey, sealedID),
)
if err != nil {
log.Errorw("failed to create tagged context", "err", err)
}

// Record that the function was called
stats.Record(ctx, GenerateSingleVanillaProofCalls.M(1))

psi := ffi.PrivateSectorInfo{
SectorInfo: proof.SectorInfo{
SealProof: si.SealProof,
Expand All @@ -996,11 +1036,23 @@ func (st *Local) GenerateSingleVanillaProof(ctx context.Context, minerID abi.Act

select {
case r := <-resCh:
// Record the duration upon successful completion
duration := time.Since(start).Milliseconds()
stats.Record(ctx, GenerateSingleVanillaProofDuration.M(duration))

if duration > SlowPoStCheckThreshold.Milliseconds() {
log.Warnw("slow GenerateSingleVanillaProof", "duration", duration, "cache-id", cacheID, "sealed-id", sealedID, "cache", cache, "sealed", sealed, "sector", si)
}

return r.Unwrap()
case <-ctx.Done():
log.Errorw("failed to generate valilla PoSt proof before context cancellation", "err", ctx.Err(), "duration", time.Since(start), "cache-id", cacheID, "sealed-id", sealedID, "cache", cache, "sealed", sealed)
// Record the duration and error if the context is canceled
duration := time.Since(start).Milliseconds()
stats.Record(ctx, GenerateSingleVanillaProofDuration.M(duration))
stats.Record(ctx, GenerateSingleVanillaProofErrors.M(1))
log.Errorw("failed to generate vanilla PoSt proof before context cancellation", "err", ctx.Err(), "duration", duration, "cache-id", cacheID, "sealed-id", sealedID, "cache", cache, "sealed", sealed)

// this will leave the GenerateSingleVanillaProof goroutine hanging, but that's still less bad than failing PoSt
// This will leave the GenerateSingleVanillaProof goroutine hanging, but that's still less bad than failing PoSt
return nil, xerrors.Errorf("failed to generate vanilla proof before context cancellation: %w", ctx.Err())
}
}
Expand Down
48 changes: 48 additions & 0 deletions lib/paths/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package paths

import (
"go.opencensus.io/stats"
"go.opencensus.io/stats/view"
"go.opencensus.io/tag"
)

var (
updateTagKey, _ = tag.NewKey("update")
cacheIDTagKey, _ = tag.NewKey("cache_id")
sealedIDTagKey, _ = tag.NewKey("sealed_id")

pre = "curio_stor_"

// Buckets for the duration histogram (in seconds)
durationBuckets = []float64{0.1, 1, 5, 12, 20, 60, 90, 150, 300, 500, 900, 1500, 3000, 6000, 15000, 30000, 60000, 90000, 200_000, 600_000, 1000_000}
)

var (
// Measures
GenerateSingleVanillaProofCalls = stats.Int64(pre+"generate_single_vanilla_proof_calls", "Number of calls to GenerateSingleVanillaProof", stats.UnitDimensionless)
GenerateSingleVanillaProofErrors = stats.Int64(pre+"generate_single_vanilla_proof_errors", "Number of errors in GenerateSingleVanillaProof", stats.UnitDimensionless)
GenerateSingleVanillaProofDuration = stats.Int64(pre+"generate_single_vanilla_proof_duration_seconds", "Duration of GenerateSingleVanillaProof in seconds", stats.UnitMilliseconds)
)

func init() {
err := view.Register(
&view.View{
Measure: GenerateSingleVanillaProofCalls,
Aggregation: view.Count(),
TagKeys: []tag.Key{updateTagKey, cacheIDTagKey, sealedIDTagKey},
},
&view.View{
Measure: GenerateSingleVanillaProofErrors,
Aggregation: view.Count(),
TagKeys: []tag.Key{updateTagKey, cacheIDTagKey, sealedIDTagKey},
},
&view.View{
Measure: GenerateSingleVanillaProofDuration,
Aggregation: view.Distribution(durationBuckets...),
TagKeys: []tag.Key{updateTagKey, cacheIDTagKey, sealedIDTagKey},
},
)
if err != nil {
panic(err)
}
}

0 comments on commit 3ca4d0c

Please sign in to comment.