Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: webui/snap: Button for resuming all stuck sectors #230

Merged
merged 2 commits into from
Oct 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 86 additions & 1 deletion web/api/webrpc/upgrade.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ package webrpc
import (
"context"

"golang.org/x/xerrors"

"github.com/filecoin-project/go-state-types/abi"

"github.com/filecoin-project/curio/tasks/snap"
Expand Down Expand Up @@ -40,7 +42,7 @@ func (a *WebRPC) UpgradeSectors(ctx context.Context) ([]UpgradeSector, error) {
return sectors, nil
}

func (a *WebRPC) UpgradeResetTaskIDs(ctx context.Context, spid, sectorNum uint64) error {
// UpgradeResetTaskIDs runs the unset_task_id_snap database function for the
// sector identified by spid and sectorNum.
//
// NOTE(review): presumably unset_task_id_snap clears the task IDs recorded on
// the snap pipeline row so the sector can be picked up again; its definition
// is not visible here, confirm against the schema.
//
// The error from the database call, if any, is returned unchanged.
func (a *WebRPC) UpgradeResetTaskIDs(ctx context.Context, spid, sectorNum int64) error {
	if _, err := a.deps.DB.Exec(ctx, `SELECT unset_task_id_snap($1, $2)`, spid, sectorNum); err != nil {
		return err
	}
	return nil
}
Expand All @@ -54,3 +56,86 @@ func (a *WebRPC) UpgradeDelete(ctx context.Context, spid, sectorNum uint64) erro
_, err := a.deps.DB.Exec(ctx, `DELETE FROM sectors_snap_pipeline WHERE sp_id = $1 AND sector_number = $2`, spid, sectorNum)
return err
}

// snapMissingTask is one result row of the missing-task query issued by
// pipelineSnapMissingTasks. Each field is mapped from the column named in its
// `db` tag.
type snapMissingTask struct {
	// SpID and SectorNumber together identify the sector (see sectorID).
	SpID         int64 `db:"sp_id"`
	SectorNumber int64 `db:"sector_number"`

	// AllTaskIDs holds every task ID reported for the sector;
	// MissingTaskIDs holds the query's "missing" aggregation of them.
	AllTaskIDs     []int64 `db:"all_task_ids"`
	MissingTaskIDs []int64 `db:"missing_task_ids"`

	// TotalTasks and MissingTasksCount are the array lengths computed by the
	// query for the two slices above.
	TotalTasks        int `db:"total_tasks"`
	MissingTasksCount int `db:"missing_tasks_count"`

	// RestartStatus is the label produced by the query:
	// 'All tasks missing' or 'Some tasks missing'.
	RestartStatus string `db:"restart_status"`
}

// sectorID builds the abi.SectorID for this row: SpID becomes the Miner actor
// ID and SectorNumber becomes the sector Number.
func (smt snapMissingTask) sectorID() abi.SectorID {
	var id abi.SectorID
	id.Miner = abi.ActorID(smt.SpID)
	id.Number = abi.SectorNumber(smt.SectorNumber)
	return id
}

// pipelineSnapMissingTasks returns the snap-pipeline sectors that reference at
// least one task ID with no matching row in harmony_task.
//
// For every row of sectors_snap_pipeline the query expands the array returned
// by get_snap_pipeline_tasks, left-joins each ID against harmony_task and
// collects the IDs that found no match into missing_task_ids. NULL
// placeholders for tasks that DO exist are stripped at aggregation time, so:
//
//   - missing_task_ids holds only the IDs that are really missing (no NULL
//     elements reach the []int64 destination),
//   - missing_tasks_count is the real number of missing tasks, and
//   - restart_status distinguishes 'All tasks missing' from
//     'Some tasks missing' instead of always reporting the former.
//
// Sectors with no tasks, or with no missing tasks, are not returned. Rows are
// ordered by sp_id, then sector_number.
//
// A query failure is returned wrapped with context.
func (a *WebRPC) pipelineSnapMissingTasks(ctx context.Context) ([]snapMissingTask, error) {
	var tasks []snapMissingTask
	err := a.deps.DB.Select(ctx, &tasks, `
		WITH sector_tasks AS (
			SELECT
				sp.sp_id,
				sp.sector_number,
				get_snap_pipeline_tasks(sp.sp_id, sp.sector_number) AS task_ids
			FROM
				sectors_snap_pipeline sp
		),
		missing_tasks AS (
			SELECT
				st.sp_id,
				st.sector_number,
				st.task_ids,
				-- Keep only the IDs without a harmony_task row; array_remove drops
				-- the NULL placeholders produced for tasks that still exist.
				array_remove(
					array_agg(CASE WHEN ht.id IS NULL THEN task_id ELSE NULL END),
					NULL
				) AS missing_task_ids
			FROM
				sector_tasks st
				CROSS JOIN UNNEST(st.task_ids) WITH ORDINALITY AS t(task_id, task_order)
				LEFT JOIN harmony_task ht ON ht.id = task_id
			GROUP BY
				st.sp_id, st.sector_number, st.task_ids
		)
		SELECT
			mt.sp_id,
			mt.sector_number,
			mt.task_ids AS all_task_ids,
			mt.missing_task_ids,
			array_length(mt.task_ids, 1) AS total_tasks,
			array_length(mt.missing_task_ids, 1) AS missing_tasks_count,
			CASE
				WHEN array_length(mt.task_ids, 1) = array_length(mt.missing_task_ids, 1) THEN 'All tasks missing'
				ELSE 'Some tasks missing'
			END AS restart_status
		FROM
			missing_tasks mt
		WHERE
			array_length(mt.task_ids, 1) > 0 -- Has at least one task
			AND array_length(mt.missing_task_ids, 1) > 0 -- At least one task is missing
		ORDER BY
			mt.sp_id, mt.sector_number;`)
	if err != nil {
		return nil, xerrors.Errorf("failed to fetch missing SNAP tasks: %w", err)
	}

	return tasks, nil
}

// PipelineSnapRestartAll resets the task IDs of every snap-pipeline sector
// reported by pipelineSnapMissingTasks whose AllTaskIDs and MissingTaskIDs
// have the same, non-zero length.
//
// Each selected sector is logged and then passed to UpgradeResetTaskIDs.
// Processing stops at the first error, which is returned unchanged; sectors
// later in the list are left untouched in that case.
func (a *WebRPC) PipelineSnapRestartAll(ctx context.Context) error {
	stuck, err := a.pipelineSnapMissingTasks(ctx)
	if err != nil {
		return err
	}

	for _, entry := range stuck {
		total, gone := len(entry.AllTaskIDs), len(entry.MissingTaskIDs)

		// Only act when every listed task is gone and there is at least one.
		if gone > 0 && total == gone {
			log.Infow("Restarting SNAP sector", "sector", entry.sectorID(), "missing_tasks", entry.MissingTasksCount)

			resetErr := a.UpgradeResetTaskIDs(ctx, entry.SpID, entry.SectorNumber)
			if resetErr != nil {
				return resetErr
			}
		}
	}

	return nil
}
15 changes: 14 additions & 1 deletion web/static/snap/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,26 @@
<title>Upgrade Pipeline</title>
<script type="module" src="/ux/curio-ux.mjs"></script>
<script type="module" src="upgrade-sectors.mjs"></script>
<script type="module" src="restart-all-snap-button.mjs"></script>
</head>

<body style="visibility:hidden" data-bs-theme="dark">
<curio-ux>
<section class="section">
<div class="app-head">
<div class="head-left">
<h1>Upgrading Sectors</h1>
</div>
<hr />
</div>
<div class="row">
<div class="row-md-auto" style="width: 50%">
<div class="info-block">
<restart-all-snap-button></restart-all-snap-button>
</div>
</div>
</div>
<div class="row">
<h1>Upgrading Sectors</h1>
<div class="col-md-auto" style="max-width: 95%">
<upgrade-sectors></upgrade-sectors>
</div>
Expand Down
40 changes: 40 additions & 0 deletions web/static/snap/restart-all-snap-button.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import { LitElement, html } from 'https://cdn.jsdelivr.net/gh/lit/dist@3/all/lit-all.min.js';
import RPCCall from '/lib/jsonrpc.mjs';

/**
 * <restart-all-snap-button> renders a single "Resume All" button that invokes
 * the `PipelineSnapRestartAll` RPC method when clicked.
 *
 * While the call is in flight the button is disabled, switches to the
 * secondary style and shows "Processing...". The outcome is only reported on
 * the browser console.
 */
class RestartAllSnapButton extends LitElement {
    // Reactive property: changing isProcessing triggers a re-render.
    static get properties() {
        return {
            isProcessing: { type: Boolean },
        };
    }

    constructor() {
        super();
        this.isProcessing = false;
    }

    render() {
        const busy = this.isProcessing;
        const variant = busy ? 'btn-secondary' : 'btn-primary';
        const label = busy ? 'Processing...' : 'Resume All';

        // The template text is kept flush-left so its static parts stay unchanged.
        return html`
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-1BmE4kWBq78iYhFldvKuhfTAU6auU8tT94WrHftjDbrCEXSU1oBoqyl2QvZ6jIW3" crossorigin="anonymous">
<button
@click="${this.handleClick}"
class="btn ${variant}"
?disabled="${busy}"
>
${label}
</button>
`;
    }

    /**
     * Click handler: marks the component busy, awaits the RPC call, logs the
     * result, and always clears the busy flag afterwards.
     */
    async handleClick() {
        this.isProcessing = true;
        try {
            await RPCCall('PipelineSnapRestartAll', []);
            console.log('Resume All operation completed successfully');
        } catch (err) {
            // Failures are logged only; nothing is rethrown to the caller.
            console.error('Error during Resume All operation:', err);
        } finally {
            this.isProcessing = false;
        }
    }
}

// Register the element so <restart-all-snap-button> can be used in markup.
customElements.define('restart-all-snap-button', RestartAllSnapButton);