-
Notifications
You must be signed in to change notification settings - Fork 16
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
component: prometheus: enable/disable http jobs like rules #316
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -97,6 +97,73 @@ func (c *Component) ConfigComponentLogger(logger logr.Logger, component string, | |
return logger.WithName("DSC.Components." + component) | ||
} | ||
|
||
func getJobName(job map[any]any) (string, bool) { | ||
nameAny, ok := job["job_name"] | ||
if !ok { | ||
fmt.Println("Could not fetch job_name") | ||
return "", false | ||
} | ||
name, ok := nameAny.(string) | ||
if !ok { | ||
fmt.Println("job_name is not a string") | ||
return "", false | ||
} | ||
|
||
return name, true | ||
} | ||
|
||
func getJobIdx(scrapeConfigs *[]any, jobName string) (int, bool) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i am thinking about this function: so
can be
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's split it into 2 parts.
and it's a matter of taste. I found preparing the list and then update it in one place nicer. |
||
for i, j := range *scrapeConfigs { | ||
job, ok := j.(map[any]any) | ||
if !ok { | ||
fmt.Println("scrape_configs element is not array") | ||
return 0, false | ||
} | ||
name, ok := getJobName(job) | ||
if ok && name == jobName { | ||
return i, true | ||
} | ||
} | ||
return 0, false | ||
} | ||
|
||
func updateJob(prometheusContent *map[any]any, jobStr string, enable bool) { | ||
var job map[any]any | ||
|
||
if err := yaml.Unmarshal([]byte(jobStr), &job); err != nil { | ||
fmt.Printf("Error Unmarshaling job: %v\n", err) | ||
return | ||
} | ||
|
||
scrapeConfigsAny, ok := (*prometheusContent)["scrape_configs"] | ||
if !ok { | ||
fmt.Println("Could not fetch scrape_configs") | ||
return | ||
} | ||
|
||
scrapeConfigs, ok := scrapeConfigsAny.([]any) | ||
if !ok { | ||
fmt.Println("scrape_configs is not an array") | ||
return | ||
} | ||
|
||
name, ok := getJobName(job) | ||
if !ok { | ||
return | ||
} | ||
|
||
idx, exists := getJobIdx(&scrapeConfigs, name) | ||
switch { | ||
case enable && !exists: | ||
scrapeConfigs = append(scrapeConfigs, job) | ||
case !enable && exists: | ||
scrapeConfigs = append(scrapeConfigs[:idx], scrapeConfigs[idx+1:]...) | ||
default: | ||
return | ||
} | ||
(*prometheusContent)["scrape_configs"] = scrapeConfigs | ||
} | ||
|
||
// UpdatePrometheusConfig update prometheus-configs.yaml to include/exclude <component>.rules | ||
// parameter enable when set to true to add new rules, when set to false to remove existing rules. | ||
func (c *Component) UpdatePrometheusConfig(_ client.Client, enable bool, component string) error { | ||
|
@@ -116,19 +183,23 @@ func (c *Component) UpdatePrometheusConfig(_ client.Client, enable bool, compone | |
DeadManSnitchRules string `yaml:"deadmanssnitch-alerting.rules"` | ||
DashboardRRules string `yaml:"rhods-dashboard-recording.rules"` | ||
DashboardARules string `yaml:"rhods-dashboard-alerting.rules"` | ||
DashboardJob string `yaml:"rhods-dashboard-job"` | ||
DSPRRules string `yaml:"data-science-pipelines-operator-recording.rules"` | ||
DSPARules string `yaml:"data-science-pipelines-operator-alerting.rules"` | ||
DSPJob string `yaml:"data-science-pipelines-operator-job"` | ||
MMRRules string `yaml:"model-mesh-recording.rules"` | ||
MMARules string `yaml:"model-mesh-alerting.rules"` | ||
OdhModelRRules string `yaml:"odh-model-controller-recording.rules"` | ||
OdhModelARules string `yaml:"odh-model-controller-alerting.rules"` | ||
CFORRules string `yaml:"codeflare-recording.rules"` | ||
CFOARules string `yaml:"codeflare-alerting.rules"` | ||
CFOJob string `yaml:"codeflare-job"` | ||
RayARules string `yaml:"ray-alerting.rules"` | ||
KueueARules string `yaml:"kueue-alerting.rules"` | ||
TrainingOperatorARules string `yaml:"trainingoperator-alerting.rules"` | ||
WorkbenchesRRules string `yaml:"workbenches-recording.rules"` | ||
WorkbenchesARules string `yaml:"workbenches-alerting.rules"` | ||
WorkbenchesJob string `yaml:"workbenches-job"` | ||
TrustyAIRRules string `yaml:"trustyai-recording.rules"` | ||
TrustyAIARules string `yaml:"trustyai-alerting.rules"` | ||
KserveRRules string `yaml:"kserve-recording.rules"` | ||
|
@@ -179,6 +250,17 @@ func (c *Component) UpdatePrometheusConfig(_ client.Client, enable bool, compone | |
} | ||
} | ||
|
||
job, ok := map[string]string{ | ||
"codeflare": configMap.Data.CFOJob, | ||
"data-science-pipelines-operator": configMap.Data.DSPJob, | ||
"rhods-dashboard": configMap.Data.DashboardJob, | ||
"workbenches": configMap.Data.WorkbenchesJob, | ||
}[component] | ||
|
||
if ok { | ||
updateJob(&prometheusContent, job, enable) | ||
} | ||
|
||
// Marshal back | ||
newDataYAML, err := yaml.Marshal(&prometheusContent) | ||
if err != nil { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
have not touched the detail of this PR, but should
component.go
change get into ODH first?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
100%
It's a branch to test it first.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I was thinking that in downstream they anyway should come as one patch (upstream changes do not break anything there, no problem), but looks like having the code without prometheus changes even if does not work as expected, does not break existing setup.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yeah, this is similar situation to MR's PR.
we can use this one to test first and backport to ODH as long as we have the logic applied to both upstream and downstream.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'll split it for the real submission, it's fine.