Skip to content

Commit c86e736

Browse files
committed
feat: Add a rds-instance-stop chaos fault
Signed-off-by: Jongwoo Han <[email protected]>
1 parent fc646d6 commit c86e736

File tree

10 files changed

+808
-0
lines changed

10 files changed

+808
-0
lines changed

bin/experiment/experiment.go

+3
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ import (
5656
ebsLossByTag "github.com/litmuschaos/litmus-go/experiments/kube-aws/ebs-loss-by-tag/experiment"
5757
ec2TerminateByID "github.com/litmuschaos/litmus-go/experiments/kube-aws/ec2-terminate-by-id/experiment"
5858
ec2TerminateByTag "github.com/litmuschaos/litmus-go/experiments/kube-aws/ec2-terminate-by-tag/experiment"
59+
rdsInstanceStop "github.com/litmuschaos/litmus-go/experiments/kube-aws/rds-instance-stop/experiment"
5960
k6Loadgen "github.com/litmuschaos/litmus-go/experiments/load/k6-loadgen/experiment"
6061
springBootFaults "github.com/litmuschaos/litmus-go/experiments/spring-boot/spring-boot-faults/experiment"
6162
vmpoweroff "github.com/litmuschaos/litmus-go/experiments/vmware/vm-poweroff/experiment"
@@ -149,6 +150,8 @@ func main() {
149150
ebsLossByID.EBSLossByID(clients)
150151
case "ebs-loss-by-tag":
151152
ebsLossByTag.EBSLossByTag(clients)
153+
case "rds-instance-stop":
154+
rdsInstanceStop.RDSInstanceStop(clients)
152155
case "node-restart":
153156
nodeRestart.NodeRestart(clients)
154157
case "pod-dns-error":
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,255 @@
1+
package lib
2+
3+
import (
4+
"fmt"
5+
"os"
6+
"os/signal"
7+
"strings"
8+
"syscall"
9+
"time"
10+
11+
"github.com/litmuschaos/litmus-go/pkg/cerrors"
12+
awslib "github.com/litmuschaos/litmus-go/pkg/cloud/aws/rds"
13+
"github.com/litmuschaos/litmus-go/pkg/events"
14+
experimentTypes "github.com/litmuschaos/litmus-go/pkg/kube-aws/rds-instance-stop/types"
15+
"github.com/litmuschaos/litmus-go/pkg/probe"
16+
"github.com/palantir/stacktrace"
17+
18+
"github.com/litmuschaos/litmus-go/pkg/clients"
19+
"github.com/litmuschaos/litmus-go/pkg/log"
20+
"github.com/litmuschaos/litmus-go/pkg/types"
21+
"github.com/litmuschaos/litmus-go/pkg/utils/common"
22+
)
23+
24+
// Package-level state shared by the injection helpers in this file.
var (
	// err is reused across chaos phases for calls whose failures are
	// propagated back to the caller.
	err error

	// inject and abort both receive SIGINT/SIGTERM: inject short-circuits
	// an injection that has not started yet, abort drives the revert path
	// in abortWatcher.
	inject, abort chan os.Signal
)
28+
29+
func PrepareRDSInstanceStop(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error {
30+
31+
// Inject channel is used to transmit signal notifications.
32+
inject = make(chan os.Signal, 1)
33+
// Catch and relay certain signal(s) to inject channel.
34+
signal.Notify(inject, os.Interrupt, syscall.SIGTERM)
35+
36+
// Abort channel is used to transmit signal notifications.
37+
abort = make(chan os.Signal, 1)
38+
// Catch and relay certain signal(s) to abort channel.
39+
signal.Notify(abort, os.Interrupt, syscall.SIGTERM)
40+
41+
// Waiting for the ramp time before chaos injection
42+
if experimentsDetails.RampTime != 0 {
43+
log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", experimentsDetails.RampTime)
44+
common.WaitForDuration(experimentsDetails.RampTime)
45+
}
46+
47+
// Get the instance identifier or list of instance identifiers
48+
instanceIdentifierList := strings.Split(experimentsDetails.RDSInstanceIdentifier, ",")
49+
if experimentsDetails.RDSInstanceIdentifier == "" || len(instanceIdentifierList) == 0 {
50+
return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "no RDS instance identifier found to stop"}
51+
}
52+
53+
instanceIdentifierList = common.FilterBasedOnPercentage(experimentsDetails.InstanceAffectedPerc, instanceIdentifierList)
54+
log.Infof("[Chaos]:Number of Instance targeted: %v", len(instanceIdentifierList))
55+
56+
// Watching for the abort signal and revert the chaos
57+
go abortWatcher(experimentsDetails, instanceIdentifierList, chaosDetails)
58+
59+
switch strings.ToLower(experimentsDetails.Sequence) {
60+
case "serial":
61+
if err = injectChaosInSerialMode(experimentsDetails, instanceIdentifierList, clients, resultDetails, eventsDetails, chaosDetails); err != nil {
62+
return stacktrace.Propagate(err, "could not run chaos in serial mode")
63+
}
64+
case "parallel":
65+
if err = injectChaosInParallelMode(experimentsDetails, instanceIdentifierList, clients, resultDetails, eventsDetails, chaosDetails); err != nil {
66+
return stacktrace.Propagate(err, "could not run chaos in parallel mode")
67+
}
68+
default:
69+
return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)}
70+
}
71+
72+
// Waiting for the ramp time after chaos injection
73+
if experimentsDetails.RampTime != 0 {
74+
log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", experimentsDetails.RampTime)
75+
common.WaitForDuration(experimentsDetails.RampTime)
76+
}
77+
return nil
78+
}
79+
80+
// injectChaosInSerialMode will inject the rds instance termination in serial mode that is one after other
81+
func injectChaosInSerialMode(experimentsDetails *experimentTypes.ExperimentDetails, instanceIdentifierList []string, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error {
82+
83+
select {
84+
case <-inject:
85+
// Stopping the chaos execution, if abort signal received
86+
os.Exit(0)
87+
default:
88+
// ChaosStartTimeStamp contains the start timestamp, when the chaos injection begin
89+
ChaosStartTimeStamp := time.Now()
90+
duration := int(time.Since(ChaosStartTimeStamp).Seconds())
91+
92+
for duration < experimentsDetails.ChaosDuration {
93+
94+
log.Infof("[Info]: Target instance identifier list, %v", instanceIdentifierList)
95+
96+
if experimentsDetails.EngineName != "" {
97+
msg := "Injecting " + experimentsDetails.ExperimentName + " chaos on rds instance"
98+
types.SetEngineEventAttributes(eventsDetails, types.ChaosInject, msg, "Normal", chaosDetails)
99+
events.GenerateEvents(eventsDetails, clients, chaosDetails, "ChaosEngine")
100+
}
101+
102+
for i, identifier := range instanceIdentifierList {
103+
104+
// Deleting the RDS instance
105+
log.Info("[Chaos]: Stopping the desired RDS instance")
106+
if err := awslib.RDSInstanceStop(identifier, experimentsDetails.Region); err != nil {
107+
return stacktrace.Propagate(err, "rds instance failed to stop")
108+
}
109+
110+
common.SetTargets(identifier, "injected", "RDS", chaosDetails)
111+
112+
// Wait for rds instance to completely stop
113+
log.Infof("[Wait]: Wait for RDS instance '%v' to get in stopped state", identifier)
114+
if err := awslib.WaitForRDSInstanceDown(experimentsDetails.Timeout, experimentsDetails.Delay, identifier, experimentsDetails.Region); err != nil {
115+
return stacktrace.Propagate(err, "rds instance failed to stop")
116+
}
117+
118+
// Run the probes during chaos
119+
// the OnChaos probes execution will start in the first iteration and keep running for the entire chaos duration
120+
if len(resultDetails.ProbeDetails) != 0 && i == 0 {
121+
if err = probe.RunProbes(chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil {
122+
return stacktrace.Propagate(err, "failed to run probes")
123+
}
124+
}
125+
126+
// Wait for chaos interval
127+
log.Infof("[Wait]: Waiting for chaos interval of %vs", experimentsDetails.ChaosInterval)
128+
time.Sleep(time.Duration(experimentsDetails.ChaosInterval) * time.Second)
129+
130+
// Starting the RDS instance
131+
log.Info("[Chaos]: Starting back the RDS instance")
132+
if err = awslib.RDSInstanceStart(identifier, experimentsDetails.Region); err != nil {
133+
return stacktrace.Propagate(err, "rds instance failed to start")
134+
}
135+
136+
// Wait for rds instance to get in available state
137+
log.Infof("[Wait]: Wait for RDS instance '%v' to get in available state", identifier)
138+
if err := awslib.WaitForRDSInstanceUp(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.Region, identifier); err != nil {
139+
return stacktrace.Propagate(err, "rds instance failed to start")
140+
}
141+
142+
common.SetTargets(identifier, "reverted", "RDS", chaosDetails)
143+
}
144+
duration = int(time.Since(ChaosStartTimeStamp).Seconds())
145+
}
146+
}
147+
return nil
148+
}
149+
150+
// injectChaosInParallelMode will inject the rds instance termination in parallel mode that is all at once
151+
func injectChaosInParallelMode(experimentsDetails *experimentTypes.ExperimentDetails, instanceIdentifierList []string, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error {
152+
153+
select {
154+
case <-inject:
155+
// stopping the chaos execution, if abort signal received
156+
os.Exit(0)
157+
default:
158+
//ChaosStartTimeStamp contains the start timestamp, when the chaos injection begin
159+
ChaosStartTimeStamp := time.Now()
160+
duration := int(time.Since(ChaosStartTimeStamp).Seconds())
161+
162+
for duration < experimentsDetails.ChaosDuration {
163+
164+
log.Infof("[Info]: Target instance identifier list, %v", instanceIdentifierList)
165+
166+
if experimentsDetails.EngineName != "" {
167+
msg := "Injecting " + experimentsDetails.ExperimentName + " chaos on rds instance"
168+
types.SetEngineEventAttributes(eventsDetails, types.ChaosInject, msg, "Normal", chaosDetails)
169+
events.GenerateEvents(eventsDetails, clients, chaosDetails, "ChaosEngine")
170+
}
171+
172+
// PowerOff the instance
173+
for _, identifier := range instanceIdentifierList {
174+
// Stopping the RDS instance
175+
log.Info("[Chaos]: Stopping the desired RDS instance")
176+
if err := awslib.RDSInstanceStop(identifier, experimentsDetails.Region); err != nil {
177+
return stacktrace.Propagate(err, "rds instance failed to stop")
178+
}
179+
common.SetTargets(identifier, "injected", "RDS", chaosDetails)
180+
}
181+
182+
for _, identifier := range instanceIdentifierList {
183+
// Wait for rds instance to completely stop
184+
log.Infof("[Wait]: Wait for RDS instance '%v' to get in stopped state", identifier)
185+
if err := awslib.WaitForRDSInstanceDown(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.Region, identifier); err != nil {
186+
return stacktrace.Propagate(err, "rds instance failed to stop")
187+
}
188+
common.SetTargets(identifier, "reverted", "RDS", chaosDetails)
189+
}
190+
191+
// Run the probes during chaos
192+
if len(resultDetails.ProbeDetails) != 0 {
193+
if err := probe.RunProbes(chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil {
194+
return stacktrace.Propagate(err, "failed to run probes")
195+
}
196+
}
197+
198+
// Wait for chaos interval
199+
log.Infof("[Wait]: Waiting for chaos interval of %vs", experimentsDetails.ChaosInterval)
200+
time.Sleep(time.Duration(experimentsDetails.ChaosInterval) * time.Second)
201+
202+
// Starting the RDS instance
203+
for _, identifier := range instanceIdentifierList {
204+
log.Info("[Chaos]: Starting back the RDS instance")
205+
if err = awslib.RDSInstanceStart(identifier, experimentsDetails.Region); err != nil {
206+
return stacktrace.Propagate(err, "rds instance failed to start")
207+
}
208+
}
209+
210+
for _, identifier := range instanceIdentifierList {
211+
// Wait for rds instance to get in available state
212+
log.Infof("[Wait]: Wait for RDS instance '%v' to get in available state", identifier)
213+
if err := awslib.WaitForRDSInstanceUp(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.Region, identifier); err != nil {
214+
return stacktrace.Propagate(err, "rds instance failed to start")
215+
}
216+
}
217+
218+
for _, identifier := range instanceIdentifierList {
219+
common.SetTargets(identifier, "reverted", "RDS", chaosDetails)
220+
}
221+
duration = int(time.Since(ChaosStartTimeStamp).Seconds())
222+
}
223+
}
224+
return nil
225+
}
226+
227+
// watching for the abort signal and revert the chaos
228+
func abortWatcher(experimentsDetails *experimentTypes.ExperimentDetails, instanceIdentifierList []string, chaosDetails *types.ChaosDetails) {
229+
230+
<-abort
231+
232+
log.Info("[Abort]: Chaos Revert Started")
233+
for _, identifier := range instanceIdentifierList {
234+
instanceState, err := awslib.GetRDSInstanceStatus(identifier, experimentsDetails.Region)
235+
if err != nil {
236+
log.Errorf("Failed to get instance status when an abort signal is received: %v", err)
237+
}
238+
if instanceState != "running" {
239+
240+
log.Info("[Abort]: Waiting for the RDS instance to get down")
241+
if err := awslib.WaitForRDSInstanceDown(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.Region, identifier); err != nil {
242+
log.Errorf("Unable to wait till stop of the instance: %v", err)
243+
}
244+
245+
log.Info("[Abort]: Starting RDS instance as abort signal received")
246+
err := awslib.RDSInstanceStart(identifier, experimentsDetails.Region)
247+
if err != nil {
248+
log.Errorf("RDS instance failed to start when an abort signal is received: %v", err)
249+
}
250+
}
251+
common.SetTargets(identifier, "reverted", "RDS", chaosDetails)
252+
}
253+
log.Info("[Abort]: Chaos Revert Completed")
254+
os.Exit(1)
255+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
## Experiment Metadata
2+
3+
<table>
4+
<tr>
5+
<th> Name </th>
6+
<th> Description </th>
7+
<th> Documentation Link </th>
8+
</tr>
9+
<tr>
10+
<td> RDS Instance Stop </td>
11+
<td> This experiment stops an RDS instance identified by its instance identifier and brings it back to the available state after the specified chaos duration. We can also control the number of target instances using the instance affected percentage.</td>
12+
<td> <a href="https://litmuschaos.github.io/litmus/experiments/categories/aws/rds-instance-stop/"> Here </a> </td>
13+
</tr>
14+
</table>

0 commit comments

Comments
 (0)