3 * Copyright 2016-2018 The nlfaultinjection Authors.
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
21 * Implementation of the fault-injection utilities.
24 #ifndef __STDC_LIMIT_MACROS
25 #define __STDC_LIMIT_MACROS
33 #include <nlfaultinjection.hpp>
37 namespace FaultInjection {
// Forward declaration of the file-local crash helper defined at the end of this file.
39 static void Die(void) __attribute__((noreturn));
// Application-provided context; NULL until SetGlobalContext() stores a pointer here.
// CheckFault() reads its mCbTable.mPostInjectionCb and mCbTable.mRebootCb hooks when it is non-NULL.
41 static GlobalContext *sGlobalContext = NULL;
44 * The callback function that implements the deterministic
45 * injection feature (see FailAtFault).
// Deterministic trigger: works purely off the two counters stored in the Record.
47 static bool DeterministicCbFn(Identifier aId,
// While skips remain, this call only consumes one of them.
56 if (aRecord->mNumCallsToSkip)
58 aRecord->mNumCallsToSkip--;
// Skips are exhausted: while failures remain, this call consumes one of them.
60 else if (aRecord->mNumCallsToFail)
62 aRecord->mNumCallsToFail--;
70 * Callback list node for DeterministicCbFn.
71 * This node terminates all callback lists.
// Initializer is presumably { mCallBackFn, mContext, mNext } (compare sRandomCb, whose third
// slot links to this node) — so the trailing NULL leaves this node without a successor.
73 static Callback sDeterministicCb = { DeterministicCbFn, NULL, NULL };
76 * The callback function that implements the random
77 * injection feature (see FailRandomlyAtFault).
// Random trigger: compares a pseudo-random draw against the Record's configured percentage.
79 static bool RandomCbFn(Identifier aId,
// Only draw a random number when a non-zero percentage is configured.
88 if (aRecord->mPercentage > 0)
// rand() % 100 is in [0, 99], so randValue is in [1, 100]; a percentage of 100 always passes the test below.
90 int randValue = (rand() % 100) + 1;
91 if (randValue <= aRecord->mPercentage)
101 * Callback list node for RandomCbFn.
102 * Note that this is initialized to point to sDeterministicCb.
103 * All Record instances are initialized to point to
104 * this callback node.
// First default node; its last initializer links it to sDeterministicCb.
// Manager::Init() installs the address of this node as every Record's mCallbackList.
106 static Callback sRandomCb = { RandomCbFn, NULL, &sDeterministicCb };
109 * Alias for the address of the first default callback.
// ResetFaultConfigurations() stops removing callbacks once the list head equals this pointer.
111 static const Callback *sEndOfCustomCallbacks = &sRandomCb;
114 * Initialize the Manager instance.
116 * @param[in] inNumFaults The size of inFaultArray, equal to the number of fault IDs.
117 * @param[in] inFaultArray A pointer to an array of Record, in which this object
118 * will store the configuration of each fault.
119 * @param[in] inManagerName A pointer to a C string containing the name of the Manager.
120 * @param[in] inFaultNames A pointer to an array of inNumFaults C strings that describe
123 * @return -EINVAL if the inputs are not valid.
// Validates the inputs, stores them in the Manager, and points every Record's
// callback list at the default callback chain.
126 int32_t Manager::Init(size_t inNumFaults,
127 Record *inFaultArray,
129 const Name *inFaultNames)
// A zero-sized table or any NULL pointer yields err = -EINVAL; presumably the macro then
// jumps to the exit label, so none of the members below are written on bad input.
134 nlEXPECT_ACTION((inNumFaults > 0 && inFaultArray && inManagerName && inFaultNames), exit, err = -EINVAL);
// The Manager keeps the caller's pointers as-is (no copies are made here), so the
// record array and the name strings must outlive the Manager.
136 mName = inManagerName;
137 mNumFaults = inNumFaults;
138 mFaultRecords = inFaultArray;
139 mFaultNames = inFaultNames;
144 // Link all callback lists to the two default callbacks.
145 for (i = 0; i < mNumFaults; i++)
// sRandomCb chains to sDeterministicCb, so one assignment installs both defaults.
147 mFaultRecords[i].mCallbackList = &sRandomCb;
155 * Configure a fault to be triggered randomly, with a given probability defined as a percentage.
156 * This is meant to be used on live systems to generate a build that will encounter random failures.
158 * @param[in] inId The fault ID
159 * @param[in] inPercentage An integer between 0 and 100. 100 means "always". 0 means "never".
161 * @return -EINVAL if the inputs are not valid.
// Switches fault inId to random triggering with the given percentage.
164 int32_t Manager::FailRandomlyAtFault(Identifier inId,
165 uint8_t inPercentage)
// Both the fault ID and the percentage range (0..100) are validated.
169 nlEXPECT_ACTION((inId < mNumFaults && inPercentage <= 100),
// Zero the deterministic counters so DeterministicCbFn finds nothing to skip or fail,
// then record the requested percentage for RandomCbFn.
175 mFaultRecords[inId].mNumCallsToSkip = 0;
176 mFaultRecords[inId].mNumCallsToFail = 0;
177 mFaultRecords[inId].mPercentage = inPercentage;
186 * Configure a fault to be triggered deterministically.
188 * @param[in] inId The fault ID
189 * @param[in] inNumCallsToSkip The number of times this fault is to be skipped before it
191 * @param[in] inNumCallsToFail The number of times the fault should be triggered.
192 * @param[in] inTakeMutex By default this method takes the Manager's mutex.
193 * If inTakeMutex is set to kMutexDoNotTake, the mutex is not taken.
195 * @return -EINVAL if the inputs are not valid.
// Switches fault inId to deterministic triggering: skip N calls, then fail M calls.
198 int32_t Manager::FailAtFault(Identifier inId,
199 uint32_t inNumCallsToSkip,
200 uint32_t inNumCallsToFail,
// The counters are narrowed to uint16_t below, so values above UINT16_MAX are rejected
// with -EINVAL rather than silently truncated.
205 nlEXPECT_ACTION(inId < mNumFaults && inNumCallsToSkip <= UINT16_MAX && inNumCallsToFail <= UINT16_MAX, exit, err = -EINVAL);
212 mFaultRecords[inId].mNumCallsToSkip = static_cast<uint16_t>(inNumCallsToSkip);
213 mFaultRecords[inId].mNumCallsToFail = static_cast<uint16_t>(inNumCallsToFail);
// A zero percentage keeps RandomCbFn from drawing for this fault.
214 mFaultRecords[inId].mPercentage = 0;
226 * @overload int32_t FailAtFault(Identifier inId, uint32_t inNumCallsToSkip, uint32_t inNumCallsToFail, bool inTakeMutex)
// Convenience overload: same as the four-argument version with inTakeMutex = kMutexTake.
228 int32_t Manager::FailAtFault(Identifier inId,
229 uint32_t inNumCallsToSkip,
230 uint32_t inNumCallsToFail)
232 return FailAtFault(inId, inNumCallsToSkip, inNumCallsToFail, kMutexTake);
236 * Configure a fault to reboot the system when triggered.
237 * If the application has installed a RebootCallbackFn, it will
238 * be invoked when fault inId is triggered.
239 * If the application has not installed the callback, the system
242 * @param[in] inId The fault ID
244 * @return -EINVAL if the inputs are not valid.
// Flags fault inId so that CheckFault() takes its reboot path when the fault triggers.
247 int32_t Manager::RebootAtFault(Identifier inId)
// Out-of-range IDs produce -EINVAL.
251 nlEXPECT_ACTION(inId < mNumFaults, exit, err = -EINVAL);
// Only the flag is set here; the trigger counters and percentage are left untouched.
255 mFaultRecords[inId].mReboot = true;
264 * Store a set of arguments for a given fault ID.
265 * The array of arguments is made available to the code injected with
266 * the nlFAULT_INJECT macro.
267 * For this to work for a given fault ID, the Manager must allocate memory to
268 * store the arguments and configure the Record's mLengthOfArguments and
269 * mArguments members accordingly.
271 * @param[in] inId The fault ID
272 * @param[in] inNumArgs The number of arguments in the array pointed to by inArgs.
273 * @param[in] inArgs The pointer to the array of integers to be stored in the fault
275 * @return -EINVAL if the inputs are not valid.
// Copies inNumArgs integers from inArgs into the argument storage of fault inId.
278 int32_t Manager::StoreArgsAtFault(Identifier inId, uint16_t inNumArgs, int32_t *inArgs)
// All of the following must hold: valid fault ID, argument storage configured for the
// Record, storage at least inNumArgs long, and a count that fits the uint8_t cast below.
// NOTE(review): inArgs itself is not checked in this condition — confirm callers never
// pass NULL together with a non-zero inNumArgs.
283 nlEXPECT_ACTION(inId < mNumFaults &&
284 mFaultRecords[inId].mArguments != NULL &&
285 mFaultRecords[inId].mLengthOfArguments >= inNumArgs &&
286 inNumArgs <= UINT8_MAX,
// Element-wise copy into the Record's own buffer; the caller's array is not retained.
292 for (i = 0; i < inNumArgs; i++)
294 mFaultRecords[inId].mArguments[i] = inArgs[i];
// Publish how many of the stored slots are valid.
297 mFaultRecords[inId].mNumArguments = static_cast<uint8_t>(inNumArgs);
306 * Attach a callback to a fault ID.
307 * Calling this twice does not attach the callback twice.
309 * @param[in] inId The fault ID
310 * @param[in] inCallBack The callback node to be attached to the fault
313 * @return -EINVAL if the inputs are not valid.
// Puts inCallBack at the head of the callback list of fault inId.
316 int32_t Manager::InsertCallbackAtFault(Identifier inId,
317 Callback *inCallBack)
321 // Make sure it's not already there
322 err = RemoveCallbackAtFault(inId, inCallBack);
// RemoveCallbackAtFault also validates inId and inCallBack, so its error status doubles
// as the input check for this method.
324 nlEXPECT_SUCCESS(err, exit);
328 // Insert the callback at the beginning of the list.
329 // Remember that all lists end into the two default (deterministic
330 // and random) callbacks!
// The node is linked intrusively through its own mNext member: the caller's Callback
// object becomes part of the list, so it has to stay alive while attached.
331 inCallBack->mNext = mFaultRecords[inId].mCallbackList;
332 mFaultRecords[inId].mCallbackList = inCallBack;
341 * Detaches a callback from a fault.
343 * @param[in] inId The fault
344 * @param[in] inCallBack The callback node to be removed.
345 * @param[in] inTakeMutex By default this method takes the Manager's mutex.
346 * If inTakeMutex is set to kMutexDoNotTake, the mutex is not taken.
348 * @return -EINVAL if the inputs are not valid.
// Walks the callback list of fault inId looking for the node inCallBack.
351 int32_t Manager::RemoveCallbackAtFault(Identifier inId,
352 Callback *inCallBack,
// cb holds the address of a link (the list head or some node's mNext), not of a node;
// presumably this lets a match be unlinked by rewriting *cb — TODO confirm.
356 Callback **cb = NULL;
// An out-of-range ID or a NULL callback yields -EINVAL.
358 nlEXPECT_ACTION((inId < mNumFaults) && (inCallBack != NULL), exit, err = -EINVAL);
// Start from the head link of this fault's list.
365 cb = &mFaultRecords[inId].mCallbackList;
// Nodes are compared by address.
369 if (*cb == inCallBack)
// No match here: move on to the mNext link of the current node.
374 cb = &((*cb)->mNext);
387 * @overload int32_t Manager::RemoveCallbackAtFault(Identifier inId, Callback *inCallBack, bool inTakeMutex)
// Convenience overload: same as the three-argument version with inTakeMutex = kMutexTake.
389 int32_t Manager::RemoveCallbackAtFault(Identifier inId,
390 Callback *inCallBack)
392 return RemoveCallbackAtFault(inId, inCallBack, kMutexTake);
396 * When the program traverses the location at which a fault should be injected, this method is invoked
397 * on the manager to query the configuration of the fault ID.
399 * A fault can be triggered randomly, deterministically or on a call-by-call basis by a callback.
400 * All three types of trigger can be installed at the same time, and they all get a chance of
401 * injecting the fault.
403 * @param[in] inId The fault ID
404 * @param[in] inTakeMutex By default this method takes the Manager's mutex.
405 * If inTakeMutex is set to kMutexDoNotTake, the mutex is not taken.
407 * @return true if the fault should be injected; false otherwise.
// Runs the callbacks attached to fault inId, then handles the optional post-injection
// and reboot hooks and updates the per-fault check counter.
409 bool Manager::CheckFault(Identifier inId, bool inTakeMutex)
413 Callback *next = NULL;
// An out-of-range ID fails this expectation; presumably control then jumps to the exit label.
416 nlEXPECT(inId < mNumFaults, exit);
// Start from the head of this fault's callback list.
423 cb = mFaultRecords[inId].mCallbackList;
427 // Save mNext now, in case the callback removes itself
428 // calling RemoveCallbackAtFault
// Each callback receives the fault ID, the fault's Record and the node's own context pointer.
430 if (cb->mCallBackFn(inId, &mFaultRecords[inId], cb->mContext))
// Copy the reboot flag into a local so the decisions below work on that snapshot.
437 reboot = mFaultRecords[inId].mReboot;
// The post-injection hook is optional: it runs only when the fault fired and the
// application installed both a context and the callback.
439 if (retval && sGlobalContext && sGlobalContext->mCbTable.mPostInjectionCb)
441 sGlobalContext->mCbTable.mPostInjectionCb(this, inId, &mFaultRecords[inId]);
// Reboot handling applies only when the fault fired and the Record is flagged for reboot.
444 if (retval && reboot)
446 // If the application has not setup a context and/or reboot callback, the system will crash
447 if (sGlobalContext && sGlobalContext->mCbTable.mRebootCb)
449 sGlobalContext->mCbTable.mRebootCb();
// Statistics counter; ResetFaultCounters() sets it back to zero.
457 mFaultRecords[inId].mNumTimesChecked++;
469 * @overload bool CheckFault(Identifier inId, bool inTakeMutex)
// Convenience overload: same as CheckFault(inId, kMutexTake).
471 bool Manager::CheckFault(Identifier inId)
473 return CheckFault(inId, kMutexTake);
477 * When the program traverses the location at which a fault should be injected, this method is invoked
478 * on the manager to query the configuration of the fault ID.
480 * This version of the method retrieves the arguments stored in the Record.
482 * A fault can be triggered randomly, deterministically or on a call-by-call basis by a callback.
483 * All three types of trigger can be installed at the same time, and they all get a chance of
484 * injecting the fault.
486 * @param[in] inId The fault ID
487 * @param[out] outNumArgs The length of the array pointed to by outArgs
488 * @param[out] outArgs The array of arguments configured for the faultId
489 * @param[in] inTakeMutex By default this method takes the Manager's mutex.
490 * If inTakeMutex is set to kMutexDoNotTake, the mutex is not taken.
492 * @return true if the fault should be injected; false otherwise.
// Variant of CheckFault that also hands back the arguments stored for the fault.
494 bool Manager::CheckFault(Identifier inId, uint16_t &outNumArgs, int32_t *&outArgs, bool inTakeMutex)
// The inner check is always invoked with kMutexDoNotTake; presumably this method does
// its own locking according to inTakeMutex around these statements — TODO confirm.
503 retval = CheckFault(inId, kMutexDoNotTake);
// outArgs receives a pointer into the Record's own storage (not a copy), and
// outNumArgs the number of valid entries in it.
506 outNumArgs = mFaultRecords[inId].mNumArguments;
507 outArgs = mFaultRecords[inId].mArguments;
519 * @overload bool CheckFault(Identifier inId, uint16_t &outNumArgs, int32_t *&outArgs, bool inTakeMutex)
// Convenience overload: same as the four-argument version with inTakeMutex = kMutexTake.
521 bool Manager::CheckFault(Identifier inId, uint16_t &outNumArgs, int32_t *&outArgs)
523 return CheckFault(inId, outNumArgs, outArgs, kMutexTake);
527 * Reset the counters in the fault Records
528 * Note that calling this method does not impact the current configuration
529 * in any way (including the number of times a fault is to be skipped
530 * before it should fail).
// Clears the statistics of every fault without touching its configuration.
532 void Manager::ResetFaultCounters(void)
538 for (id = 0; id < mNumFaults; id++)
// mNumTimesChecked is the counter CheckFault() increments.
540 mFaultRecords[id].mNumTimesChecked = 0;
547 * Reset the configuration of a fault Record
549 * @param[in] inId The fault ID
551 * @return -EINVAL if the inputs are not valid.
// Returns one fault to its unconfigured state and detaches its custom callbacks.
554 int32_t Manager::ResetFaultConfigurations(Identifier inId)
559 nlEXPECT_ACTION((inId < mNumFaults),
// Disarm every trigger: no skips, no failures, no random percentage, no reboot, and
// no published arguments (the argument storage itself is left in place).
565 mFaultRecords[inId].mNumCallsToSkip = 0;
566 mFaultRecords[inId].mNumCallsToFail = 0;
567 mFaultRecords[inId].mPercentage = 0;
568 mFaultRecords[inId].mReboot = 0;
569 mFaultRecords[inId].mNumArguments = 0;
571 cb = mFaultRecords[inId].mCallbackList;
572 // All callback handling code in this module is based on the assumption
573 // that custom callbacks are inserted at the beginning of the list
// Keep removing the head node until the head is the first default callback (or the list is empty).
574 while (cb != sEndOfCustomCallbacks && cb != NULL)
// kMutexDoNotTake: the removal must not take the Manager's mutex here. The status is
// deliberately discarded via the (void) cast.
576 (void)RemoveCallbackAtFault(inId, cb, kMutexDoNotTake);
// Re-read the head; the removal above is expected to have advanced it.
577 cb = mFaultRecords[inId].mCallbackList;
587 * Reset the configuration of all fault Records
589 * @return -EINVAL if the inputs are not valid.
// Applies the per-fault reset to every fault ID managed by this instance.
592 int32_t Manager::ResetFaultConfigurations(void)
597 for (id = 0; id < mNumFaults; id++)
599 err = ResetFaultConfigurations(id);
// Any non-zero status fails this expectation; presumably control then leaves the loop
// through the exit label.
600 nlEXPECT(err == 0, exit);
608 * Take the Manager's mutex.
// Counterpart of Unlock(). NOTE(review): presumably invokes a lock hook with
// mLockContext, mirroring the mUnlock(mLockContext) call in Unlock() — TODO confirm.
610 void Manager::Lock(void)
619 * Release the Manager's mutex.
// Releases the mutex through the mUnlock hook.
621 void Manager::Unlock(void)
// mLockContext is passed through to the hook unchanged.
625 mUnlock(mLockContext);
630 * Configure the instance of GlobalContext to use.
631 * On systems in which faults are configured and injected from different threads,
632 * this function should be called before threads are started.
634 * @param[in] inGlobalContext Pointer to the GlobalContext provided by the application
// Installs the application's GlobalContext for this module.
636 void SetGlobalContext(GlobalContext *inGlobalContext)
// Plain pointer store into the file-local sGlobalContext, which CheckFault() consults
// for the post-injection and reboot callbacks. Passing NULL leaves no context installed.
638 sGlobalContext = inGlobalContext;
644 * This implementation does not check for ERANGE, as it assumes a very simple
645 * underlying implementation of strtol.
647 * @param[in] str Pointer to a string representing an integer
649 * @param[out] num Pointer to the integer result
651 * @return true in case of success; false if the string does not
652 * contain an integer.
// Converts a base-10 string to int32_t, rejecting trailing garbage.
654 static bool ParseInt(const char *str, int32_t *num)
// strtol stores in endptr the address of the first character it did not consume.
660 tmp = strtol(str, &endptr, 10);
// Anything left after the number means the string was not purely an integer.
// NOTE(review): for an empty string strtol consumes nothing and endptr points at the
// terminating '\0', so "" passes this test — confirm that is acceptable.
661 if (!endptr || *endptr != '\0')
// strtol returns long; the cast narrows it where long is wider than 32 bits, and no
// range check is visible before it.
667 *num = static_cast<int32_t>(tmp);
674 * Parse an unsigned integer
676 * @param[in] str Pointer to a string representing an unsigned int
678 * @param[out] num Pointer to the unsigned integer result
680 * @return true in case of success; false if the string does not
681 * contain an unsigned integer.
// Parses an unsigned value by reusing the signed parser.
683 static bool ParseUInt(const char *str, uint32_t *num)
// The text goes through ParseInt first, so values that do not fit int32_t get no
// special handling here.
688 retval = ParseInt(str, &tmpint);
// Convert the signed intermediate to the unsigned result type.
697 *num = static_cast<uint32_t>(tmpint);
705 * Parse a fault-injection configuration string and apply the configuration.
707 * @param[in] aFaultInjectionStr The configuration string. An example of a valid string that
708 * enables two faults is "system_buffer_f5_s1:inet_send_p33"
709 * An example of a configuration string that
710 * also passes three integer arguments to the fault point is
711 * "system_buffer_f5_s1_a10_a7_a-4"
713 * "<module>_<fault>_{f<numTimesToFail>[_s<numTimesToSkip>],p<randomFailurePercentage>}[_a<integer>]..."
715 * @param[in] inArray An array of GetManagerFn callbacks
716 * to be used to parse the string.
718 * @param[in] inArraySize Num of elements in inArray
720 * @return true if the string can be parsed completely; false otherwise
// Array form of the parser: adapts a single GetManagerFn array to the table-based overload.
722 bool ParseFaultInjectionStr(char *aFaultInjectionStr, const GetManagerFn *inArray, size_t inArraySize)
// Wrap the array and its size in one ManagerTable living on the stack...
724 ManagerTable table = { inArray, inArraySize };
725 size_t numTables = 1;
// ...and let the table-based overload do all of the parsing.
727 return ParseFaultInjectionStr(aFaultInjectionStr, &table, numTables);
731 * Parse a fault-injection configuration string and apply the configuration.
733 * @param[in] aFaultInjectionStr The configuration string. An example of a valid string that
734 * enables two faults is "system_buffer_f5_s1:inet_send_p33"
735 * An example of a configuration string that
736 * also passes three integer arguments to the fault point is
737 * "system_buffer_f5_s1_a10_a7_a-4"
739 * "<module>_<fault>_{f<numTimesToFail>[_s<numTimesToSkip>],p<randomFailurePercentage>}[_a<integer>]..."
741 * @param[in] inTables An array of ManagerTable structures
742 * to be used to parse the string.
744 * @param[in] inNumTables Size of inTables
746 * @return true if the string can be parsed completely; false otherwise
// Table form of the parser: splits the string into ':'-separated entries, resolves the
// module and fault named by each entry, parses the remaining '_'-separated options and
// applies them to the matching Manager.
// Note: strtok_r writes terminators into aFaultInjectionStr, so the caller's buffer is
// modified by this function.
748 bool ParseFaultInjectionStr(char *aFaultInjectionStr, const ManagerTable *inTables, size_t inNumTables)
// Two independent tokenizer states: savePtr1 for the ':' level, savePtr2 for the '_' level.
751 char *savePtr1 = NULL;
753 char *savePtr2 = NULL;
754 char *outerString = aFaultInjectionStr;
756 nl::FaultInjection::Identifier j = 0;
// args, numArgs and mgr are declared outside the per-entry loop below.
// NOTE(review): confirm whether they are meant to carry over from one ':' entry to the next.
759 int32_t args[kMaxFaultArgs];
760 uint16_t numArgs = 0;
762 nl::FaultInjection::Manager *mgr = NULL;
763 nl::FaultInjection::Identifier faultId = 0;
765 memset(args, 0, sizeof(args));
// Outer loop: one iteration per ':'-separated fault configuration.
767 while ((tok1 = strtok_r(outerString, ":", &savePtr1)))
// Per-entry option state, reinitialized for every entry.
769 uint32_t numTimesToFail = 0;
770 uint32_t numTimesToSkip = 0;
771 uint32_t percentage = 0;
772 bool gotPercentage = false;
773 bool gotReboot = false;
774 bool gotArguments = false;
775 const Name *faultNames = NULL;
// First '_'-separated field of the entry. Because '_' and ':' are the delimiters,
// module and fault names cannot contain either character.
779 tok2 = strtok_r(tok1, "_", &savePtr2);
780 nlEXPECT(tok2 != NULL, exit);
782 // this is the module
// Search every table for a Manager whose name equals the token exactly.
783 for (i = 0; i < inNumTables; i++)
785 for (j = 0; j < inTables[i].mNumItems; j++)
// Each table item is a getter function; calling it yields a reference to a Manager.
787 nl::FaultInjection::Manager &tmpMgr = inTables[i].mArray[j]();
788 if (!strcmp(tok2, tmpMgr.GetName()))
// No Manager found: parsing stops.
795 nlEXPECT(mgr != NULL, exit);
// Second field of the entry.
797 tok2 = strtok_r(NULL, "_", &savePtr2);
798 nlEXPECT(tok2 != NULL, exit);
800 // this is the fault name
801 faultNames = mgr->GetFaultNames();
// Linear search for an exact name match among the Manager's faults.
802 for (j = 0; j < mgr->GetNumFaults(); j++)
804 if (!strcmp(tok2, faultNames[j]))
// j reaching GetNumFaults() means the loop ran to completion without a match.
811 nlEXPECT(j != mgr->GetNumFaults(), exit);
// Remaining fields are options; tok2[1] is passed to the number parsers, i.e. the
// first character of each option is skipped.
813 while ((tok2 = strtok_r(NULL, "_", &savePtr2)))
// Integer argument: make sure args[] still has room before storing it.
820 nlEXPECT(numArgs < kMaxFaultArgs, exit);
// Arguments are signed (ParseInt); the counters and percentage below are unsigned (ParseUInt).
824 nlEXPECT(ParseInt(&(tok2[1]), &tmp), exit);
825 args[numArgs++] = tmp;
829 nlEXPECT(ParseUInt(&(tok2[1]), &numTimesToFail), exit);
832 nlEXPECT(ParseUInt(&(tok2[1]), &numTimesToSkip), exit);
835 gotPercentage = true;
836 nlEXPECT(ParseUInt(&(tok2[1]), &percentage), exit);
// Range-check here so the uint8_t cast in the FailRandomlyAtFault call below is lossless.
837 nlEXPECT(percentage <= 100, exit);
// Apply the parsed options to the Manager; any failing call stops the parse.
850 err = mgr->StoreArgsAtFault(faultId, numArgs, args);
851 nlEXPECT_SUCCESS(err, exit);
856 err = mgr->FailRandomlyAtFault(faultId, static_cast<uint8_t>(percentage));
857 nlEXPECT_SUCCESS(err, exit);
// FailAtFault takes the skip count before the fail count.
861 err = mgr->FailAtFault(faultId, numTimesToSkip, numTimesToFail);
862 nlEXPECT_SUCCESS(err, exit);
866 err = mgr->RebootAtFault(faultId);
867 nlEXPECT_SUCCESS(err, exit);
878 * Internal function to kill the process if a
879 * fault is supposed to reboot the process but the application
880 * has not installed a callback
// Declared noreturn at the top of the file: this function is not expected to return.
882 static void Die(void)
// Deliberate store through address 1, intended to crash the process. The volatile
// qualifier keeps the compiler from discarding the store.
885 *((volatile long *)1) = 0;
888 } // namespace FaultInjection