To avoid running a rig for a long time with a crashed GPU, edit the crontab like this:
# Every 5 minutes: reboot the rig if the ethOS status file contains "problem".
# grep reads the file directly (no cat needed); -q suppresses the matched line so
# cron has no output to capture, and -s silences the error when the file is missing.
*/5 * * * * grep -qs problem /var/run/ethos/status.file && sudo reboot
# Every 5 minutes: reboot the rig if crashed_gpus.file contains any digit.
# NOTE(review): presumably the file lists the indices of crashed GPUs and is
# empty when none have crashed -- confirm against the ethOS documentation.
# A single [0-9] check replaces one entry per digit, so digits above 6 are
# also caught and only one reboot is issued per run. -q suppresses the matched
# line so cron has no output to capture; -s silences the error when the file
# is missing.
*/5 * * * * grep -qs '[0-9]' /var/run/ethos/crashed_gpus.file && sudo reboot
