Try this. Edit the file lib/worker.c. Add #include <syslog.h> at the top. Go to the function gather_output.
Change the code from this:
Code: Select all
static void gather_output(child_process *cp, iobuf *io, int final)
{
for (;;) {
char buf[4096];
int rd;
rd = read(io->fd, buf, sizeof(buf));
if (rd < 0) {
if (errno == EAGAIN && !final)
break;
if (errno == EINTR || errno == EAGAIN)
continue;
if (!final && errno != EAGAIN)
wlog("job %d (pid=%ld): Failed to read(): %s", cp->id, (long)cp->ei->pid, strerror(errno));
}
to this:
Code: Select all
static void gather_output(child_process *cp, iobuf *io, int final)
{
int retry = 5;
for (;;) {
char buf[4096];
int rd;
rd = read(io->fd, buf, sizeof(buf));
if (rd < 0) {
if (errno == EAGAIN && !final)
break;
if (errno == EINTR || errno == EAGAIN) {
char buf[1024];
if (--retry == 0) {
sprintf(buf, "job %d (pid=%ld): Failed to read(): %s", cp->id, (long)cp->ei->pid, strerror(errno));
syslog(LOG_ERR, buf);
break;
}
sprintf(buf, "job %d (pid=%ld): read() returned error %d", cp->id, (long)cp->ei->pid, errno);
syslog(LOG_ERR, buf);
sleep(1);
continue;
}
if (!final && errno != EAGAIN)
wlog("job %d (pid=%ld): Failed to read(): %s", cp->id, (long)cp->ei->pid, strerror(errno));
}
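Only do this on your test system: each retry calls sleep(1), so a single gather_output() call can block for up to about four seconds, which could introduce unacceptable delays in a production system. If it works, I'll do something more production-ready.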