ptrace(PTRACE_CONT) cannot resume just-attached processes -


I am writing a program that needs to attach to other processes (which might have been created by a previous instance of the program) and watch for when they terminate.

If I keep the program running during the whole lifetime of the processes it created, everything works fine. But if I start a process, kill my program, and restart it, the created process remains in the stopped state forever (it seems that ptrace(PTRACE_CONT, ...) cannot resume it). The code snippet is attached below:

static int exitflag = 0; static void sighandler (int/* signum */) { exitflag = 1; }  int jsfnode::run (void) {     /* load jobs */     {         vector <jobinfo2> jobs;         loadstruct <vector <jobinfo2> > (                 jobfile (), jobs);         (unsigned i=0 ; i<jobs.size () ; i++) {             jobinfo2& info = jobs [i];             string name = info.parm.name;             if (m_jobs.find (name) == m_jobs.end ()) {                 job2& job = m_jobs [name];                 job.info = info;                 /* trace can wait() */                 switch (info.state) {                 case js2active:                 case js2canceling:                 case js2suspending:                 if (ptrace (ptrace_attach, info.pid, 0, 0))                         jdebug ("ptrace_attach failed for: %d (%s)\n", info.pid,                                         strerror (errno));                 default: break;                 }             }         }     }      /* run until signaled stop */     signal (sigint, sighandler);     while (!exitflag)         sleep (1);      /* save jobs */     {         vector <jobinfo2> jobs;         (map <string, job2>::iterator it=m_jobs.begin () ;                         it!=m_jobs.end () ; it++) {             jobinfo2& info = it->second.info;             ptrace (ptrace_detach, info.pid, null, null);             jobs.push_back (info);         }         savestruct <vector <jobinfo2> > (                 jobfile (), jobs);     }      return 0; }  void jsfnode::startjob (job2 & job) {     jobparm2 parm = job.info.parm;     jdebug ("starting \"%s\"..\n", parm.name.c_str());      /* uid of run-as user */     uid_t uid = 0;  /* run root if specified user invalid */     struct passwd * pwe = getpwnam (parm.user.c_str());     if (pwe != null)         uid = pwe->pw_uid;      /* prepare script file */     string scriptfile = m_workdir+"/"+parm.name+"_scriptfile";     ofstream ofscriptfile (scriptfile.c_str());     ofscriptfile << parm.script;     
ofscriptfile.close();     chown (scriptfile.c_str(), uid, uid);     chmod (scriptfile.c_str(), s_irwxu|s_irwxg|s_irwxo);      /* prepare mpimachinefile */     string machinefile = m_workdir+"/"+parm.name+"_machinefile";     ofstream ofmachinefile (machinefile.c_str());     (resource::iterator it=parm.res.begin () ; it!=parm.res.end () ; it++)         ofmachinefile << *it << ':' << parm.taskpernode << '\n';     ofmachinefile.close ();     chown (machinefile.c_str(), uid, uid);     chmod (machinefile.c_str(), s_irwxu|s_irwxg|s_irwxo);      /* prepare redirection channels */     int ipipe [2] = {-1,-1};     int opipe [2] = {-1,-1};     if (parm.redio > 0) {         if (pipe (ipipe) == -1) {             unlink:             unlink (machinefile.c_str());             unlink (scriptfile.c_str());             return; /* not fail job, try later */         }         if (pipe (opipe) == -1) {             close:             close (ipipe [0]);             close (ipipe [1]);             goto unlink;         }     }      /* ok, fork it! 
-----------------> */      pid_t pid;     if ((pid = fork ()) == -1) {         close (opipe [0]);         close (opipe [1]);         goto close;     }      if (pid == 0) {         /* enable parent-tracing */         ptrace (ptrace_traceme, 0, null, null);          /* drop root privilege */         setuid (uid);          /* redirect stdin/stdout */         if (parm.redio) {             if (dup2 (ipipe [0],0)<0 ||                 dup2 (opipe [1],1)<0)                 exit (errno);             close (ipipe [0]);             close (ipipe [1]);             close (opipe [0]);             close (opipe [1]);         }          /* prepare arguments/environments */         char * arg[] = {                 strdup (scriptfile.c_str()),                 strdup (parm.args.c_str()),                 null    /* required null entry */         };         setenv ("mpimachinefile", machinefile.c_str(), 1);         setenv ("display", parm.headnode.c_str(), 1);         setenv ("jsf_jobid", parm.name.c_str(), 1);          /* execute it! ------> */         execv (scriptfile.c_str(), arg);         exit (errno);     }      /* redirect stdin/stdout */     if (parm.redio) {         close (ipipe [0]);         close (opipe [1]);         job.redpipe [0] = opipe [0];         job.redpipe [1] = ipipe [1];     }     /* start nurse thread */     nursedata * nd = new nursedata (this, job);     if (pthread_create (&job.nurseid, null, ::_jobnurse, nd) == 0)         job.nurseactive = true;     else delete nd;      job.info.pid = pid;     setjobstate (job, js2active);         return; }      void jsfnode::monitorjob (job2 & job) {     int status;     pid_t pid = waitpid (job.info.pid, &status, wnohang);     if (pid < 0) {         if (errno == echild) {             /* job process has disappeared.. 
*/             job.exitcode = 0;             setjobstate (job, js2finished);             return;         }     } else if (pid == job.info.pid) {         if (wifexited(status)) {             job.exitcode = wexitstatus(status);             setjobstate (job, js2finished);             return;         } else if (wifsignaled(status)) {             setjobstate (job, js2canceled);             return;         } else if (wifstopped(status)) {             if (ptrace (ptrace_cont, pid, null, null))                 jdebug ("ptrace_cont failed for: %d (%s)\n", pid, strerror(errno));         }     }      /* ... */ } 

Yes, the problem results from multi-threading. If monitorjob() runs in a separate thread, ptrace(PTRACE_CONT) fails. After moving it to the main thread (the one that called ptrace(PTRACE_ATTACH)), things go smoothly.


Comments

Popular posts from this blog

php - Why I am getting the Error "Commands out of sync; you can't run this command now" -

linux - Does gcc have any options to add version info in ELF binary file? -

java - Are there any classes that implement javax.persistence.Parameter<T>? -