Projects
Browse Source     Search     Timeline     Wiki

Changeset 23626

Show
Ignore:
Timestamp:
2008-05-09 15:38:49 (4 months ago)
Author:
zarzycki@…
Message:

<rdar://problem/5834727> 10A37: launchd SIGKILL'ing after 2s

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • trunk/launchd/src/launchd_core_logic.c

    r23620 r23626  
    100100#include "job_forward.h" 
    101101 
     102/* 
     103 * LAUNCHD_SAMPLE_TIMEOUT 
     104 *   If the job hasn't exited in the given number of seconds after sending 
     105 *   it a SIGTERM, start sampling it. 
     106 * LAUNCHD_DEFAULT_EXIT_TIMEOUT 
     107 *   If the job hasn't exited in the given number of seconds after sending 
     108 *   it a SIGTERM, SIGKILL it. Can be overridden in the job plist. 
     109 */ 
    102110#define LAUNCHD_MIN_JOB_RUN_TIME 10 
    103 #define LAUNCHD_DEFAULT_EXIT_TIMEOUT 2 
     111#define LAUNCHD_SAMPLE_TIMEOUT 2 
     112#define LAUNCHD_DEFAULT_EXIT_TIMEOUT 20 
    104113#define LAUNCHD_SIGKILL_TIMER 5 
    105114 
     
    414423             stall_before_exec:1,               /* a hack to support an option of spawn_via_launchd() */ 
    415424             only_once:1,                       /* man launchd.plist --> LaunchOnlyOnce. Note: 5465184 Rename this to "HopefullyNeverExits" */ 
    416              currently_ignored:1,               /* Make job_ignore() /  job_watch() work. If these calls were balanced, then this wouldn't be necessarily. */ 
     425             currently_ignored:1,               /* Make job_ignore() / job_watch() work. If these calls were balanced, then this wouldn't be necessary. */ 
    417426             forced_peers_to_demand_mode:1,     /* A job that forced all other jobs to be temporarily launch-on-demand */ 
    418427             setnice:1,                         /* man launchd.plist --> Nice */ 
     
    420429             removal_pending:1,                 /* a job was asked to be unloaded/removed while running, we'll remove it after it exits */ 
    421430             sent_sigkill:1,                    /* job_kill() was called */ 
     431             sampled:1,                         /* job_force_sampletool() was called (or is disabled) */ 
    422432             debug_before_kill:1,               /* enter the kernel debugger before killing a job */ 
    423433             weird_bootstrap:1,                 /* a hack that launchd+launchctl use during jobmgr_t creation */ 
     
    628638                job_kill(j); 
    629639        } else { 
     640                /* 
     641                 * If sampling is enabled and SAMPLE_TIMEOUT is earlier than the job exit_timeout, 
     642                 * then set a timer for SAMPLE_TIMEOUT seconds after killing 
     643                 */ 
     644                unsigned int exit_timeout = j->exit_timeout; 
     645                bool do_sample = do_apple_internal_logging; 
     646                unsigned int timeout = exit_timeout; 
     647 
     648                if (do_sample && (!exit_timeout || (LAUNCHD_SAMPLE_TIMEOUT < exit_timeout))) { 
     649                        timeout = LAUNCHD_SAMPLE_TIMEOUT; 
     650                } 
     651 
    630652                job_assumes(j, runtime_kill(j->p, SIGTERM) != -1); 
    631653 
    632                 if (j->exit_timeout) { 
     654                if (timeout) { 
     655                        j->sampled = !do_sample; 
    633656                        job_assumes(j, kevent_mod((uintptr_t)&j->exit_timeout, EVFILT_TIMER, 
    634                                                 EV_ADD|EV_ONESHOT, NOTE_SECONDS, j->exit_timeout, j) != -1); 
    635                 } else { 
     657                                                EV_ADD|EV_ONESHOT, NOTE_SECONDS, timeout, j) != -1); 
     658                } 
     659 
     660                if (!exit_timeout) { 
    636661                        job_log(j, LOG_DEBUG, "This job has an infinite exit timeout"); 
    637662                } 
     
    24252450        j->last_exit_status = status; 
    24262451        j->sent_sigkill = false; 
     2452        j->sampled = false; 
    24272453        j->sent_kill_via_shmem = false; 
    24282454        j->lastlookup = NULL; 
     
    26722698                job_dispatch(j, false); 
    26732699        } else if (&j->exit_timeout == ident) { 
     2700                /* 
     2701                 * This block might be executed up to 3 times for a given (slow) job 
     2702                 *  - once for the SAMPLE_TIMEOUT timer, at which point sampling is triggered 
     2703                 *  - once for the exit_timeout timer, at which point: 
     2704                 *          - sampling is performed if not triggered previously 
     2705                 *          - SIGKILL is being sent to the job 
     2706                 *  - once for the SIGKILL_TIMER timer, at which point we log an issue 
     2707                 *    with the long SIGKILL 
     2708                 */ 
    26742709                if (j->sent_sigkill) { 
    26752710                        uint64_t td = runtime_get_nanoseconds_since(j->sent_sigterm_time); 
     
    26782713                        td -= j->exit_timeout; 
    26792714 
    2680                         job_log(j, LOG_ERR, "Did not die after sending SIGKILL %llu seconds ago...", td); 
     2715                        job_log(j, LOG_WARNING, "Did not die after sending SIGKILL %llu seconds ago...", td); 
     2716                } else if (!j->sampled && (!j->exit_timeout || (LAUNCHD_SAMPLE_TIMEOUT < j->exit_timeout))) { 
     2717                        /* This should work even if the job changes its exit_timeout midstream */ 
     2718                        job_log(j, LOG_NOTICE, "Sampling timeout elapsed (%u seconds). Sampling...", LAUNCHD_SAMPLE_TIMEOUT); 
     2719                        if (j->exit_timeout) { 
     2720                                unsigned int ttk = (j->exit_timeout - LAUNCHD_SAMPLE_TIMEOUT); 
     2721                                job_assumes(j, kevent_mod((uintptr_t)&j->exit_timeout, EVFILT_TIMER, 
     2722                                                        EV_ADD|EV_ONESHOT, NOTE_SECONDS, ttk, j) != -1); 
     2723                                job_log(j, LOG_NOTICE, "Scheduled new exit timeout for %u seconds later", ttk); 
     2724                        } 
     2725                        job_force_sampletool(j); 
    26812726                } else { 
    2682                         job_force_sampletool(j); 
     2727                        job_force_sampletool(j); /* no-op if already done in previous pass */ 
    26832728                        if (unlikely(j->debug_before_kill)) { 
    2684                                 job_log(j, LOG_NOTICE, "Exit timeout elapsed. Entering the kernel debugger."); 
     2729                                job_log(j, LOG_NOTICE, "Exit timeout elapsed. Entering the kernel debugger"); 
    26852730                                job_assumes(j, host_reboot(mach_host_self(), HOST_REBOOT_DEBUGGER) == KERN_SUCCESS); 
    26862731                        } 
    2687                         job_log(j, LOG_WARNING, "Exit timeout elapsed (%u seconds). Killing.", j->exit_timeout); 
     2732                        job_log(j, LOG_WARNING, "Exit timeout elapsed (%u seconds). Killing", j->exit_timeout); 
    26882733                        job_kill(j); 
    26892734                } 
     
    33953440                job_assumes(j, dup2(j->stdin_fd, STDIN_FILENO) != -1); 
    33963441        } else { 
    3397                 job_setup_fd(j, STDIN_FILENO,  j->stdinpath, O_RDONLY|O_CREAT); 
     3442                job_setup_fd(j, STDIN_FILENO, j->stdinpath, O_RDONLY|O_CREAT); 
    33983443        } 
    33993444        job_setup_fd(j, STDOUT_FILENO, j->stdoutpath, O_WRONLY|O_CREAT|O_APPEND); 
     
    52075252        pid_t sp; 
    52085253 
    5209         if (!do_apple_internal_logging) { 
     5254        if (j->sampled) { 
    52105255                return; 
    52115256        } 
    5212          
     5257        j->sampled = true; 
     5258 
     5259        if (!job_assumes(j, do_apple_internal_logging)) { 
     5260                return; 
     5261        } 
     5262 
    52135263        if (!job_assumes(j, mkdir(SHUTDOWN_LOG_DIR, S_IRWXU) != -1 || errno == EEXIST)) { 
    52145264                return;