1 | /***************************************
2 | $Revision: 1.9 $
3 |
4 | Socket module - cd_watchdog.c - Socket watchdog - when activated, checks the
5 | socket for new data and discards it. If the
6 | socket is closed, it triggers predefined
7 | functions - executes a function and/or
8 | cancels a thread.
9 |
Status: NOT REVIEWED, TESTED
11 |
12 | Design and implementation by Marek Bukowy.
13 |
14 | Modification history:
15 | marek (August 2000) Created the watchdog part
16 | marek (December 2000) Modified watchdog deactivation -
17 | replaced signals by pthread cancellation.
18 | ******************/ /******************
19 | Copyright (c) 1999,2000,2001,2002 RIPE NCC
20 |
21 | All Rights Reserved
22 |
23 | Permission to use, copy, modify, and distribute this software and its
24 | documentation for any purpose and without fee is hereby granted,
25 | provided that the above copyright notice appear in all copies and that
26 | both that copyright notice and this permission notice appear in
27 | supporting documentation, and that the name of the author not be
28 | used in advertising or publicity pertaining to distribution of the
29 | software without specific, written prior permission.
30 |
31 | THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
32 | ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS; IN NO EVENT SHALL
33 | AUTHOR BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
34 | DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
35 | AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
36 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
37 | ***************************************/
38 |
#include "rip.h"

#include <errno.h>
40 |
41 | /*+ Uncomment this to use watchdog deactivation by signal (may be risky)
42 |
43 | #define WATCHDOG_BY_SIGNAL
44 | +*/
45 |
/*+ once-control handed to pthread_once() by SK_init(), so that
    sk_real_init() is executed at most once per process +*/
static pthread_once_t sk_init_once = PTHREAD_ONCE_INIT;
47 |
48 | #ifdef WATCHDOG_BY_SIGNAL
49 |
50 | /*+ The signal version is complicated to cope with all timing situations.
51 | It uses a thread specific flag to see if the signal handler was invoked
52 | in case the signal arrives before select(3) is called in watchdog.
53 | +*/
54 |
/*+ key of the thread-specific flag: created by sk_real_init(), set to a
    non-NULL value by func_sigusr() and polled by sk_watchdog() +*/
static pthread_key_t sk_watch_tsd;
57 |
58 | /*++++++++++++++++++++++++++++++++++++++
59 | initialisation for the SIGNAL cancellation mode
60 | - initialises the thread specific flag.
61 | ++++++++++++++++++++++++++++++++++++++*/
62 | static void sk_real_init(void)
63 | {
64 | dieif( pthread_key_create( &sk_watch_tsd, NULL) != 0 );
65 | }
66 |
67 |
68 | /*++++++++++++++++++++++++++++++++++++++
69 | sk_watchdog signal handler - sets the thread-specific flag.
70 |
71 | int n signal received. (not used)
72 | ++++++++++++++++++++++++++++++++++++++*/
73 | static void func_sigusr(int n) {
74 | #if 0
75 | /* just for debugging - we don't check the value here */
76 | int *tsd_flag = (int *) pthread_getspecific(sk_watch_tsd);
77 | #endif
78 |
79 | /* 2000/12/18 MB:
80 | DEADLOCK has happened - the watchdog was just getting a mutex
81 | for the ER rwlock when a signal arrived and the execution of the
82 | pthread_mutex_lock function was interrupted AFTER the lock was
83 | grabbed. The this handler was invoked and tried to get that mutex
84 | again. As a result, everything stopped.
85 |
86 | Cures:
87 | 1. Not invoke this here:
88 | ER_dbg_va(FAC_SK, ASP_SK_GEN,"func_sigusr(%d) called", n);
89 |
90 | 2. Not accept any signals during any pthread calls so that this
91 | does not happen again. Must be reimplemented with pthread_cancel
92 | and all the signal stuff must go away. (Done, 2000/12/19).
93 | */
94 | /* set a thread-specific flag that the handler was invoked */
95 |
96 | pthread_setspecific(sk_watch_tsd, (void *)1 );
97 | }
98 |
99 | /*++++++++++++++++++++++++++++++++++++++
100 | watchdog (SIGNAL VERSION) - started as a separate thread.
101 |
102 | Selects on the given socket; discards all input.
103 | whenever it sees end of file (socket closed), it
104 | * sets a corresponding flag in the condat structure,
105 | * triggers the predefined actions (by SK_watchtrigger).
106 |
107 | void *arg - pointer to the connection data structure
108 | ++++++++++++++++++++++++++++++++++++++*/
109 | static
110 | void *sk_watchdog(void *arg)
111 | {
112 | sk_conn_st *condat = (sk_conn_st *) arg;
113 | int nready;
114 | int n;
115 | fd_set rset;
116 | char buff[STR_S];
117 | int socket = condat->sock;
118 | sigset_t sset;
119 | struct sigaction act;
120 |
121 | struct timeval timeout = { 1, 0 }; /* it's a timeout of 1 second */
122 |
123 | FD_ZERO(&rset);
124 | FD_SET(socket, &rset);
125 |
126 | sigemptyset(&sset);
127 | sigaddset(&sset, SIGUSR2);
128 |
129 | act.sa_handler = func_sigusr;
130 | act.sa_flags = 0;
131 | dieif(sigaction(SIGUSR2, &act, NULL) != 0);
132 |
133 | /* XXX in fact, it's unblocked already. Should be blocked on startup */
134 | dieif(pthread_sigmask(SIG_UNBLOCK, &sset, NULL) != 0);
135 |
136 | /* clear the handler's flag */
137 | pthread_setspecific(sk_watch_tsd, NULL);
138 |
139 | /* now ready for signal */
140 | pthread_mutex_unlock( & condat->watchmutex );
141 |
142 | /* hey, viva threaded signal handling! There is no way for select
143 | to unblock a blocked signal, It must be done by "hand" (above).
144 |
145 | Consequently, every once in a while, the signal will be delivered
146 | before the select starts :-/. So, we have to introduce a timeout
147 | for select and check if the signal was delivered anyway....aARGH!!!
148 |
149 | This adds a <timeout interval> to unlucky queries, about 0.1% of all.
150 | */
151 |
152 | while ((nready=select(socket+1, &rset, NULL, NULL, &timeout))!=-1) {
153 |
154 | ER_dbg_va(FAC_SK, ASP_SK_WATCH,"select returned %d", nready);
155 |
156 | /* don't even try to read if we have been killed */
157 | if( errno == EINTR || pthread_getspecific(sk_watch_tsd) != NULL ) {
158 | break;
159 | }
160 |
161 | /* retry if the timeout has triggered */
162 | if( nready == 0 ) {
163 | continue;
164 | }
165 |
166 | /* There was some input or client half of connection was closed */
167 | /* Check for the latter */
168 | if (( n=read(socket, buff, sizeof(buff))) == 0) {
169 | /* Connection was closed by client */
170 | /* Now send a cancellation request to the whois thread. */
171 | /* mysql thread will be terminated by thread cleanup routine */
172 |
173 | /* call the actions: kill and exec (the SK_ functions called
174 | check if the action is defined. Will set the RTC flag on condat
175 | */
176 | SK_watchtrigger(condat);
177 |
178 | /* quit */
179 | break;
180 | }
181 | /* Otherwise dump input and continue */
182 |
183 | }
184 |
185 | /* Exit the watchdog thread, passing NULL as we don't expect a join */
186 | pthread_exit(NULL);
187 |
188 | /* oh yes. Shouldn't compilers _recognize_ library functions ? */
189 | return NULL;
190 | }
191 |
192 |
193 | #else /* not WATCHDOG_BY_SIGNAL */
194 |
195 |
196 | /*++++++++++++++++++++++++++++++++++++++
197 | watchdog (CANCEL VERSION) - started as a separate thread.
198 |
199 | Selects on the given socket; discards all input.
200 | whenever it sees end of file (socket closed), it
201 | * sets a corresponding flag in the condat structure,
202 | * triggers the predefined actions (by SK_watchtrigger).
203 |
204 | void *arg - pointer to the connection data structure
205 | ++++++++++++++++++++++++++++++++++++++*/
206 | static
207 | void *sk_watchdog(void *arg)
208 | {
209 | sk_conn_st *condat = (sk_conn_st *) arg;
210 | int nready;
211 | int n;
212 | char buff[STR_S];
213 | int socket = condat->sock;
214 | struct timeval timeout = { 1, 0 }; /* it's a timeout of 1 second */
215 | fd_set rset;
216 |
217 | /* this is to allow cancellation of the select(3) call */
218 | pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
219 |
220 | /* now ready for the cancellation */
221 | pthread_mutex_unlock( & condat->watchmutex );
222 |
223 | FD_ZERO(&rset);
224 | FD_SET(socket, &rset);
225 | do {
226 | /* run the select exposed to cancellation */
227 | pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
228 | nready=select(socket+1, &rset, NULL, NULL, &timeout);
229 | pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
230 |
231 | ER_dbg_va(FAC_SK, ASP_SK_WATCH,"select returned %d", nready);
232 | /* quit on error */
233 | if( nready < 0 ) {
234 | break;
235 | }
236 |
237 | /* retry if the timeout has triggered */
238 | if( nready == 0 ) {
239 | continue;
240 | }
241 |
242 | /* There was some input or client half of connection was closed */
243 | /* Check for the latter */
244 | if (( n=read(socket, buff, sizeof(buff))) == 0) {
245 | /* Connection was closed by client */
246 | /* Now send a cancellation request to the whois thread. */
247 | /* mysql thread will be terminated by thread cleanup routine */
248 |
249 | /* call the actions: kill and exec (the SK_ functions called
250 | check if the action is defined. Will set the RTC flag on condat
251 | */
252 | SK_watchtrigger(condat);
253 |
254 | /* quit */
255 | break;
256 | }
257 | /* Otherwise dump input and continue */
258 |
259 | } while(nready != -1);
260 |
261 | return NULL; /* quit */
262 | }
263 |
264 |
/*++++++++++++++++++++++++++++++++++++++
  One-time set-up for the PTHREAD_CANCEL deactivation mode.
  This mode keeps no global state, so there is nothing to do; the
  function exists only so that SK_init has something to hand to
  pthread_once in both modes.
  ++++++++++++++++++++++++++++++++++++++*/
static void
sk_real_init(void)
{
  /* intentionally empty */
}
271 |
272 | #endif /* WATCHDOG_BY_SIGNAL */
273 |
274 |
275 | /*++++++++++++++++++++++++++++++++++++++
276 | starts sk_watchdog thread unless already started,
277 | and registers its threadid in the condat structure
278 |
279 | dies if watchdog already running
280 |
281 | er_ret_t SK_watchstart Returns SK_OK on success.
282 |
283 | sk_conn_st *condat pointer to the connection data structure
284 |
285 | The structure may (and normally, should) contain the predefined actions
286 | set by SK_watch_set... functions.
287 | ++++++++++++++++++++++++++++++++++++++*/
288 | er_ret_t
289 | SK_watchstart(sk_conn_st *condat)
290 | {
291 | dieif( condat->watchdog != 0 );
292 |
293 | /* init the mutex in locked state, watchdog will unlock it when
294 | it's ready for signal/cancellation */
295 | pthread_mutex_init( & condat->watchmutex, NULL );
296 | pthread_mutex_lock( & condat->watchmutex );
297 |
298 | /*
299 | Linux doesn't seem to like getting signals in select(), which isn't
300 | technically allowed by POSIX. The workaround in this case is simply
301 | to not create a watchdog for Linux. This is probably acceptable
302 | because we will be changing the query path to perform queries in small
303 | chunks, so if a disconnect occurs it won't consume a lot of database
304 | resources in any case, even without a watchdog.
305 |
306 | SCO has a really small stack, so we don't want to create extra threads.
307 | */
308 | #if !defined(__linux__) && !defined(SCO)
309 | /* NOT DETACHED! */
310 | pthread_create(&condat->watchdog, NULL, sk_watchdog, (void *) condat );
311 | #endif /* __linux__ */
312 |
313 | return SK_OK;
314 | }
315 |
316 |
317 | /*++++++++++++++++++++++++++++++++++++++
318 |
319 | stops running sk_watchdog thread.
320 | If it is not running ( == not registered in the connection struct),
321 | it does nothing.
322 |
323 | er_ret_t SK_watchstop always succeeds (returns SK_OK)
324 |
325 | sk_conn_st *condat pointer to the connection data structure
326 | ++++++++++++++++++++++++++++++++++++++*/
327 | er_ret_t
328 | SK_watchstop(sk_conn_st *condat)
329 | {
330 | void *res;
331 |
332 | if(condat->watchdog > 0) {
333 | int ret;
334 |
335 | /* wait until the watchdog is ready for signal */
336 | pthread_mutex_lock( & condat->watchmutex );
337 |
338 | #ifdef WATCHDOG_BY_SIGNAL
339 | ret = pthread_kill(condat->watchdog, SIGUSR2);
340 | #else
341 | ret = pthread_cancel(condat->watchdog);
342 | #endif
343 |
344 | ret = pthread_join(condat->watchdog, &res);
345 |
346 | pthread_mutex_destroy( & condat->watchmutex );
347 | condat->watchdog = 0;
348 | }
349 | return SK_OK;
350 | }
351 |
352 |
353 | /*++++++++++++++++++++++++++++++++++++++
354 |
355 | void SK_watch_setkill sets the thread id of the thread to be
356 | cancelled by the watchdog watching this socket.
357 | 0 (default) means do not cancel anything.
358 |
359 | sk_conn_st *condat pointer to the connection data structure.
360 |
361 | pthread_t killthis thread id of the thread to be cancelled, or 0.
362 | ++++++++++++++++++++++++++++++++++++++*/
363 | void
364 | SK_watch_setkill(sk_conn_st *condat, pthread_t killthis)
365 | {
366 | condat->killthis = killthis;
367 | }
368 |
369 |
370 | /*++++++++++++++++++++++++++++++++++++++
371 |
372 | void SK_watch_setexec sets the function to be invoked by the watchdog
373 | watching this socket. NULL (default) means do
374 | not invoke anything.
375 |
376 | sk_conn_st *condat pointer to the connection data structure.
377 |
378 | void *(*function)(void *) function to be invoked
379 |
380 | void *args argument to be passed to the function.
381 |
382 | ++++++++++++++++++++++++++++++++++++++*/
383 | void
384 | SK_watch_setexec( sk_conn_st *condat, void *(*function)(void *) , void *args)
385 | {
386 | condat->execthis = function;
387 | condat->execargs = args;
388 | }
389 |
390 |
391 | /*++++++++++++++++++++++++++++++++++++++
392 |
393 | void SK_watch_setclear clears the function and thread id fields so that
394 | nothing gets cancelled or invoked by the
395 | watchdog.
396 |
397 | sk_conn_st *condat pointer to the connection data structure.
398 |
399 | ++++++++++++++++++++++++++++++++++++++*/
400 | void
401 | SK_watch_setclear(sk_conn_st *condat)
402 | {
403 | condat->execthis = NULL;
404 | condat->execargs = NULL;
405 | condat->killthis = 0;
406 | }
407 |
408 | /* call the function to be called if defined */
409 |
410 |
411 | /*++++++++++++++++++++++++++++++++++++++
412 |
413 | void SK_watchexec invokes the predefined function if defined.
414 | (usually called from the watchdog).
415 | Also sets the reason-to-close
416 | flag on this connection to SK_INTERRUPT.
417 |
418 | sk_conn_st *condat pointer to the connection data structure.
419 |
420 | ++++++++++++++++++++++++++++++++++++++*/
421 | void
422 | SK_watchexec(sk_conn_st *condat)
423 | {
424 | /* set the reason-to-close flag on this connection */
425 | condat->rtc |= SK_INTERRUPT;
426 |
427 | if( condat->execthis != NULL ) {
428 | condat->execthis(condat->execargs);
429 | }
430 | }
431 |
432 | /* cancel the thread to be cancelled if defined */
433 |
434 |
435 | /*++++++++++++++++++++++++++++++++++++++
436 |
437 | void SK_watchkill cancels the predefined thread if defined.
438 | (usually called from the watchdog).
439 | Also sets the reason-to-close
440 | flag on this connection to SK_INTERRUPT.
441 |
442 | sk_conn_st *condat pointer to the connection data structure.
443 |
444 | ++++++++++++++++++++++++++++++++++++++*/
445 | void
446 | SK_watchkill(sk_conn_st *condat) {
447 |
448 | /* set the reason-to-close flag on this connection */
449 | condat->rtc |= SK_INTERRUPT;
450 |
451 | /* cancel thread if defined */
452 | if( condat->killthis != 0 ) {
453 | pthread_cancel(condat->killthis);
454 | /* The only possible error is ESRCH, so we do not care about it*/
455 | }
456 | }
457 |
458 |
459 | /*++++++++++++++++++++++++++++++++++++++
460 |
461 | void SK_watchtrigger Wrapper around SK_watchkill and SK_watchexec.
462 | First executes the function, then cancels the
463 | thread.
464 |
465 | sk_conn_st *condat pointer to the connection data structure.
466 |
467 | ++++++++++++++++++++++++++++++++++++++*/
468 | void SK_watchtrigger(sk_conn_st *condat)
469 | {
470 | SK_watchexec(condat);
471 | SK_watchkill(condat);
472 | }
473 |
474 |
475 | /*++++++++++++++++++++++++++++++++++++++
476 | Initialisation function, should be called exactly once
477 | (well, it ignores repeated calls). The actions depend on cancellation
478 | mode (signal or pthread_cancel).
479 | ++++++++++++++++++++++++++++++++++++++*/
480 | void SK_init(void)
481 | {
482 | /* can be called only once */
483 | pthread_once( &sk_init_once, sk_real_init);
484 | }