1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Implementation of the Transmission Control Protocol(TCP).
7 *
8 * Version: @(#)tcp.c 1.0.16 05/25/93
9 *
10 * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
13 * Corey Minyard <wf-rch!minyard@relay.EU.net>
14 * Florian La Roche, <flla@stud.uni-sb.de>
15 * Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
16 * Linus Torvalds, <torvalds@cs.helsinki.fi>
17 * Alan Cox, <gw4pts@gw4pts.ampr.org>
18 * Matthew Dillon, <dillon@apollo.west.oic.com>
19 * Arnt Gulbrandsen, <agulbra@no.unit.nvg>
20 *
21 * Fixes:
22 * Alan Cox : Numerous verify_area() calls
23 * Alan Cox : Set the ACK bit on a reset
24 * Alan Cox : Stopped it crashing if it closed while sk->inuse=1
25 * and was trying to connect (tcp_err()).
26 * Alan Cox : All icmp error handling was broken
27 * pointers passed where wrong and the
28 * socket was looked up backwards. Nobody
29 * tested any icmp error code obviously.
30 * Alan Cox : tcp_err() now handled properly. It wakes people
31 * on errors. select behaves and the icmp error race
32 * has gone by moving it into sock.c
33 * Alan Cox : tcp_reset() fixed to work for everything not just
34 * packets for unknown sockets.
35 * Alan Cox : tcp option processing.
36 * Alan Cox : Reset tweaked (still not 100%) [Had syn rule wrong]
37 * Herp Rosmanith : More reset fixes
38 * Alan Cox : No longer acks invalid rst frames. Acking
39 * any kind of RST is right out.
40 * Alan Cox : Sets an ignore me flag on an rst receive
41 * otherwise odd bits of prattle escape still
42 * Alan Cox : Fixed another acking RST frame bug. Should stop
43 * LAN workplace lockups.
44 * Alan Cox : Some tidyups using the new skb list facilities
45 * Alan Cox : sk->keepopen now seems to work
46 * Alan Cox : Pulls options out correctly on accepts
47 * Alan Cox : Fixed assorted sk->rqueue->next errors
48 * Alan Cox : PSH doesn't end a TCP read. Switched a bit to skb ops.
49 * Alan Cox : Tidied tcp_data to avoid a potential nasty.
50 * Alan Cox : Added some better commenting, as the tcp is hard to follow
51 * Alan Cox : Removed incorrect check for 20 * psh
52 * Michael O'Reilly : ack < copied bug fix.
53 * Johannes Stille : Misc tcp fixes (not all in yet).
54 * Alan Cox : FIN with no memory -> CRASH
55 * Alan Cox : Added socket option proto entries. Also added awareness of them to accept.
56 * Alan Cox : Added TCP options (SOL_TCP)
57 * Alan Cox : Switched wakeup calls to callbacks, so the kernel can layer network sockets.
58 * Alan Cox : Use ip_tos/ip_ttl settings.
59 * Alan Cox : Handle FIN (more) properly (we hope).
60 * Alan Cox : RST frames sent on unsynchronised state ack error/
61 * Alan Cox : Put in missing check for SYN bit.
62 * Alan Cox : Added tcp_select_window() aka NET2E
63 * window non shrink trick.
64 * Alan Cox : Added a couple of small NET2E timer fixes
65 * Charles Hedrick : TCP fixes
66 * Toomas Tamm : TCP window fixes
67 * Alan Cox : Small URG fix to rlogin ^C ack fight
68 * Charles Hedrick : Rewrote most of it to actually work
69 * Linus : Rewrote tcp_read() and URG handling
70 * completely
71 * Gerhard Koerting: Fixed some missing timer handling
72 * Matthew Dillon : Reworked TCP machine states as per RFC
73 * Gerhard Koerting: PC/TCP workarounds
74 * Adam Caldwell : Assorted timer/timing errors
75 * Matthew Dillon : Fixed another RST bug
76 * Alan Cox : Move to kernel side addressing changes.
77 * Alan Cox : Beginning work on TCP fastpathing (not yet usable)
78 * Arnt Gulbrandsen: Turbocharged tcp_check() routine.
79 * Alan Cox : TCP fast path debugging
80 * Alan Cox : Window clamping
81 * Michael Riepe : Bug in tcp_check()
82 *
83 *
84 * To Fix:
85 * Possibly a problem with accept(). BSD accept never fails after
86 * it causes a select. Linux can - given the official select semantics I
87 * feel that _really_ its the BSD network programs that are bust (notably
88 * inetd, which hangs occasionally because of this).
89 *
90 * Fast path the code. Two things here - fix the window calculation
91 * so it doesn't iterate over the queue, also spot packets with no funny
92 * options arriving in order and process directly.
93 *
94 * This program is free software; you can redistribute it and/or
95 * modify it under the terms of the GNU General Public License
96 * as published by the Free Software Foundation; either version
97 * 2 of the License, or(at your option) any later version.
98 *
99 * Description of States:
100 *
101 * TCP_SYN_SENT sent a connection request, waiting for ack
102 *
103 * TCP_SYN_RECV received a connection request, sent ack,
104 * waiting for final ack in three-way handshake.
105 *
106 * TCP_ESTABLISHED connection established
107 *
108 * TCP_FIN_WAIT1 our side has shutdown, waiting to complete
109 * transmission of remaining buffered data
110 *
111 * TCP_FIN_WAIT2 all buffered data sent, waiting for remote
112 * to shutdown
113 *
114 * TCP_CLOSING both sides have shutdown but we still have
115 * data we have to finish sending
116 *
117 * TCP_TIME_WAIT timeout to catch resent junk before entering
118 * closed, can only be entered from FIN_WAIT2
119 * or CLOSING. Required because the other end
120 * may not have gotten our last ACK causing it
121 * to retransmit the data packet (which we ignore)
122 *
123 * TCP_CLOSE_WAIT remote side has shutdown and is waiting for
124 * us to finish writing our data and to shutdown
125 * (we have to close() to move on to LAST_ACK)
126 *
127 * TCP_LAST_ACK our side has shutdown after remote has
128 * shutdown. There may still be data in our
129 * buffer that we have to finish sending
130 *
131 * TCP_CLOSED socket is finished
132 */
133 #include <linux/types.h>
134 #include <linux/sched.h>
135 #include <linux/mm.h>
136 #include <linux/string.h>
137 #include <linux/socket.h>
138 #include <linux/sockios.h>
139 #include <linux/termios.h>
140 #include <linux/in.h>
141 #include <linux/fcntl.h>
142 #include <linux/inet.h>
143 #include <linux/netdevice.h>
144 #include "snmp.h"
145 #include "ip.h"
146 #include "protocol.h"
147 #include "icmp.h"
148 #include "tcp.h"
149 #include <linux/skbuff.h>
150 #include "sock.h"
151 #include "route.h"
152 #include <linux/errno.h>
153 #include <linux/timer.h>
154 #include <asm/system.h>
155 #include <asm/segment.h>
156 #include <linux/mm.h>
157
158 #undef TCP_FASTPATH
159
160 #define SEQ_TICK 3
161 unsigned long seq_offset;
162 struct tcp_mib tcp_statistics;
163
164 #ifdef TCP_FASTPATH
165 unsigned long tcp_rx_miss=0, tcp_rx_hit1=0, tcp_rx_hit2=0;
166 #endif
167
168
/*
 *	Return the smaller of two unsigned quantities.
 */
static __inline__ int min(unsigned int a, unsigned int b)
{
	return (a <= b) ? a : b;
}
175
176
177 /* This routine picks a TCP windows for a socket based on
178 the following constraints
179
180 1. The window can never be shrunk once it is offered (RFC 793)
181 2. We limit memory per socket
182
183 For now we use NET2E3's heuristic of offering half the memory
184 we have handy. All is not as bad as this seems however because
185 of two things. Firstly we will bin packets even within the window
186 in order to get the data we are waiting for into the memory limit.
187 Secondly we bin common duplicate forms at receive time
188
189 Better heuristics welcome
190 */
191
192 int tcp_select_window(struct sock *sk)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
193 {
194 int new_window = sk->prot->rspace(sk);
195
196 if(sk->window_clamp)
197 new_window=min(sk->window_clamp,new_window);
198 /*
199 * two things are going on here. First, we don't ever offer a
200 * window less than min(sk->mss, MAX_WINDOW/2). This is the
201 * receiver side of SWS as specified in RFC1122.
202 * Second, we always give them at least the window they
203 * had before, in order to avoid retracting window. This
204 * is technically allowed, but RFC1122 advises against it and
205 * in practice it causes trouble.
206 */
207 if (new_window < min(sk->mss, MAX_WINDOW/2) || new_window < sk->window)
208 return(sk->window);
209 return(new_window);
210 }
211
212 /*
213 * Enter the time wait state.
214 */
215
216 static void tcp_time_wait(struct sock *sk)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
217 {
218 sk->state = TCP_TIME_WAIT;
219 sk->shutdown = SHUTDOWN_MASK;
220 if (!sk->dead)
221 sk->state_change(sk);
222 reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
223 }
224
225 /*
226 * A timer event has trigger a tcp retransmit timeout. The
227 * socket xmit queue is ready and set up to send. Because
228 * the ack receive code keeps the queue straight we do
229 * nothing clever here.
230 */
231
232 static void tcp_retransmit(struct sock *sk, int all)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
233 {
234 if (all)
235 {
236 ip_retransmit(sk, all);
237 return;
238 }
239
240 sk->ssthresh = sk->cong_window >> 1; /* remember window where we lost */
241 /* sk->ssthresh in theory can be zero. I guess that's OK */
242 sk->cong_count = 0;
243
244 sk->cong_window = 1;
245
246 /* Do the actual retransmit. */
247 ip_retransmit(sk, all);
248 }
249
250
/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition. If err < 0 then the socket should
 * be closed and the error returned to the user. If err > 0
 * it's just the icmp type << 8 | icmp code. After adjustment
 * header points to the first 8 bytes of the tcp header. We need
 * to find the appropriate port.
 */

void tcp_err(int err, unsigned char *header, unsigned long daddr,
	unsigned long saddr, struct inet_protocol *protocol)
{
	struct tcphdr *th;
	struct sock *sk;
	struct iphdr *iph=(struct iphdr *)header;

	/* Skip past the quoted IP header to reach the embedded TCP header. */
	header+=4*iph->ihl;


	th =(struct tcphdr *)header;
	/*
	 * The quoted header is from a packet WE sent, so the port roles
	 * are reversed relative to a normal receive-side lookup.
	 */
	sk = get_sock(&tcp_prot, th->source/*dest*/, daddr, th->dest/*source*/, saddr);

	if (sk == NULL)
		return;

	/* err < 0: fatal; report to the user and stop. */
	if(err<0)
	{
		sk->err = -err;
		sk->error_report(sk);	/* wake anyone sleeping on this socket */
		return;
	}

	if ((err & 0xff00) == (ICMP_SOURCE_QUENCH << 8))
	{
		/*
		 * FIXME:
		 * For now we will just trigger a linear backoff.
		 * The slow start code should cause a real backoff here.
		 */
		if (sk->cong_window > 4)
			sk->cong_window--;
		return;
	}

	/* sk->err = icmp_err_convert[err & 0xff].errno;  -- moved as TCP should hide non fatals internally (and does) */

	/*
	 * If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 */

	if (icmp_err_convert[err & 0xff].fatal)
	{
		/* A fatal ICMP error kills a connect() in progress. */
		if (sk->state == TCP_SYN_SENT)
		{
			tcp_statistics.TcpAttemptFails++;
			sk->state = TCP_CLOSE;
			sk->error_report(sk);	/* Wake people up to see the error (see connect in sock.c) */
		}
		sk->err = icmp_err_convert[err & 0xff].errno;
	}
	return;
}
314
315
/*
 *	Walk down the receive queue counting readable data until we hit the end or we find a gap
 *	in the received data queue (ie a frame missing that needs sending to us)
 */

static int tcp_readable(struct sock *sk)
{
	unsigned long counted;
	unsigned long amount;
	struct sk_buff *skb;
	int sum;
	unsigned long flags;

	if(sk && sk->debug)
		printk("tcp_readable: %p - ",sk);

	/* Walk the queue with interrupts off so it cannot change under us. */
	save_flags(flags);
	cli();
	if (sk == NULL || (skb = skb_peek(&sk->receive_queue)) == NULL)
	{
		restore_flags(flags);
		if(sk && sk->debug)
			printk("empty\n");
		return(0);
	}

	counted = sk->copied_seq+1;	/* Where we are at the moment */
	amount = 0;

	/* Do until a push or until we are out of data. */
	do
	{
		if (before(counted, skb->h.th->seq))	/* Found a hole so stops here */
			break;
		sum = skb->len -(counted - skb->h.th->seq);	/* Length - header but start from where we are up to (avoid overlaps) */
		if (skb->h.th->syn)
			sum++;
		if (sum >= 0)
		{	/* Add it up, move on */
			amount += sum;
			if (skb->h.th->syn)
				amount--;	/* the SYN itself is not readable data */
			counted += sum;
		}
		/* A PSH ends the count once we have something to report. */
		if (amount && skb->h.th->psh) break;
		skb = skb->next;	/* circular list; the queue head sentinel terminates it */
	}
	while(skb != (struct sk_buff *)&sk->receive_queue);

	/* An in-band urgent byte inside the counted range is not readable data. */
	if (amount && !sk->urginline && sk->urg_data &&
	    (sk->urg_seq - sk->copied_seq) <= (counted - sk->copied_seq))
		amount--;	/* don't count urg data */
	restore_flags(flags);
	if(sk->debug)
		printk("got %lu bytes.\n",amount);
	return(amount);
}
373
374
/*
 *	Wait for a TCP event. Note the oddity with SEL_IN and reading. The
 *	listening socket has a receive queue of sockets to accept.
 */

static int tcp_select(struct sock *sk, int sel_type, select_table *wait)
{
	sk->inuse = 1;	/* lock the socket while we examine it */

	switch(sel_type)
	{
		case SEL_IN:
			if(sk->debug)
				printk("select in");
			select_wait(sk->sleep, wait);
			if(sk->debug)
				printk("-select out");
			if (skb_peek(&sk->receive_queue) != NULL)
			{
				/* A listener is readable when it has connections queued. */
				if (sk->state == TCP_LISTEN || tcp_readable(sk))
				{
					release_sock(sk);
					if(sk->debug)
						printk("-select ok data\n");
					return(1);
				}
			}
			if (sk->err != 0)	/* Receiver error */
			{
				release_sock(sk);
				if(sk->debug)
					printk("-select ok error");
				return(1);
			}
			/* A shut receive side also reads ready (EOF). */
			if (sk->shutdown & RCV_SHUTDOWN)
			{
				release_sock(sk);
				if(sk->debug)
					printk("-select ok down\n");
				return(1);
			}
			else
			{
				release_sock(sk);
				if(sk->debug)
					printk("-select fail\n");
				return(0);
			}
		case SEL_OUT:
			select_wait(sk->sleep, wait);
			if (sk->shutdown & SEND_SHUTDOWN)
			{
				/* FIXME: should this return an error? */
				release_sock(sk);
				return(0);
			}

			/*
			 * FIXME:
			 * Hack so it will probably be able to write
			 * something if it says it's ok to write.
			 */

			if (sk->prot->wspace(sk) >= sk->mss)
			{
				release_sock(sk);
				/* This should cause connect to work ok. */
				if (sk->state == TCP_SYN_RECV ||
				    sk->state == TCP_SYN_SENT) return(0);
				return(1);
			}
			release_sock(sk);
			return(0);
		case SEL_EX:
			select_wait(sk->sleep,wait);
			/* Exceptional conditions: socket error or urgent data. */
			if (sk->err || sk->urg_data)
			{
				release_sock(sk);
				return(1);
			}
			release_sock(sk);
			return(0);
	}

	/* Unknown select type: not ready. */
	release_sock(sk);
	return(0);
}
462
463
464 int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
465 {
466 int err;
467 switch(cmd)
468 {
469
470 case TIOCINQ:
471 #ifdef FIXME /* FIXME: */
472 case FIONREAD:
473 #endif
474 {
475 unsigned long amount;
476
477 if (sk->state == TCP_LISTEN)
478 return(-EINVAL);
479
480 sk->inuse = 1;
481 amount = tcp_readable(sk);
482 release_sock(sk);
483 err=verify_area(VERIFY_WRITE,(void *)arg,
484 sizeof(unsigned long));
485 if(err)
486 return err;
487 put_fs_long(amount,(unsigned long *)arg);
488 return(0);
489 }
490 case SIOCATMARK:
491 {
492 int answ = sk->urg_data && sk->urg_seq == sk->copied_seq+1;
493
494 err = verify_area(VERIFY_WRITE,(void *) arg,
495 sizeof(unsigned long));
496 if (err)
497 return err;
498 put_fs_long(answ,(int *) arg);
499 return(0);
500 }
501 case TIOCOUTQ:
502 {
503 unsigned long amount;
504
505 if (sk->state == TCP_LISTEN) return(-EINVAL);
506 amount = sk->prot->wspace(sk);
507 err=verify_area(VERIFY_WRITE,(void *)arg,
508 sizeof(unsigned long));
509 if(err)
510 return err;
511 put_fs_long(amount,(unsigned long *)arg);
512 return(0);
513 }
514 default:
515 return(-EINVAL);
516 }
517 }
518
519
/*
 *	This routine computes a TCP checksum.
 *
 *	Returns the one's-complement checksum over the pseudo-header
 *	(saddr, daddr, protocol, length - folded in by the first asm
 *	block) followed by "len" bytes of TCP header+data at "th"
 *	(summed by the second asm block).  i386 only: GCC inline
 *	assembly with multi-line string literals (old GNU C extension).
 */

unsigned short tcp_check(struct tcphdr *th, int len,
	  unsigned long saddr, unsigned long daddr)
{
	unsigned long sum;

	/* saddr == 0 means "use our own address" for locally built packets. */
	if (saddr == 0) saddr = ip_my_addr();

	/*
	 * stupid, gcc complains when I use just one __asm__ block,
	 * something about too many reloads, but this is just two
	 * instructions longer than what I want
	 */
	__asm__("
	addl %%ecx, %%ebx
	adcl %%edx, %%ebx
	adcl $0, %%ebx
	"
	: "=b"(sum)
	: "0"(daddr), "c"(saddr), "d"((ntohs(len) << 16) + IPPROTO_TCP*256)
	: "bx", "cx", "dx" );
	__asm__("
	movl %%ecx, %%edx
	cld
	cmpl $32, %%ecx
	jb 2f
	shrl $5, %%ecx
	clc
1:	lodsl
	adcl %%eax, %%ebx
	lodsl
	adcl %%eax, %%ebx
	lodsl
	adcl %%eax, %%ebx
	lodsl
	adcl %%eax, %%ebx
	lodsl
	adcl %%eax, %%ebx
	lodsl
	adcl %%eax, %%ebx
	lodsl
	adcl %%eax, %%ebx
	lodsl
	adcl %%eax, %%ebx
	loop 1b
	adcl $0, %%ebx
	movl %%edx, %%ecx
2:	andl $28, %%ecx
	je 4f
	shrl $2, %%ecx
	clc
3:	lodsl
	adcl %%eax, %%ebx
	loop 3b
	adcl $0, %%ebx
4:	movl $0, %%eax
	testw $2, %%dx
	je 5f
	lodsw
	addl %%eax, %%ebx
	adcl $0, %%ebx
	movw $0, %%ax
5:	test $1, %%edx
	je 6f
	lodsb
	addl %%eax, %%ebx
	adcl $0, %%ebx
6:	movl %%ebx, %%eax
	shrl $16, %%eax
	addw %%ax, %%bx
	adcw $0, %%bx
	"
	: "=b"(sum)
	: "0"(sum), "c"(len), "S"(th)
	: "ax", "bx", "cx", "dx", "si" );

	/* We only want the bottom 16 bits, but we never cleared the top 16. */

	return((~sum) & 0xffff);
}
603
604
605
606 void tcp_send_check(struct tcphdr *th, unsigned long saddr,
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
607 unsigned long daddr, int len, struct sock *sk)
608 {
609 th->check = 0;
610 th->check = tcp_check(th, len, saddr, daddr);
611 return;
612 }
613
/*
 *	Queue or transmit one fully built TCP segment.  If the segment
 *	falls outside the send window, we are mid-retransmit, or the
 *	congestion window is full, it is parked on the write queue for
 *	later; otherwise it is checksummed and handed to IP now.
 */
static void tcp_send_skb(struct sock *sk, struct sk_buff *skb)
{
	int size;
	struct tcphdr * th = skb->h.th;

	/* length of packet (not counting length of pre-tcp headers) */
	size = skb->len - ((unsigned char *) th - skb->data);

	/* sanity check it.. */
	if (size < sizeof(struct tcphdr) || size > skb->len)
	{
		printk("tcp_send_skb: bad skb (skb = %p, data = %p, th = %p, len = %lu)\n",
			skb, skb->data, th, skb->len);
		kfree_skb(skb, FREE_WRITE);
		return;
	}

	/* If we have queued a header size packet.. */
	if (size == sizeof(struct tcphdr))
	{
		/* If its got a syn or fin its notionally included in the size..*/
		if(!th->syn && !th->fin)
		{
			printk("tcp_send_skb: attempt to queue a bogon.\n");
			kfree_skb(skb,FREE_WRITE);
			return;
		}
	}

	tcp_statistics.TcpOutSegs++;

	/* Sequence number just past this segment's payload. */
	skb->h.seq = ntohl(th->seq) + size - 4*th->doff;
	if (after(skb->h.seq, sk->window_seq) ||
	    (sk->retransmits && sk->timeout == TIME_WRITE) ||
	    sk->packets_out >= sk->cong_window)
	{
		/* Hold the segment back on the write queue. */
		/* checksum will be supplied by tcp_write_xmit. So
		 * we shouldn't need to set it at all. I'm being paranoid */
		th->check = 0;
		if (skb->next != NULL)
		{
			printk("tcp_send_partial: next != NULL\n");
			skb_unlink(skb);
		}
		skb_queue_tail(&sk->write_queue, skb);
		/* Window closed with nothing in flight: start zero-window probing. */
		if (before(sk->window_seq, sk->write_queue.next->h.seq) &&
		    sk->send_head == NULL &&
		    sk->ack_backlog == 0)
			reset_timer(sk, TIME_PROBE0, sk->rto);
	}
	else
	{
		/* Send now: stamp the latest ack and window, checksum, and go. */
		th->ack_seq = ntohl(sk->acked_seq);
		th->window = ntohs(tcp_select_window(sk));

		tcp_send_check(th, sk->saddr, sk->daddr, size, sk);

		sk->sent_seq = sk->write_seq;
		sk->prot->queue_xmit(sk, skb->dev, skb, 0);
	}
}
675
676 struct sk_buff * tcp_dequeue_partial(struct sock * sk)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
677 {
678 struct sk_buff * skb;
679 unsigned long flags;
680
681 save_flags(flags);
682 cli();
683 skb = sk->partial;
684 if (skb)
685 {
686 sk->partial = NULL;
687 del_timer(&sk->partial_timer);
688 }
689 restore_flags(flags);
690 return skb;
691 }
692
693 static void tcp_send_partial(struct sock *sk)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
694 {
695 struct sk_buff *skb;
696
697 if (sk == NULL)
698 return;
699 while ((skb = tcp_dequeue_partial(sk)) != NULL)
700 tcp_send_skb(sk, skb);
701 }
702
703 void tcp_enqueue_partial(struct sk_buff * skb, struct sock * sk)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
704 {
705 struct sk_buff * tmp;
706 unsigned long flags;
707
708 save_flags(flags);
709 cli();
710 tmp = sk->partial;
711 if (tmp)
712 del_timer(&sk->partial_timer);
713 sk->partial = skb;
714 sk->partial_timer.expires = HZ;
715 sk->partial_timer.function = (void (*)(unsigned long)) tcp_send_partial;
716 sk->partial_timer.data = (unsigned long) sk;
717 add_timer(&sk->partial_timer);
718 restore_flags(flags);
719 if (tmp)
720 tcp_send_skb(sk, tmp);
721 }
722
723
/*
 *	This routine sends an ack and also updates the window.
 *
 *	"sequence" is the sequence number to send, "ack" the value to
 *	acknowledge; "th" is the header of the segment being answered
 *	(used to swap the port numbers).
 */

static void tcp_send_ack(unsigned long sequence, unsigned long ack,
	     struct sock *sk,
	     struct tcphdr *th, unsigned long daddr)
{
	struct sk_buff *buff;
	struct tcphdr *t1;
	struct device *dev = NULL;
	int tmp;

	if(sk->zapped)
		return;	/* We have been reset, we may not send again */
	/*
	 * We need to grab some memory, and put together an ack,
	 * and then put it into the queue to be sent.
	 */

	buff = sk->prot->wmalloc(sk, MAX_ACK_SIZE, 1, GFP_ATOMIC);
	if (buff == NULL)
	{
		/* No memory now: note the debt so the ack goes out later. */
		/* Force it to send an ack. */
		sk->ack_backlog++;
		if (sk->timeout != TIME_WRITE && tcp_connected(sk->state))
		{
			reset_timer(sk, TIME_WRITE, 10);
		}
		return;
	}

	buff->len = sizeof(struct tcphdr);
	buff->sk = sk;
	buff->localroute = sk->localroute;
	t1 =(struct tcphdr *) buff->data;

	/* Put in the IP header and routing stuff. */
	tmp = sk->prot->build_header(buff, sk->saddr, daddr, &dev,
				IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
	if (tmp < 0)
	{
		/* Header build failed: give the buffer back. */
		buff->free=1;
		sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
		return;
	}
	buff->len += tmp;
	t1 =(struct tcphdr *)((char *)t1 +tmp);

	/* FIXME: */
	memcpy(t1, th, sizeof(*t1));	/* this should probably be removed */

	/*
	 * Swap the send and the receive.
	 */

	t1->dest = th->source;
	t1->source = th->dest;
	t1->seq = ntohl(sequence);
	t1->ack = 1;
	sk->window = tcp_select_window(sk);
	t1->window = ntohs(sk->window);
	t1->res1 = 0;
	t1->res2 = 0;
	t1->rst = 0;
	t1->urg = 0;
	t1->syn = 0;
	t1->psh = 0;
	t1->fin = 0;
	/* Acking everything received: clear the pending-ack bookkeeping. */
	if (ack == sk->acked_seq)
	{
		sk->ack_backlog = 0;
		sk->bytes_rcv = 0;
		sk->ack_timed = 0;
		if (sk->send_head == NULL && skb_peek(&sk->write_queue) == NULL
		    && sk->timeout == TIME_WRITE)
		{
			/* Nothing in flight: keepalive timer or no timer at all. */
			if(sk->keepopen)
				reset_timer(sk,TIME_KEEPOPEN,TCP_TIMEOUT_LEN);
			else
				delete_timer(sk);
		}
	}
	t1->ack_seq = ntohl(ack);
	t1->doff = sizeof(*t1)/4;
	tcp_send_check(t1, sk->saddr, daddr, sizeof(*t1), sk);
	if (sk->debug)
		printk("\rtcp_ack: seq %lx ack %lx\n", sequence, ack);
	tcp_statistics.TcpOutSegs++;
	sk->prot->queue_xmit(sk, dev, buff, 1);
}
815
816
817 /*
818 * This routine builds a generic TCP header.
819 */
820
821 static int tcp_build_header(struct tcphdr *th, struct sock *sk, int push)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
822 {
823
824 /* FIXME: want to get rid of this. */
825 memcpy(th,(void *) &(sk->dummy_th), sizeof(*th));
826 th->seq = htonl(sk->write_seq);
827 th->psh =(push == 0) ? 1 : 0;
828 th->doff = sizeof(*th)/4;
829 th->ack = 1;
830 th->fin = 0;
831 sk->ack_backlog = 0;
832 sk->bytes_rcv = 0;
833 sk->ack_timed = 0;
834 th->ack_seq = htonl(sk->acked_seq);
835 sk->window = tcp_select_window(sk)/*sk->prot->rspace(sk)*/;
836 th->window = htons(sk->window);
837
838 return(sizeof(*th));
839 }
840
841 /*
842 * This routine copies from a user buffer into a socket,
843 * and starts the transmit system.
844 */
845
846 static int tcp_write(struct sock *sk, unsigned char *from,
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
847 int len, int nonblock, unsigned flags)
848 {
849 int copied = 0;
850 int copy;
851 int tmp;
852 struct sk_buff *skb;
853 struct sk_buff *send_tmp;
854 unsigned char *buff;
855 struct proto *prot;
856 struct device *dev = NULL;
857
858 sk->inuse=1;
859 prot = sk->prot;
860 while(len > 0)
861 {
862 if (sk->err)
863 { /* Stop on an error */
864 release_sock(sk);
865 if (copied)
866 return(copied);
867 tmp = -sk->err;
868 sk->err = 0;
869 return(tmp);
870 }
871
872 /*
873 * First thing we do is make sure that we are established.
874 */
875
876 if (sk->shutdown & SEND_SHUTDOWN)
877 {
878 release_sock(sk);
879 sk->err = EPIPE;
880 if (copied)
881 return(copied);
882 sk->err = 0;
883 return(-EPIPE);
884 }
885
886
887 /*
888 * Wait for a connection to finish.
889 */
890
891 while(sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT)
892 {
893 if (sk->err)
894 {
895 release_sock(sk);
896 if (copied)
897 return(copied);
898 tmp = -sk->err;
899 sk->err = 0;
900 return(tmp);
901 }
902
903 if (sk->state != TCP_SYN_SENT && sk->state != TCP_SYN_RECV)
904 {
905 release_sock(sk);
906 if (copied)
907 return(copied);
908
909 if (sk->err)
910 {
911 tmp = -sk->err;
912 sk->err = 0;
913 return(tmp);
914 }
915
916 if (sk->keepopen)
917 {
918 send_sig(SIGPIPE, current, 0);
919 }
920 return(-EPIPE);
921 }
922
923 if (nonblock || copied)
924 {
925 release_sock(sk);
926 if (copied)
927 return(copied);
928 return(-EAGAIN);
929 }
930
931 release_sock(sk);
932 cli();
933
934 if (sk->state != TCP_ESTABLISHED &&
935 sk->state != TCP_CLOSE_WAIT && sk->err == 0)
936 {
937 interruptible_sleep_on(sk->sleep);
938 if (current->signal & ~current->blocked)
939 {
940 sti();
941 if (copied)
942 return(copied);
943 return(-ERESTARTSYS);
944 }
945 }
946 sk->inuse = 1;
947 sti();
948 }
949
950 /*
951 * The following code can result in copy <= if sk->mss is ever
952 * decreased. It shouldn't be. sk->mss is min(sk->mtu, sk->max_window).
953 * sk->mtu is constant once SYN processing is finished. I.e. we
954 * had better not get here until we've seen his SYN and at least one
955 * valid ack. (The SYN sets sk->mtu and the ack sets sk->max_window.)
956 * But ESTABLISHED should guarantee that. sk->max_window is by definition
957 * non-decreasing. Note that any ioctl to set user_mss must be done
958 * before the exchange of SYN's. If the initial ack from the other
959 * end has a window of 0, max_window and thus mss will both be 0.
960 */
961
962 /*
963 * Now we need to check if we have a half built packet.
964 */
965
966 if ((skb = tcp_dequeue_partial(sk)) != NULL)
967 {
968 int hdrlen;
969
970 /* IP header + TCP header */
971 hdrlen = ((unsigned long)skb->h.th - (unsigned long)skb->data)
972 + sizeof(struct tcphdr);
973
974 /* Add more stuff to the end of skb->len */
975 if (!(flags & MSG_OOB))
976 {
977 copy = min(sk->mss - (skb->len - hdrlen), len);
978 /* FIXME: this is really a bug. */
979 if (copy <= 0)
980 {
981 printk("TCP: **bug**: \"copy\" <= 0!!\n");
982 copy = 0;
983 }
984
985 memcpy_fromfs(skb->data + skb->len, from, copy);
986 skb->len += copy;
987 from += copy;
988 copied += copy;
989 len -= copy;
990 sk->write_seq += copy;
991 }
992 if ((skb->len - hdrlen) >= sk->mss ||
993 (flags & MSG_OOB) || !sk->packets_out)
994 tcp_send_skb(sk, skb);
995 else
996 tcp_enqueue_partial(skb, sk);
997 continue;
998 }
999
1000 /*
1001 * We also need to worry about the window.
1002 * If window < 1/2 the maximum window we've seen from this
1003 * host, don't use it. This is sender side
1004 * silly window prevention, as specified in RFC1122.
1005 * (Note that this is different than earlier versions of
1006 * SWS prevention, e.g. RFC813.). What we actually do is
1007 * use the whole MSS. Since the results in the right
1008 * edge of the packet being outside the window, it will
1009 * be queued for later rather than sent.
1010 */
1011
1012 copy = sk->window_seq - sk->write_seq;
1013 if (copy <= 0 || copy < (sk->max_window >> 1) || copy > sk->mss)
1014 copy = sk->mss;
1015 if (copy > len)
1016 copy = len;
1017
1018 /*
1019 * We should really check the window here also.
1020 */
1021
1022 send_tmp = NULL;
1023 if (copy < sk->mss && !(flags & MSG_OOB))
1024 {
1025 /*
1026 * We will release the socket incase we sleep here.
1027 */
1028 release_sock(sk);
1029 /*
1030 * NB: following must be mtu, because mss can be increased.
1031 * mss is always <= mtu
1032 */
1033 skb = prot->wmalloc(sk, sk->mtu + 128 + prot->max_header, 0, GFP_KERNEL);
1034 sk->inuse = 1;
1035 send_tmp = skb;
1036 }
1037 else
1038 {
1039 /*
1040 * We will release the socket incase we sleep here.
1041 */
1042 release_sock(sk);
1043 skb = prot->wmalloc(sk, copy + prot->max_header , 0, GFP_KERNEL);
1044 sk->inuse = 1;
1045 }
1046
1047 /*
1048 * If we didn't get any memory, we need to sleep.
1049 */
1050
1051 if (skb == NULL)
1052 {
1053 if (nonblock /* || copied */)
1054 {
1055 release_sock(sk);
1056 if (copied)
1057 return(copied);
1058 return(-EAGAIN);
1059 }
1060
1061 /*
1062 * FIXME: here is another race condition.
1063 */
1064
1065 tmp = sk->wmem_alloc;
1066 release_sock(sk);
1067 cli();
1068 /*
1069 * Again we will try to avoid it.
1070 */
1071 if (tmp <= sk->wmem_alloc &&
1072 (sk->state == TCP_ESTABLISHED||sk->state == TCP_CLOSE_WAIT)
1073 && sk->err == 0)
1074 {
1075 interruptible_sleep_on(sk->sleep);
1076 if (current->signal & ~current->blocked)
1077 {
1078 sti();
1079 if (copied)
1080 return(copied);
1081 return(-ERESTARTSYS);
1082 }
1083 }
1084 sk->inuse = 1;
1085 sti();
1086 continue;
1087 }
1088
1089 skb->len = 0;
1090 skb->sk = sk;
1091 skb->free = 0;
1092 skb->localroute = sk->localroute|(flags&MSG_DONTROUTE);
1093
1094 buff = skb->data;
1095
1096 /*
1097 * FIXME: we need to optimize this.
1098 * Perhaps some hints here would be good.
1099 */
1100
1101 tmp = prot->build_header(skb, sk->saddr, sk->daddr, &dev,
1102 IPPROTO_TCP, sk->opt, skb->mem_len,sk->ip_tos,sk->ip_ttl);
1103 if (tmp < 0 )
1104 {
1105 prot->wfree(sk, skb->mem_addr, skb->mem_len);
1106 release_sock(sk);
1107 if (copied)
1108 return(copied);
1109 return(tmp);
1110 }
1111 skb->len += tmp;
1112 skb->dev = dev;
1113 buff += tmp;
1114 skb->h.th =(struct tcphdr *) buff;
1115 tmp = tcp_build_header((struct tcphdr *)buff, sk, len-copy);
1116 if (tmp < 0)
1117 {
1118 prot->wfree(sk, skb->mem_addr, skb->mem_len);
1119 release_sock(sk);
1120 if (copied)
1121 return(copied);
1122 return(tmp);
1123 }
1124
1125 if (flags & MSG_OOB)
1126 {
1127 ((struct tcphdr *)buff)->urg = 1;
1128 ((struct tcphdr *)buff)->urg_ptr = ntohs(copy);
1129 }
1130 skb->len += tmp;
1131 memcpy_fromfs(buff+tmp, from, copy);
1132
1133 from += copy;
1134 copied += copy;
1135 len -= copy;
1136 skb->len += copy;
1137 skb->free = 0;
1138 sk->write_seq += copy;
1139
1140 if (send_tmp != NULL && sk->packets_out)
1141 {
1142 tcp_enqueue_partial(send_tmp, sk);
1143 continue;
1144 }
1145 tcp_send_skb(sk, skb);
1146 }
1147 sk->err = 0;
1148
1149 /*
1150 * Nagles rule. Turn Nagle off with TCP_NODELAY for highly
1151 * interactive fast network servers. It's meant to be on and
1152 * it really improves the throughput though not the echo time
1153 * on my slow slip link - Alan
1154 */
1155
1156 /*
1157 * Avoid possible race on send_tmp - c/o Johannes Stille
1158 */
1159
1160 if(sk->partial && ((!sk->packets_out)
1161 /* If not nagling we can send on the before case too.. */
1162 || (sk->nonagle && before(sk->write_seq , sk->window_seq))
1163 ))
1164 tcp_send_partial(sk);
1165
1166 release_sock(sk);
1167 return(copied);
1168 }
1169
1170
1171 static int tcp_sendto(struct sock *sk, unsigned char *from,
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
1172 int len, int nonblock, unsigned flags,
1173 struct sockaddr_in *addr, int addr_len)
1174 {
1175 if (flags & ~(MSG_OOB|MSG_DONTROUTE))
1176 return -EINVAL;
1177 if (addr_len < sizeof(*addr))
1178 return(-EINVAL);
1179 if (addr->sin_family && addr->sin_family != AF_INET)
1180 return(-EINVAL);
1181 if (addr->sin_port != sk->dummy_th.dest)
1182 return(-EISCONN);
1183 if (addr->sin_addr.s_addr != sk->daddr)
1184 return(-EISCONN);
1185 return(tcp_write(sk, from, len, nonblock, flags));
1186 }
1187
1188
/*
 *	Send a pure ACK (no data) to the peer, advertising our current
 *	receive window. Called when the reader has consumed data and
 *	sk->ack_backlog says we owe the other end acknowledgements.
 *	Allocation is GFP_ATOMIC since this can run from timer/bh context;
 *	on allocation failure we simply retry via a short timer.
 */
static void tcp_read_wakeup(struct sock *sk)
{
	int tmp;
	struct device *dev = NULL;
	struct tcphdr *t1;
	struct sk_buff *buff;

	/* No un-acked segments pending: nothing to do. */
	if (!sk->ack_backlog)
		return;

	/*
	 * FIXME: we need to put code here to prevent this routine from
	 * being called. Being called once in a while is ok, so only check
	 * if this is the second time in a row.
	 */

	/*
	 * We need to grab some memory, and put together an ack,
	 * and then put it into the queue to be sent.
	 */

	buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
	if (buff == NULL)
	{
		/* Try again real soon. */
		reset_timer(sk, TIME_WRITE, 10);
		return;
	}

	buff->len = sizeof(struct tcphdr);
	buff->sk = sk;
	buff->localroute = sk->localroute;

	/*
	 * Put in the IP header and routing stuff.
	 */

	tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
			 IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
	if (tmp < 0)
	{
		/* No route: drop the buffer and give up silently. */
		buff->free=1;
		sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
		return;
	}

	buff->len += tmp;
	t1 =(struct tcphdr *)(buff->data +tmp);

	/* Start from the template header, then patch in ACK-only fields. */
	memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
	t1->seq = htonl(sk->sent_seq);
	t1->ack = 1;
	t1->res1 = 0;
	t1->res2 = 0;
	t1->rst = 0;
	t1->urg = 0;
	t1->syn = 0;
	t1->psh = 0;
	/* This ACK covers everything received so far; clear the debt. */
	sk->ack_backlog = 0;
	sk->bytes_rcv = 0;
	sk->window = tcp_select_window(sk);/*sk->prot->rspace(sk);*/
	/* NOTE(review): ntohs/ntohl used where htons/htonl are meant; the
	   transforms are identical byte swaps, so the result is the same. */
	t1->window = ntohs(sk->window);
	t1->ack_seq = ntohl(sk->acked_seq);
	t1->doff = sizeof(*t1)/4;
	tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
	/* Queue for transmit; final arg 1 means "free after sending". */
	sk->prot->queue_xmit(sk, dev, buff, 1);
	tcp_statistics.TcpOutSegs++;
}
1257
1258
1259 /*
1260 * FIXME:
1261 * This routine frees used buffers.
1262 * It should consider sending an ACK to let the
1263 * other end know we now have a bigger window.
1264 */
1265
/*
 *	Release fully-consumed receive buffers and, if that opened up
 *	enough window, arrange for an ACK (window update) to be sent —
 *	either immediately via tcp_read_wakeup() or delayed via the
 *	socket timer.
 */
static void cleanup_rbuf(struct sock *sk)
{
	unsigned long flags;
	unsigned long left;
	struct sk_buff *skb;
	unsigned long rspace;

	if(sk->debug)
		printk("cleaning rbuf for sk=%p\n", sk);

	/* Interrupts off while we walk the receive queue: the network
	   bottom half also manipulates it. */
	save_flags(flags);
	cli();

	/* Remember how much receive space we had before freeing. */
	left = sk->prot->rspace(sk);

	/*
	 * We have to loop through all the buffer headers,
	 * and try to free up all the space we can.
	 */

	while((skb=skb_peek(&sk->receive_queue)) != NULL)
	{
		/* Queue is in sequence order; stop at the first buffer
		   the reader has not finished with. */
		if (!skb->used)
			break;
		skb_unlink(skb);
		skb->sk = sk;
		kfree_skb(skb, FREE_READ);
	}

	restore_flags(flags);

	/*
	 * FIXME:
	 * At this point we should send an ack if the difference
	 * in the window, and the amount of space is bigger than
	 * TCP_WINDOW_DIFF.
	 */

	if(sk->debug)
		printk("sk->rspace = %lu, was %lu\n", sk->prot->rspace(sk),
			left);
	if ((rspace=sk->prot->rspace(sk)) != left)
	{
		/*
		 * This area has caused the most trouble. The current strategy
		 * is to simply do nothing if the other end has room to send at
		 * least 3 full packets, because the ack from those will auto-
		 * matically update the window. If the other end doesn't think
		 * we have much space left, but we have room for atleast 1 more
		 * complete packet than it thinks we do, we will send an ack
		 * immediatedly. Otherwise we will wait up to .5 seconds in case
		 * the user reads some more.
		 */
		sk->ack_backlog++;
		/*
		 * It's unclear whether to use sk->mtu or sk->mss here. They differ only
		 * if the other end is offering a window smaller than the agreed on MSS
		 * (called sk->mtu here). In theory there's no connection between send
		 * and receive, and so no reason to think that they're going to send
		 * small packets. For the moment I'm using the hack of reducing the mss
		 * only on the send side, so I'm putting mtu here.
		 */

		if (rspace > (sk->window - sk->bytes_rcv + sk->mtu))
		{
			/* Send an ack right now. */
			tcp_read_wakeup(sk);
		}
		else
		{
			/* Force it to send an ack soon. */
			int was_active = del_timer(&sk->timer);
			/* NOTE(review): compares TCP_ACK_TIME (a delay) against
			   timer.expires (looks like an absolute time) — presumably
			   relies on the era's timer conventions; verify before reuse. */
			if (!was_active || TCP_ACK_TIME < sk->timer.expires)
			{
				reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
			}
			else
				add_timer(&sk->timer);
		}
	}
}
1347
1348
1349 /*
1350 * Handle reading urgent data.
1351 */
1352
/*
 *	Handle reading urgent data.
 *
 *	Reads the single byte of TCP urgent (out-of-band) data kept in
 *	sk->urg_data. Returns 1 with the byte stored at *to, 0 on clean
 *	EOF/shutdown, or a negative errno (-EINVAL if OOB data is inline
 *	or already read, -EAGAIN, -ERESTARTSYS, -ENOTCONN, or a pending
 *	socket error). Blocks unless `nonblock` is set; MSG_PEEK leaves
 *	the byte marked unread.
 */
static int tcp_read_urg(struct sock * sk, int nonblock,
	     unsigned char *to, int len, unsigned flags)
{
	struct wait_queue wait = { current, NULL };

	while (len > 0)
	{
		/* OOB data folded into the normal stream, or no OOB byte,
		   or the byte was already consumed: nothing to read here. */
		if (sk->urginline || !sk->urg_data || sk->urg_data == URG_READ)
			return -EINVAL;
		if (sk->urg_data & URG_VALID)
		{
			/* The low byte of urg_data holds the OOB octet itself. */
			char c = sk->urg_data;
			if (!(flags & MSG_PEEK))
				sk->urg_data = URG_READ;
			put_fs_byte(c, to);
			return 1;
		}

		/* Deliver any pending asynchronous error exactly once. */
		if (sk->err)
		{
			int tmp = -sk->err;
			sk->err = 0;
			return tmp;
		}

		/* Connection gone: first caller sees EOF, later ones ENOTCONN. */
		if (sk->state == TCP_CLOSE || sk->done)
		{
			if (!sk->done) {
				sk->done = 1;
				return 0;
			}
			return -ENOTCONN;
		}

		if (sk->shutdown & RCV_SHUTDOWN)
		{
			sk->done = 1;
			return 0;
		}

		if (nonblock)
			return -EAGAIN;

		if (current->signal & ~current->blocked)
			return -ERESTARTSYS;

		/* Sleep until the expected urgent byte arrives (URG_NOTYET),
		   an error is posted, or the receive side shuts down. */
		current->state = TASK_INTERRUPTIBLE;
		add_wait_queue(sk->sleep, &wait);
		if ((sk->urg_data & URG_NOTYET) && sk->err == 0 &&
		    !(sk->shutdown & RCV_SHUTDOWN))
			schedule();
		remove_wait_queue(sk->sleep, &wait);
		current->state = TASK_RUNNING;
	}
	return 0;
}
1409
1410
1411 /*
1412 * This routine copies from a sock struct into the user buffer.
1413 */
1414
/*
 *	This routine copies from a sock struct into the user buffer.
 *
 *	Main TCP receive path: copies in-sequence data from the socket's
 *	receive queue to user space. Handles MSG_PEEK (consumes nothing),
 *	MSG_OOB (delegated to tcp_read_urg), urgent-data boundaries,
 *	blocking and non-blocking modes. Returns bytes copied or a
 *	negative errno.
 */
static int tcp_read(struct sock *sk, unsigned char *to,
	int len, int nonblock, unsigned flags)
{
	struct wait_queue wait = { current, NULL };
	int copied = 0;
	unsigned long peek_seq;
	unsigned long *seq;	/* sequence cursor: real copied_seq, or a peek copy */
	unsigned long used;

	/* This error should be checked. */
	if (sk->state == TCP_LISTEN)
		return -ENOTCONN;

	/* Urgent data needs to be handled specially. */
	if (flags & MSG_OOB)
		return tcp_read_urg(sk, nonblock, to, len, flags);

	/* When peeking, advance a private cursor so nothing is consumed. */
	peek_seq = sk->copied_seq;
	seq = &sk->copied_seq;
	if (flags & MSG_PEEK)
		seq = &peek_seq;

	add_wait_queue(sk->sleep, &wait);
	sk->inuse = 1;
	while (len > 0)
	{
		struct sk_buff * skb;
		unsigned long offset;

		/*
		 * are we at urgent data? Stop if we have read anything.
		 */
		if (copied && sk->urg_data && sk->urg_seq == 1+*seq)
			break;

		/* Set INTERRUPTIBLE before scanning, so a wakeup between the
		   scan and schedule() below is not lost. */
		current->state = TASK_INTERRUPTIBLE;

		/* Walk the receive queue looking for the buffer that contains
		   the next sequence number we want. */
		skb = skb_peek(&sk->receive_queue);
		do
		{
			if (!skb)
				break;
			/* Gap before the next buffer: data not yet arrived. */
			if (before(1+*seq, skb->h.th->seq))
				break;
			offset = 1 + *seq - skb->h.th->seq;
			/* SYN occupies a sequence number but carries no data. */
			if (skb->h.th->syn)
				offset--;
			if (offset < skb->len)
				goto found_ok_skb;
			/* Entirely consumed buffer: mark for cleanup_rbuf(). */
			if (!(flags & MSG_PEEK))
				skb->used = 1;
			skb = skb->next;
		}
		while (skb != (struct sk_buff *)&sk->receive_queue);

		/* Nothing more available; return what we already copied. */
		if (copied)
			break;

		if (sk->err)
		{
			copied = -sk->err;
			sk->err = 0;
			break;
		}

		if (sk->state == TCP_CLOSE)
		{
			if (!sk->done)
			{
				sk->done = 1;	/* first read after close: EOF */
				break;
			}
			copied = -ENOTCONN;
			break;
		}

		if (sk->shutdown & RCV_SHUTDOWN)
		{
			sk->done = 1;
			break;
		}

		if (nonblock)
		{
			copied = -EAGAIN;
			break;
		}

		/* ACK what we consumed, drop the lock, and wait for data. */
		cleanup_rbuf(sk);
		release_sock(sk);
		schedule();
		sk->inuse = 1;

		if (current->signal & ~current->blocked)
		{
			copied = -ERESTARTSYS;
			break;
		}
		continue;

	found_ok_skb:
		/* Ok so how much can we use ? */
		used = skb->len - offset;
		if (len < used)
			used = len;
		/* do we have urgent data here? */
		if (sk->urg_data)
		{
			unsigned long urg_offset = sk->urg_seq - (1 + *seq);
			if (urg_offset < used)
			{
				if (!urg_offset)
				{
					/* OOB byte is next: skip it unless
					   the user wants urgent data inline. */
					if (!sk->urginline)
					{
						++*seq;
						offset++;
						used--;
					}
				}
				else
					/* Stop the copy just short of the
					   urgent byte. */
					used = urg_offset;
			}
		}
		/* Copy it */
		memcpy_tofs(to,((unsigned char *)skb->h.th) +
			skb->h.th->doff*4 + offset, used);
		copied += used;
		len -= used;
		to += used;
		*seq += used;
		/* Once we have read past the urgent pointer, forget it. */
		if (after(sk->copied_seq+1,sk->urg_seq))
			sk->urg_data = 0;
		if (!(flags & MSG_PEEK) && (used + offset >= skb->len))
			skb->used = 1;
	}
	remove_wait_queue(sk->sleep, &wait);
	current->state = TASK_RUNNING;

	/* Clean up data we have read: This will do ACK frames */
	cleanup_rbuf(sk);
	release_sock(sk);
	return copied;
}
1559
1560
1561 /*
1562 * Shutdown the sending side of a connection.
1563 */
1564
/*
 *	Shutdown the sending side of a connection.
 *
 *	Flushes any partially-built packet, builds and transmits (or
 *	queues) a FIN, and moves the connection into FIN_WAIT1 /
 *	LAST_ACK / FIN_WAIT2 as appropriate. A no-op unless `how`
 *	includes SEND_SHUTDOWN or if a FIN was already sent.
 */
void tcp_shutdown(struct sock *sk, int how)
{
	struct sk_buff *buff;
	struct tcphdr *t1, *th;
	struct proto *prot;
	int tmp;
	struct device *dev = NULL;

	/*
	 * We need to grab some memory, and put together a FIN,
	 * and then put it into the queue to be sent.
	 * FIXME:
	 *
	 * Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92.
	 * Most of this is guesswork, so maybe it will work...
	 */

	if (!(how & SEND_SHUTDOWN))
		return;

	/*
	 * If we've already sent a FIN, return.
	 */

	if (sk->state == TCP_FIN_WAIT1 ||
	    sk->state == TCP_FIN_WAIT2 ||
	    sk->state == TCP_CLOSING ||
	    sk->state == TCP_LAST_ACK ||
	    sk->state == TCP_TIME_WAIT
	)
	{
		return;
	}
	sk->inuse = 1;

	/*
	 * flag that the sender has shutdown
	 */

	sk->shutdown |= SEND_SHUTDOWN;

	/*
	 * Clear out any half completed packets.
	 */

	if (sk->partial)
		tcp_send_partial(sk);

	prot =(struct proto *)sk->prot;
	th =(struct tcphdr *)&sk->dummy_th;
	release_sock(sk); /* incase the malloc sleeps. */
	buff = prot->wmalloc(sk, MAX_RESET_SIZE,1 , GFP_KERNEL);
	if (buff == NULL)
		return;
	sk->inuse = 1;

	buff->sk = sk;
	buff->len = sizeof(*t1);
	buff->localroute = sk->localroute;
	t1 =(struct tcphdr *) buff->data;

	/*
	 * Put in the IP header and routing stuff.
	 */

	tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
			 IPPROTO_TCP, sk->opt,
			 sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
	if (tmp < 0)
	{
		/*
		 * Finish anyway, treat this as a send that got lost.
		 *
		 * Enter FIN_WAIT1 on normal shutdown, which waits for
		 * written data to be completely acknowledged along
		 * with an acknowledge to our FIN.
		 *
		 * Enter FIN_WAIT2 on abnormal shutdown -- close before
		 * connection established.
		 */
		buff->free=1;
		prot->wfree(sk,buff->mem_addr, buff->mem_len);

		if (sk->state == TCP_ESTABLISHED)
			sk->state = TCP_FIN_WAIT1;
		else if(sk->state == TCP_CLOSE_WAIT)
			sk->state = TCP_LAST_ACK;
		else
			sk->state = TCP_FIN_WAIT2;

		release_sock(sk);
		return;
	}

	/* Build the FIN segment itself after the IP header. */
	t1 =(struct tcphdr *)((char *)t1 +tmp);
	buff->len += tmp;
	buff->dev = dev;
	memcpy(t1, th, sizeof(*t1));
	t1->seq = ntohl(sk->write_seq);
	sk->write_seq++;	/* the FIN consumes one sequence number */
	buff->h.seq = sk->write_seq;
	t1->ack = 1;
	t1->ack_seq = ntohl(sk->acked_seq);
	t1->window = ntohs(sk->window=tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
	t1->fin = 1;
	t1->rst = 0;
	t1->doff = sizeof(*t1)/4;
	tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);

	/*
	 * Can't just queue this up.
	 * It should go at the end of the write queue.
	 */

	if (skb_peek(&sk->write_queue) != NULL)
	{
		/* Unsent data still queued: FIN must follow it. */
		buff->free=0;
		if (buff->next != NULL)
		{
			printk("tcp_shutdown: next != NULL\n");
			skb_unlink(buff);
		}
		skb_queue_tail(&sk->write_queue, buff);
	}
	else
	{
		/* Queue empty: transmit the FIN immediately. */
		sk->sent_seq = sk->write_seq;
		sk->prot->queue_xmit(sk, dev, buff, 0);
	}

	/* Advance the connection state to reflect the sent FIN. */
	if (sk->state == TCP_ESTABLISHED)
		sk->state = TCP_FIN_WAIT1;
	else if (sk->state == TCP_CLOSE_WAIT)
		sk->state = TCP_LAST_ACK;
	else
		sk->state = TCP_FIN_WAIT2;

	release_sock(sk);
}
1704
1705
1706 static int
1707 tcp_recvfrom(struct sock *sk, unsigned char *to,
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
1708 int to_len, int nonblock, unsigned flags,
1709 struct sockaddr_in *addr, int *addr_len)
1710 {
1711 int result;
1712
1713 /*
1714 * Have to check these first unlike the old code. If
1715 * we check them after we lose data on an error
1716 * which is wrong
1717 */
1718
1719 if(addr_len)
1720 *addr_len = sizeof(*addr);
1721 result=tcp_read(sk, to, to_len, nonblock, flags);
1722
1723 if (result < 0)
1724 return(result);
1725
1726 if(addr)
1727 {
1728 addr->sin_family = AF_INET;
1729 addr->sin_port = sk->dummy_th.dest;
1730 addr->sin_addr.s_addr = sk->daddr;
1731 }
1732 return(result);
1733 }
1734
1735
1736 /*
1737 * This routine will send an RST to the other tcp.
1738 */
1739
/*
 *	This routine will send an RST to the other tcp.
 *
 *	Builds a minimal RST segment in response to the received header
 *	`th` (per RFC 793: if the offending segment had an ACK, the RST
 *	takes its sequence from that ACK and carries no ACK of its own;
 *	otherwise we send seq 0 and ACK the offending segment). Uses
 *	GFP_ATOMIC since it can be called from packet-receive context.
 */
static void tcp_reset(unsigned long saddr, unsigned long daddr, struct tcphdr *th,
	  struct proto *prot, struct options *opt, struct device *dev, int tos, int ttl)
{
	struct sk_buff *buff;
	struct tcphdr *t1;
	int tmp;
	struct device *ndev=NULL;

	/*
	 * We need to grab some memory, and put together an RST,
	 * and then put it into the queue to be sent.
	 */

	buff = prot->wmalloc(NULL, MAX_RESET_SIZE, 1, GFP_ATOMIC);
	if (buff == NULL)
		return;

	buff->len = sizeof(*t1);
	buff->sk = NULL;	/* no owning socket: this is an orphan segment */
	buff->dev = dev;
	buff->localroute = 0;

	t1 =(struct tcphdr *) buff->data;

	/*
	 * Put in the IP header and routing stuff.
	 */

	tmp = prot->build_header(buff, saddr, daddr, &ndev, IPPROTO_TCP, opt,
			   sizeof(struct tcphdr),tos,ttl);
	if (tmp < 0)
	{
		buff->free = 1;
		prot->wfree(NULL, buff->mem_addr, buff->mem_len);
		return;
	}

	t1 =(struct tcphdr *)((char *)t1 +tmp);
	buff->len += tmp;
	/* Start from the offending header, then swap/patch fields. */
	memcpy(t1, th, sizeof(*t1));

	/*
	 * Swap the send and the receive.
	 */

	t1->dest = th->source;
	t1->source = th->dest;
	t1->rst = 1;
	t1->window = 0;

	if(th->ack)
	{
		/* RST in response to an ACK: mirror its ack_seq as our seq. */
		t1->ack = 0;
		t1->seq = th->ack_seq;
		t1->ack_seq = 0;
	}
	else
	{
		/* No ACK on the offending segment: send seq 0 and
		   acknowledge it (SYN consumes one sequence number). */
		t1->ack = 1;
		if(!th->syn)
			t1->ack_seq=htonl(th->seq);
		else
			t1->ack_seq=htonl(th->seq+1);
		t1->seq=0;
	}

	t1->syn = 0;
	t1->urg = 0;
	t1->fin = 0;
	t1->psh = 0;
	t1->doff = sizeof(*t1)/4;
	tcp_send_check(t1, saddr, daddr, sizeof(*t1), NULL);
	prot->queue_xmit(NULL, dev, buff, 1);
	tcp_statistics.TcpOutSegs++;
}
1815
1816
1817 /*
1818 * Look for tcp options. Parses everything but only knows about MSS.
1819 * This routine is always called with the packet containing the SYN.
1820 * However it may also be called with the ack to the SYN. So you
1821 * can't assume this is always the SYN. It's always called after
1822 * we have set up sk->mtu to our own MTU.
1823 */
1824
1825 static void tcp_options(struct sock *sk, struct tcphdr *th)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
1826 {
1827 unsigned char *ptr;
1828 int length=(th->doff*4)-sizeof(struct tcphdr);
1829 int mss_seen = 0;
1830
1831 ptr = (unsigned char *)(th + 1);
1832
1833 while(length>0)
1834 {
1835 int opcode=*ptr++;
1836 int opsize=*ptr++;
1837 switch(opcode)
1838 {
1839 case TCPOPT_EOL:
1840 return;
1841 case TCPOPT_NOP:
1842 length-=2;
1843 continue;
1844
1845 default:
1846 if(opsize<=2) /* Avoid silly options looping forever */
1847 return;
1848 switch(opcode)
1849 {
1850 case TCPOPT_MSS:
1851 if(opsize==4 && th->syn)
1852 {
1853 sk->mtu=min(sk->mtu,ntohs(*(unsigned short *)ptr));
1854 mss_seen = 1;
1855 }
1856 break;
1857 /* Add other options here as people feel the urge to implement stuff like large windows */
1858 }
1859 ptr+=opsize-2;
1860 length-=opsize;
1861 }
1862 }
1863 if (th->syn)
1864 {
1865 if (! mss_seen)
1866 sk->mtu=min(sk->mtu, 536); /* default MSS if none sent */
1867 }
1868 #ifdef CONFIG_INET_PCTCP
1869 sk->mss = min(sk->max_window >> 1, sk->mtu);
1870 #else
1871 sk->mss = min(sk->max_window, sk->mtu);
1872 #endif
1873 }
1874
/*
 *	Compute the classful (RFC 791) network mask for an IPv4 address
 *	given in network byte order: class A -> /8, class B -> /16,
 *	everything else (class C and beyond) -> /24. The mask is
 *	returned in network byte order as well.
 */
static inline unsigned long default_mask(unsigned long dst)
{
	unsigned long host = ntohl(dst);

	if (IN_CLASSA(host))
		return htonl(IN_CLASSA_NET);
	return IN_CLASSB(host) ? htonl(IN_CLASSB_NET) : htonl(IN_CLASSC_NET);
}
1884
1885 /*
1886 * This routine handles a connection request.
1887 * It should make sure we haven't already responded.
1888 * Because of the way BSD works, we have to send a syn/ack now.
1889 * This also means it will be harder to close a socket which is
1890 * listening.
1891 */
1892
/*
 *	This routine handles a connection request (incoming SYN on a
 *	listening socket). It clones the listening sock into a new sock
 *	in SYN_RECV state, picks an MSS from the route / user setting /
 *	subnet heuristics, parses the SYN's options, and transmits a
 *	SYN-ACK carrying our MSS option. The SYN skb itself is queued on
 *	the listener so accept() can find the embryonic connection.
 *
 *	It should make sure we haven't already responded.
 *	Because of the way BSD works, we have to send a syn/ack now.
 *	This also means it will be harder to close a socket which is
 *	listening.
 */
static void tcp_conn_request(struct sock *sk, struct sk_buff *skb,
	 unsigned long daddr, unsigned long saddr,
	 struct options *opt, struct device *dev)
{
	struct sk_buff *buff;
	struct tcphdr *t1;
	unsigned char *ptr;
	struct sock *newsk;
	struct tcphdr *th;
	struct device *ndev=NULL;
	int tmp;
	struct rtable *rt;

	th = skb->h.th;

	/* If the socket is dead, don't accept the connection. */
	if (!sk->dead)
	{
		sk->data_ready(sk,0);
	}
	else
	{
		/* Nobody is listening any more: refuse with an RST. */
		tcp_reset(daddr, saddr, th, sk->prot, opt, dev, sk->ip_tos,sk->ip_ttl);
		tcp_statistics.TcpAttemptFails++;
		kfree_skb(skb, FREE_READ);
		return;
	}

	/*
	 * Make sure we can accept more. This will prevent a
	 * flurry of syns from eating up all our memory.
	 */

	if (sk->ack_backlog >= sk->max_ack_backlog)
	{
		tcp_statistics.TcpAttemptFails++;
		kfree_skb(skb, FREE_READ);
		return;
	}

	/*
	 * We need to build a new sock struct.
	 * It is sort of bad to have a socket without an inode attached
	 * to it, but the wake_up's will just wake up the listening socket,
	 * and if the listening socket is destroyed before this is taken
	 * off of the queue, this will take care of it.
	 */

	newsk = (struct sock *) kmalloc(sizeof(struct sock), GFP_ATOMIC);
	if (newsk == NULL)
	{
		/* just ignore the syn. It will get retransmitted. */
		tcp_statistics.TcpAttemptFails++;
		kfree_skb(skb, FREE_READ);
		return;
	}

	/* Clone the listener, then reset everything per-connection. */
	memcpy(newsk, sk, sizeof(*newsk));
	skb_queue_head_init(&newsk->write_queue);
	skb_queue_head_init(&newsk->receive_queue);
	newsk->send_head = NULL;
	newsk->send_tail = NULL;
	skb_queue_head_init(&newsk->back_log);
	newsk->rtt = 0;		/*TCP_CONNECT_TIME<<3*/
	newsk->rto = TCP_TIMEOUT_INIT;
	newsk->mdev = 0;
	newsk->max_window = 0;
	newsk->cong_window = 1;	/* slow start: one segment */
	newsk->cong_count = 0;
	newsk->ssthresh = 0;
	newsk->backoff = 0;
	newsk->blog = 0;
	newsk->intr = 0;
	newsk->proc = 0;
	newsk->done = 0;
	newsk->partial = NULL;
	newsk->pair = NULL;
	newsk->wmem_alloc = 0;
	newsk->rmem_alloc = 0;
	newsk->localroute = sk->localroute;

	newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF;

	newsk->err = 0;
	newsk->shutdown = 0;
	newsk->ack_backlog = 0;
	/* SYN consumes one sequence number, hence the +1 on acked_seq. */
	newsk->acked_seq = skb->h.th->seq+1;
	newsk->fin_seq = skb->h.th->seq;
	newsk->copied_seq = skb->h.th->seq;
	newsk->state = TCP_SYN_RECV;
	newsk->timeout = 0;
	/* Pick our initial send sequence number from the clock. */
	newsk->write_seq = jiffies * SEQ_TICK - seq_offset;
	newsk->window_seq = newsk->write_seq;
	newsk->rcv_ack_seq = newsk->write_seq;
	newsk->urg_data = 0;
	newsk->retransmits = 0;
	newsk->destroy = 0;
	newsk->timer.data = (unsigned long)newsk;
	newsk->timer.function = &net_timer;
	newsk->dummy_th.source = skb->h.th->dest;
	newsk->dummy_th.dest = skb->h.th->source;

	/*
	 * Swap these two, they are from our point of view.
	 */

	newsk->daddr = saddr;
	newsk->saddr = daddr;

	put_sock(newsk->num,newsk);
	newsk->dummy_th.res1 = 0;
	newsk->dummy_th.doff = 6;
	newsk->dummy_th.fin = 0;
	newsk->dummy_th.syn = 0;
	newsk->dummy_th.rst = 0;
	newsk->dummy_th.psh = 0;
	newsk->dummy_th.ack = 0;
	newsk->dummy_th.urg = 0;
	newsk->dummy_th.res2 = 0;
	newsk->acked_seq = skb->h.th->seq + 1;
	newsk->copied_seq = skb->h.th->seq;

	/*
	 * Grab the ttl and tos values and use them
	 */

	newsk->ip_ttl=sk->ip_ttl;
	newsk->ip_tos=skb->ip_hdr->tos;

	/*
	 * Use 512 or whatever user asked for
	 */

	/*
	 * Note use of sk->user_mss, since user has no direct access to newsk
	 */

	rt=ip_rt_route(saddr, NULL,NULL);

	/* NOTE(review): the window clamp from the route is stored on the
	   LISTENING sk, not newsk -- looks suspicious; verify intent. */
	if(rt!=NULL && (rt->rt_flags&RTF_WINDOW))
		sk->window_clamp=rt->rt_window;
	else
		sk->window_clamp=0;

	/* MSS selection: explicit user setting, then route MSS, then a
	   conservative 576 for off-net peers, else the maximum. */
	if (sk->user_mss)
		newsk->mtu = sk->user_mss;
	else if(rt!=NULL && (rt->rt_flags&RTF_MSS))
		newsk->mtu = rt->rt_mss - HEADER_SIZE;
	else
	{
#ifdef CONFIG_INET_SNARL	/* Sub Nets ARe Local */
		if ((saddr ^ daddr) & default_mask(saddr))
#else
		if ((saddr ^ daddr) & dev->pa_mask)
#endif
			newsk->mtu = 576 - HEADER_SIZE;
		else
			newsk->mtu = MAX_WINDOW;
	}

	/*
	 * But not bigger than device MTU
	 */

	newsk->mtu = min(newsk->mtu, dev->mtu - HEADER_SIZE);

	/*
	 * This will min with what arrived in the packet
	 */

	tcp_options(newsk,skb->h.th);

	/* Build the SYN-ACK reply. */
	buff = newsk->prot->wmalloc(newsk, MAX_SYN_SIZE, 1, GFP_ATOMIC);
	if (buff == NULL)
	{
		sk->err = -ENOMEM;
		newsk->dead = 1;
		release_sock(newsk);
		kfree_skb(skb, FREE_READ);
		tcp_statistics.TcpAttemptFails++;
		return;
	}

	/* Header plus 4 bytes for the MSS option. */
	buff->len = sizeof(struct tcphdr)+4;
	buff->sk = newsk;
	buff->localroute = newsk->localroute;

	t1 =(struct tcphdr *) buff->data;

	/*
	 * Put in the IP header and routing stuff.
	 */

	tmp = sk->prot->build_header(buff, newsk->saddr, newsk->daddr, &ndev,
			       IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);

	/*
	 * Something went wrong.
	 */

	if (tmp < 0)
	{
		sk->err = tmp;
		buff->free=1;
		kfree_skb(buff,FREE_WRITE);
		newsk->dead = 1;
		release_sock(newsk);
		skb->sk = sk;
		kfree_skb(skb, FREE_READ);
		tcp_statistics.TcpAttemptFails++;
		return;
	}

	buff->len += tmp;
	t1 =(struct tcphdr *)((char *)t1 +tmp);

	memcpy(t1, skb->h.th, sizeof(*t1));
	buff->h.seq = newsk->write_seq;
	/*
	 * Swap the send and the receive.
	 */
	t1->dest = skb->h.th->source;
	t1->source = newsk->dummy_th.source;
	t1->seq = ntohl(newsk->write_seq++);
	t1->ack = 1;
	newsk->window = tcp_select_window(newsk);/*newsk->prot->rspace(newsk);*/
	newsk->sent_seq = newsk->write_seq;
	t1->window = ntohs(newsk->window);
	t1->res1 = 0;
	t1->res2 = 0;
	t1->rst = 0;
	t1->urg = 0;
	t1->psh = 0;
	t1->syn = 1;
	t1->ack_seq = ntohl(skb->h.th->seq+1);
	t1->doff = sizeof(*t1)/4+1;	/* +1 word for the MSS option */
	/* Append the MSS option: kind 2, length 4, 16-bit MSS. */
	ptr =(unsigned char *)(t1+1);
	ptr[0] = 2;
	ptr[1] = 4;
	ptr[2] = ((newsk->mtu) >> 8) & 0xff;
	ptr[3] =(newsk->mtu) & 0xff;

	tcp_send_check(t1, daddr, saddr, sizeof(*t1)+4, newsk);
	newsk->prot->queue_xmit(newsk, dev, buff, 0);

	/* Retransmit the SYN-ACK if no ACK arrives in time. */
	reset_timer(newsk, TIME_WRITE /* -1 ? FIXME ??? */, TCP_TIMEOUT_INIT);
	skb->sk = newsk;

	/*
	 * Charge the sock_buff to newsk.
	 */

	sk->rmem_alloc -= skb->mem_len;
	newsk->rmem_alloc += skb->mem_len;

	/* Park the SYN on the listener so accept() can find newsk. */
	skb_queue_tail(&sk->receive_queue,skb);
	sk->ack_backlog++;
	release_sock(newsk);
	tcp_statistics.TcpOutSegs++;
}
2153
2154
2155 static void tcp_close(struct sock *sk, int timeout)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
2156 {
2157 struct sk_buff *buff;
2158 int need_reset = 0;
2159 struct tcphdr *t1, *th;
2160 struct proto *prot;
2161 struct device *dev=NULL;
2162 int tmp;
2163
2164 /*
2165 * We need to grab some memory, and put together a FIN,
2166 * and then put it into the queue to be sent.
2167 */
2168 sk->inuse = 1;
2169 sk->keepopen = 1;
2170 sk->shutdown = SHUTDOWN_MASK;
2171
2172 if (!sk->dead)
2173 sk->state_change(sk);
2174
2175 /*
2176 * We need to flush the recv. buffs.
2177 */
2178
2179 if (skb_peek(&sk->receive_queue) != NULL)
2180 {
2181 struct sk_buff *skb;
2182 if(sk->debug)
2183 printk("Clean rcv queue\n");
2184 while((skb=skb_dequeue(&sk->receive_queue))!=NULL)
2185 {
2186 /* The +1 is not needed because the FIN takes up sequence space and
2187 is not read!!! */
2188 if(skb->len > 0 && after(skb->h.th->seq + skb->len/* + 1 */ , sk->copied_seq))
2189 need_reset = 1;
2190 kfree_skb(skb, FREE_READ);
2191 }
2192 if(sk->debug)
2193 printk("Cleaned.\n");
2194 }
2195
2196 /*
2197 * Get rid off any half-completed packets.
2198 */
2199
2200 if (sk->partial)
2201 {
2202 tcp_send_partial(sk);
2203 }
2204
2205 switch(sk->state)
2206 {
2207 case TCP_FIN_WAIT1:
2208 case TCP_FIN_WAIT2:
2209 case TCP_CLOSING:
2210 /*
2211 * These states occur when we have already closed out
2212 * our end. If there is no timeout, we do not do
2213 * anything. We may still be in the middle of sending
2214 * the remainder of our buffer, for example...
2215 * resetting the timer would be inappropriate.
2216 *
2217 * XXX if retransmit count reaches limit, is tcp_close()
2218 * called with timeout == 1 ? if not, we need to fix that.
2219 */
2220 #ifdef NOTDEF
2221 /*
2222 * Start a timer.
2223 * original code was 4 * sk->rtt. In converting to the
2224 * new rtt representation, we can't quite use that.
2225 * it seems to make most sense to use the backed off value
2226 */
2227 reset_timer(sk, TIME_CLOSE, 4 * sk->rto);
2228 #endif
2229 if (timeout)
2230 tcp_time_wait(sk);
2231 release_sock(sk);
2232 return; /* break causes a double release - messy */
2233 case TCP_TIME_WAIT:
2234 case TCP_LAST_ACK:
2235 /*
2236 * A timeout from these states terminates the TCB.
2237 */
2238 if (timeout)
2239 {
2240 sk->state = TCP_CLOSE;
2241 }
2242 release_sock(sk);
2243 return;
2244 case TCP_LISTEN:
2245 sk->state = TCP_CLOSE;
2246 release_sock(sk);
2247 return;
2248 case TCP_CLOSE:
2249 release_sock(sk);
2250 return;
2251 case TCP_CLOSE_WAIT:
2252 case TCP_ESTABLISHED:
2253 case TCP_SYN_SENT:
2254 case TCP_SYN_RECV:
2255 prot =(struct proto *)sk->prot;
2256 th =(struct tcphdr *)&sk->dummy_th;
2257 buff = prot->wmalloc(sk, MAX_FIN_SIZE, 1, GFP_ATOMIC);
2258 if (buff == NULL)
2259 {
2260 /* This will force it to try again later. */
2261 /* Or it would have if someone released the socket
2262 first. Anyway it might work now */
2263 release_sock(sk);
2264 if (sk->state != TCP_CLOSE_WAIT)
2265 sk->state = TCP_ESTABLISHED;
2266 reset_timer(sk, TIME_CLOSE, 100);
2267 return;
2268 }
2269 buff->sk = sk;
2270 buff->free = 1;
2271 buff->len = sizeof(*t1);
2272 buff->localroute = sk->localroute;
2273 t1 =(struct tcphdr *) buff->data;
2274
2275 /*
2276 * Put in the IP header and routing stuff.
2277 */
2278 tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
2279 IPPROTO_TCP, sk->opt,
2280 sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
2281 if (tmp < 0)
2282 {
2283 sk->write_seq++; /* Very important 8) */
2284 kfree_skb(buff,FREE_WRITE);
2285
2286 /*
2287 * Enter FIN_WAIT1 to await completion of
2288 * written out data and ACK to our FIN.
2289 */
2290
2291 if(sk->state==TCP_ESTABLISHED)
2292 sk->state=TCP_FIN_WAIT1;
2293 else
2294 sk->state=TCP_FIN_WAIT2;
2295 reset_timer(sk, TIME_CLOSE,4*sk->rto);
2296 if(timeout)
2297 tcp_time_wait(sk);
2298
2299 release_sock(sk);
2300 return;
2301 }
2302
2303 t1 =(struct tcphdr *)((char *)t1 +tmp);
2304 buff->len += tmp;
2305 buff->dev = dev;
2306 memcpy(t1, th, sizeof(*t1));
2307 t1->seq = ntohl(sk->write_seq);
2308 sk->write_seq++;
2309 buff->h.seq = sk->write_seq;
2310 t1->ack = 1;
2311
2312 /*
2313 * Ack everything immediately from now on.
2314 */
2315
2316 sk->delay_acks = 0;
2317 t1->ack_seq = ntohl(sk->acked_seq);
2318 t1->window = ntohs(sk->window=tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
2319 t1->fin = 1;
2320 t1->rst = need_reset;
2321 t1->doff = sizeof(*t1)/4;
2322 tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
2323
2324 tcp_statistics.TcpOutSegs++;
2325
2326 if (skb_peek(&sk->write_queue) == NULL)
2327 {
2328 sk->sent_seq = sk->write_seq;
2329 prot->queue_xmit(sk, dev, buff, 0);
2330 }
2331 else
2332 {
2333 reset_timer(sk, TIME_WRITE, sk->rto);
2334 if (buff->next != NULL)
2335 {
2336 printk("tcp_close: next != NULL\n");
2337 skb_unlink(buff);
2338 }
2339 skb_queue_tail(&sk->write_queue, buff);
2340 }
2341
2342 /*
2343 * If established (normal close), enter FIN_WAIT1.
2344 * If in CLOSE_WAIT, enter LAST_ACK
2345 * If in CLOSING, remain in CLOSING
2346 * otherwise enter FIN_WAIT2
2347 */
2348
2349 if (sk->state == TCP_ESTABLISHED)
2350 sk->state = TCP_FIN_WAIT1;
2351 else if (sk->state == TCP_CLOSE_WAIT)
2352 sk->state = TCP_LAST_ACK;
2353 else if (sk->state != TCP_CLOSING)
2354 sk->state = TCP_FIN_WAIT2;
2355 }
2356 release_sock(sk);
2357 }
2358
2359
/*
 * This routine takes stuff off of the write queue,
 * and puts it in the xmit queue.
 */
static void
tcp_write_xmit(struct sock *sk)
{
	struct sk_buff *skb;

	/*
	 * The bytes will have to remain here. In time closedown will
	 * empty the write queue and all will be happy
	 */

	if(sk->zapped)
		return;

	/*
	 * Keep sending while: there is something queued, the segment fits
	 * inside the peer's advertised window, we are not in the middle of
	 * a retransmit run (unless the segment is already acked), and the
	 * congestion window has room for another packet in flight.
	 */
	while((skb = skb_peek(&sk->write_queue)) != NULL &&
		before(skb->h.seq, sk->window_seq + 1) &&
		(sk->retransmits == 0 ||
		 sk->timeout != TIME_WRITE ||
		 before(skb->h.seq, sk->rcv_ack_seq + 1))
		&& sk->packets_out < sk->cong_window)
	{
		IS_SKB(skb);
		skb_unlink(skb);
		/* See if we really need to send the packet (it may already be
		   covered by the peer's cumulative ack). */
		if (before(skb->h.seq, sk->rcv_ack_seq +1))
		{
			sk->retransmits = 0;
			kfree_skb(skb, FREE_WRITE);
			if (!sk->dead)
				sk->write_space(sk);
		}
		else
		{
			struct tcphdr *th;
			struct iphdr *iph;
			int size;
			/*
			 * put in the ack seq and window at this point rather than earlier,
			 * in order to keep them monotonic. We really want to avoid taking
			 * back window allocations. That's legal, but RFC1122 says it's frowned on.
			 * Ack and window will in general have changed since this packet was put
			 * on the write queue.
			 */
			iph = (struct iphdr *)(skb->data +
					skb->dev->hard_header_len);
			th = (struct tcphdr *)(((char *)iph) +(iph->ihl << 2));
			size = skb->len - (((unsigned char *) th) - skb->data);

			th->ack_seq = ntohl(sk->acked_seq);
			th->window = ntohs(tcp_select_window(sk));

			/* Checksum must be redone after patching ack/window. */
			tcp_send_check(th, sk->saddr, sk->daddr, size, sk);

			sk->sent_seq = skb->h.seq;
			sk->prot->queue_xmit(sk, skb->dev, skb, skb->free);
		}
	}
}
2421
2422
2423 /*
2424 * This routine sorts the send list, and resets the
2425 * sk->send_head and sk->send_tail pointers.
2426 */
2427
2428 static void sort_send(struct sock *sk)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
2429 {
2430 struct sk_buff *list = NULL;
2431 struct sk_buff *skb,*skb2,*skb3;
2432
2433 for (skb = sk->send_head; skb != NULL; skb = skb2)
2434 {
2435 skb2 = skb->link3;
2436 if (list == NULL || before (skb2->h.seq, list->h.seq))
2437 {
2438 skb->link3 = list;
2439 sk->send_tail = skb;
2440 list = skb;
2441 }
2442 else
2443 {
2444 for (skb3 = list; ; skb3 = skb3->link3)
2445 {
2446 if (skb3->link3 == NULL ||
2447 before(skb->h.seq, skb3->link3->h.seq))
2448 {
2449 skb->link3 = skb3->link3;
2450 skb3->link3 = skb;
2451 if (skb->link3 == NULL)
2452 sk->send_tail = skb;
2453 break;
2454 }
2455 }
2456 }
2457 }
2458 sk->send_head = list;
2459 }
2460
2461
/*
 * This routine deals with incoming acks, but not outgoing ones.
 *
 * Returns 0 when the ack is rejected as out of window, 1 otherwise.
 * As side effects it trims the retransmit queue, updates the RTT
 * estimate (Jacobson/Karn), grows the congestion window, and drives
 * the FIN_WAIT1/CLOSING/LAST_ACK state transitions.
 */

static int tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int len)
{
	unsigned long ack;
	int flag = 0;

	/*
	 * 1 - there was data in packet as well as ack or new data is sent or
	 *     in shutdown state
	 * 2 - data from retransmit queue was acked and removed
	 * 4 - window shrunk or data from retransmit queue was acked and removed
	 */

	if(sk->zapped)
		return(1);	/* Dead, cant ack any more so why bother */

	ack = ntohl(th->ack_seq);

	/* Track the largest window the peer has ever offered; mss is
	   bounded by it (halved under CONFIG_INET_PCTCP). */
	if (ntohs(th->window) > sk->max_window)
	{
		sk->max_window = ntohs(th->window);
#ifdef CONFIG_INET_PCTCP
		sk->mss = min(sk->max_window>>1, sk->mtu);
#else
		sk->mss = min(sk->max_window, sk->mtu);
#endif
	}

	/* An answered keepalive probe means the peer is alive again. */
	if (sk->retransmits && sk->timeout == TIME_KEEPOPEN)
		sk->retransmits = 0;

#if 0
	/*
	 * Not quite clear why the +1 and -1 here, and why not +1 in next line
	 */

	if (after(ack, sk->sent_seq+1) || before(ack, sk->rcv_ack_seq-1))
#else
	if (after(ack, sk->sent_seq) || before(ack, sk->rcv_ack_seq))
#endif
	{
		/* Ack is outside our window: either for data we never sent
		   or older than what was already acknowledged. */
		if(sk->debug)
			printk("Ack ignored %lu %lu\n",ack,sk->sent_seq);
		if (after(ack, sk->sent_seq) ||
		   (sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT))
		{
			return(0);
		}
		if (sk->keepopen)
		{
			reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
		}
		return(1);
	}

	/* Segment carries more than a bare TCP header. */
	if (len != th->doff*4)
		flag |= 1;

	/* See if our window has been shrunk. */

	if (after(sk->window_seq, ack+ntohs(th->window)))
	{
		/*
		 * We may need to move packets from the send queue
		 * to the write queue, if the window has been shrunk on us.
		 * The RFC says you are not allowed to shrink your window
		 * like this, but if the other end does, you must be able
		 * to deal with it.
		 */
		struct sk_buff *skb;
		struct sk_buff *skb2;
		struct sk_buff *wskb = NULL;

		skb2 = sk->send_head;
		sk->send_head = NULL;
		sk->send_tail = NULL;

		flag |= 4;

		sk->window_seq = ack + ntohs(th->window);
		cli();
		while (skb2 != NULL)
		{
			skb = skb2;
			skb2 = skb->link3;
			skb->link3 = NULL;
			if (after(skb->h.seq, sk->window_seq))
			{
				/* Now outside the window: move back to the
				   write queue to be re-sent later. */
				if (sk->packets_out > 0)
					sk->packets_out--;
				/* We may need to remove this from the dev send list. */
				if (skb->next != NULL)
				{
					skb_unlink(skb);
				}
				/* Now add it to the write_queue. */
				if (wskb == NULL)
					skb_queue_head(&sk->write_queue,skb);
				else
					skb_append(wskb,skb);
				wskb = skb;
			}
			else
			{
				/* Still in window: rebuild the send list in
				   the original order. */
				if (sk->send_head == NULL)
				{
					sk->send_head = skb;
					sk->send_tail = skb;
				}
				else
				{
					sk->send_tail->link3 = skb;
					sk->send_tail = skb;
				}
				skb->link3 = NULL;
			}
		}
		sti();
	}

	/* Keep head/tail/packets_out mutually consistent. */
	if (sk->send_tail == NULL || sk->send_head == NULL)
	{
		sk->send_head = NULL;
		sk->send_tail = NULL;
		sk->packets_out= 0;
	}

	sk->window_seq = ack + ntohs(th->window);

	/* We don't want too many packets out there. */
	if (sk->timeout == TIME_WRITE &&
		sk->cong_window < 2048 && after(ack, sk->rcv_ack_seq))
	{
		/*
		 * This is Jacobson's slow start and congestion avoidance.
		 * SIGCOMM '88, p. 328. Because we keep cong_window in integral
		 * mss's, we can't do cwnd += 1 / cwnd. Instead, maintain a
		 * counter and increment it once every cwnd times. It's possible
		 * that this should be done only if sk->retransmits == 0. I'm
		 * interpreting "new data is acked" as including data that has
		 * been retransmitted but is just now being acked.
		 */
		if (sk->cong_window < sk->ssthresh)
			/*
			 * In "safe" area, increase
			 */
			sk->cong_window++;
		else
		{
			/*
			 * In dangerous area, increase slowly. In theory this is
			 * sk->cong_window += 1 / sk->cong_window
			 */
			if (sk->cong_count >= sk->cong_window)
			{
				sk->cong_window++;
				sk->cong_count = 0;
			}
			else
				sk->cong_count++;
		}
	}

	sk->rcv_ack_seq = ack;

	/*
	 * if this ack opens up a zero window, clear backoff. It was
	 * being used to time the probes, and is probably far higher than
	 * it needs to be for normal retransmission.
	 */

	if (sk->timeout == TIME_PROBE0)
	{
		if (skb_peek(&sk->write_queue) != NULL &&	/* should always be non-null */
			! before (sk->window_seq, sk->write_queue.next->h.seq))
		{
			sk->retransmits = 0;
			sk->backoff = 0;
			/*
			 * Recompute rto from rtt. this eliminates any backoff.
			 */

			sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
			if (sk->rto > 120*HZ)
				sk->rto = 120*HZ;
			if (sk->rto < 20)	/* Was 1*HZ, then 1 - turns out we must allow about
						   .2 of a second because of BSD delayed acks - on a 100Mb/sec link
						   .2 of a second is going to need huge windows (SIGH) */
				sk->rto = 20;
		}
	}

	/*
	 * See if we can take anything off of the retransmit queue.
	 */

	while(sk->send_head != NULL)
	{
		/* Check for a bug. */
		if (sk->send_head->link3 &&
			after(sk->send_head->h.seq, sk->send_head->link3->h.seq))
		{
			printk("INET: tcp.c: *** bug send_list out of order.\n");
			sort_send(sk);
		}

		if (before(sk->send_head->h.seq, ack+1))
		{
			struct sk_buff *oskb;
			if (sk->retransmits)
			{
				/*
				 * We were retransmitting. don't count this in RTT est
				 */
				flag |= 2;

				/*
				 * even though we've gotten an ack, we're still
				 * retransmitting as long as we're sending from
				 * the retransmit queue. Keeping retransmits non-zero
				 * prevents us from getting new data interspersed with
				 * retransmissions.
				 */

				if (sk->send_head->link3)
					sk->retransmits = 1;
				else
					sk->retransmits = 0;
			}
			/*
			 * Note that we only reset backoff and rto in the
			 * rtt recomputation code. And that doesn't happen
			 * if there were retransmissions in effect. So the
			 * first new packet after the retransmissions is
			 * sent with the backoff still in effect. Not until
			 * we get an ack from a non-retransmitted packet do
			 * we reset the backoff and rto. This allows us to deal
			 * with a situation where the network delay has increased
			 * suddenly. I.e. Karn's algorithm. (SIGCOMM '87, p5.)
			 */

			/*
			 * We have one less packet out there.
			 */

			if (sk->packets_out > 0)
				sk->packets_out --;
			/*
			 * Wake up the process, it can probably write more.
			 */
			if (!sk->dead)
				sk->write_space(sk);
			oskb = sk->send_head;

			if (!(flag&2))
			{
				long m;

				/*
				 * The following amusing code comes from Jacobson's
				 * article in SIGCOMM '88. Note that rtt and mdev
				 * are scaled versions of rtt and mean deviation.
				 * This is designed to be as fast as possible
				 * m stands for "measurement".
				 */

				m = jiffies - oskb->when;  /* RTT */
				if(m<=0)
					m=1;		/* IS THIS RIGHT FOR <0 ??? */
				m -= (sk->rtt >> 3);	/* m is now error in rtt est */
				sk->rtt += m;		/* rtt = 7/8 rtt + 1/8 new */
				if (m < 0)
					m = -m;		/* m is now abs(error) */
				m -= (sk->mdev >> 2);	/* similar update on mdev */
				sk->mdev += m;		/* mdev = 3/4 mdev + 1/4 new */

				/*
				 * Now update timeout. Note that this removes any backoff.
				 */

				sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
				if (sk->rto > 120*HZ)
					sk->rto = 120*HZ;
				if (sk->rto < 20)	/* Was 1*HZ - keep .2 as minimum cos of the BSD delayed acks */
					sk->rto = 20;
				sk->backoff = 0;
			}
			flag |= (2|4);
			/* Unlink the acked skb from the retransmit queue
			   (and the device queue if present) and free it. */
			cli();
			oskb = sk->send_head;
			IS_SKB(oskb);
			sk->send_head = oskb->link3;
			if (sk->send_head == NULL)
			{
				sk->send_tail = NULL;
			}

			/*
			 * We may need to remove this from the dev send list.
			 */

			if (oskb->next)
				skb_unlink(oskb);
			sti();
			kfree_skb(oskb, FREE_WRITE); /* write. */
			if (!sk->dead)
				sk->write_space(sk);
		}
		else
		{
			/* Head of queue not yet acked: stop scanning. */
			break;
		}
	}

	/*
	 * Maybe we can take some stuff off of the write queue,
	 * and put it onto the xmit queue.
	 */
	if (skb_peek(&sk->write_queue) != NULL)
	{
		if (after (sk->window_seq+1, sk->write_queue.next->h.seq) &&
			(sk->retransmits == 0 ||
			 sk->timeout != TIME_WRITE ||
			 before(sk->write_queue.next->h.seq, sk->rcv_ack_seq + 1))
			&& sk->packets_out < sk->cong_window)
		{
			flag |= 1;
			tcp_write_xmit(sk);
		}
		else if (before(sk->window_seq, sk->write_queue.next->h.seq) &&
			sk->send_head == NULL &&
			sk->ack_backlog == 0 &&
			sk->state != TCP_TIME_WAIT)
		{
			/* Zero window and nothing in flight: start the
			   window probe timer. */
			reset_timer(sk, TIME_PROBE0, sk->rto);
		}
	}
	else
	{
		if (sk->send_head == NULL && sk->ack_backlog == 0 &&
			sk->state != TCP_TIME_WAIT && !sk->keepopen)
		{
			/* Nothing pending at all: wake writers and stop
			   the timer (keepalive handled below is dead code
			   here since !sk->keepopen). */
			if (!sk->dead)
				sk->write_space(sk);
			if (sk->keepopen)
				reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
			else
				delete_timer(sk);
		}
		else
		{
			/*
			 * NOTE(review): comparing state against keepopen looks
			 * odd (different quantities); historically present in
			 * this code - confirm intent before changing.
			 */
			if (sk->state != (unsigned char) sk->keepopen)
			{
				reset_timer(sk, TIME_WRITE, sk->rto);
			}
			if (sk->state == TCP_TIME_WAIT)
			{
				reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
			}
		}
	}

	/* All data fully acked and queues empty: flush any partial
	   packet we were accumulating. */
	if (sk->packets_out == 0 && sk->partial != NULL &&
		skb_peek(&sk->write_queue) == NULL && sk->send_head == NULL)
	{
		flag |= 1;
		tcp_send_partial(sk);
	}

	/*
	 * In the LAST_ACK case, the other end FIN'd us. We then FIN'd them, and
	 * we are now waiting for an acknowledge to our FIN. The other end is
	 * already in TIME_WAIT.
	 *
	 * Move to TCP_CLOSE on success.
	 */

	if (sk->state == TCP_LAST_ACK)
	{
		if (!sk->dead)
			sk->state_change(sk);
		if (sk->rcv_ack_seq == sk->write_seq && sk->acked_seq == sk->fin_seq)
		{
			flag |= 1;
			sk->state = TCP_CLOSE;
			sk->shutdown = SHUTDOWN_MASK;
		}
	}

	/*
	 * Incomming ACK to a FIN we sent in the case of our initiating the close.
	 *
	 * Move to FIN_WAIT2 to await a FIN from the other end.
	 */

	if (sk->state == TCP_FIN_WAIT1)
	{

		if (!sk->dead)
			sk->state_change(sk);
		if (sk->rcv_ack_seq == sk->write_seq)
		{
			flag |= 1;
			if (sk->acked_seq != sk->fin_seq)
			{
				tcp_time_wait(sk);
			}
			else
			{
				sk->shutdown = SHUTDOWN_MASK;
				sk->state = TCP_FIN_WAIT2;
			}
		}
	}

	/*
	 * Incoming ACK to a FIN we sent in the case of a simultanious close.
	 *
	 * Move to TIME_WAIT
	 */

	if (sk->state == TCP_CLOSING)
	{

		if (!sk->dead)
			sk->state_change(sk);
		if (sk->rcv_ack_seq == sk->write_seq)
		{
			flag |= 1;
			tcp_time_wait(sk);
		}
	}

	/*
	 * I make no guarantees about the first clause in the following
	 * test, i.e. "(!flag) || (flag&4)". I'm not entirely sure under
	 * what conditions "!flag" would be true. However I think the rest
	 * of the conditions would prevent that from causing any
	 * unnecessary retransmission.
	 * Clearly if the first packet has expired it should be
	 * retransmitted. The other alternative, "flag&2 && retransmits", is
	 * harder to explain: You have to look carefully at how and when the
	 * timer is set and with what timeout. The most recent transmission always
	 * sets the timer. So in general if the most recent thing has timed
	 * out, everything before it has as well. So we want to go ahead and
	 * retransmit some more. If we didn't explicitly test for this
	 * condition with "flag&2 && retransmits", chances are "when + rto < jiffies"
	 * would not be true. If you look at the pattern of timing, you can
	 * show that rto is increased fast enough that the next packet would
	 * almost never be retransmitted immediately. Then you'd end up
	 * waiting for a timeout to send each packet on the retranmission
	 * queue. With my implementation of the Karn sampling algorithm,
	 * the timeout would double each time. The net result is that it would
	 * take a hideous amount of time to recover from a single dropped packet.
	 * It's possible that there should also be a test for TIME_WRITE, but
	 * I think as long as "send_head != NULL" and "retransmit" is on, we've
	 * got to be in real retransmission mode.
	 * Note that ip_do_retransmit is called with all==1. Setting cong_window
	 * back to 1 at the timeout will cause us to send 1, then 2, etc. packets.
	 * As long as no further losses occur, this seems reasonable.
	 */

	if (((!flag) || (flag&4)) && sk->send_head != NULL &&
		(((flag&2) && sk->retransmits) ||
		 (sk->send_head->when + sk->rto < jiffies)))
	{
		ip_do_retransmit(sk, 1);
		reset_timer(sk, TIME_WRITE, sk->rto);
	}

	return(1);
}
2936
2937
/*
 * This routine handles the data. If there is room in the buffer,
 * it will be have already been moved into it. If there is no
 * room, then we will just have to discard the packet.
 *
 * Inserts the frame into the (sequence-ordered) receive queue,
 * advances acked_seq over any now-contiguous frames, and either sends
 * or schedules an ack. Always returns 0.
 */

static int tcp_data(struct sk_buff *skb, struct sock *sk,
	 unsigned long saddr, unsigned short len)
{
	struct sk_buff *skb1, *skb2;
	struct tcphdr *th;
	int dup_dumped=0;
	unsigned long new_seq;

	th = skb->h.th;
	/* Trim skb->len down to the TCP payload length. */
	skb->len = len -(th->doff*4);

	/* The bytes in the receive read/assembly queue has increased. Needed for the
	   low memory discard algorithm */

	sk->bytes_rcv += skb->len;

	if (skb->len == 0 && !th->fin && !th->urg && !th->psh)
	{
		/*
		 * Don't want to keep passing ack's back and forth.
		 * (someone sent us dataless, boring frame)
		 */
		if (!th->ack)
			tcp_send_ack(sk->sent_seq, sk->acked_seq,sk, th, saddr);
		kfree_skb(skb, FREE_READ);
		return(0);
	}

	/*
	 * We no longer have anyone receiving data on this connection.
	 */

	if(sk->shutdown & RCV_SHUTDOWN)
	{
		new_seq= th->seq + skb->len + th->syn;	/* Right edge of _data_ part of frame */

		if(after(new_seq,sk->copied_seq+1))	/* If the right edge of this frame is after the last copied byte
							   then it contains data we will never touch. We send an RST to
							   ensure the far end knows it never got to the application */
		{
			sk->acked_seq = new_seq + th->fin;
			tcp_reset(sk->saddr, sk->daddr, skb->h.th,
				sk->prot, NULL, skb->dev, sk->ip_tos, sk->ip_ttl);
			tcp_statistics.TcpEstabResets++;
			sk->state = TCP_CLOSE;
			sk->err = EPIPE;
			sk->shutdown = SHUTDOWN_MASK;
			kfree_skb(skb, FREE_READ);
			if (!sk->dead)
				sk->state_change(sk);
			return(0);
		}
#if 0
		/* Discard the frame here - we've already proved its a duplicate */

		kfree_skb(skb, FREE_READ);
		return(0);
#endif
	}
	/*
	 * Now we have to walk the chain, and figure out where this one
	 * goes into it. This is set up so that the last packet we received
	 * will be the first one we look at, that way if everything comes
	 * in order, there will be no performance loss, and if they come
	 * out of order we will be able to fit things in nicely.
	 */

	/*
	 * This should start at the last one, and then go around forwards.
	 */

	if (skb_peek(&sk->receive_queue) == NULL)	/* Empty queue is easy case */
	{
		skb_queue_head(&sk->receive_queue,skb);
		skb1= NULL;
	}
	else
	{
		/* Walk backwards from the newest frame looking for the
		   insertion point. */
		for(skb1=sk->receive_queue.prev; ; skb1 = skb1->prev)
		{
			if(sk->debug)
			{
				printk("skb1=%p :", skb1);
				printk("skb1->h.th->seq = %ld: ", skb1->h.th->seq);
				printk("skb->h.th->seq = %ld\n",skb->h.th->seq);
				printk("copied_seq = %ld acked_seq = %ld\n", sk->copied_seq,
					sk->acked_seq);
			}

			/*
			 * Optimisation: Duplicate frame or extension of previous frame from
			 * same sequence point (lost ack case).
			 * The frame contains duplicate data or replaces a previous frame
			 * discard the previous frame (safe as sk->inuse is set) and put
			 * the new one in its place.
			 */

			if (th->seq==skb1->h.th->seq && skb->len>= skb1->len)
			{
				skb_append(skb1,skb);
				skb_unlink(skb1);
				kfree_skb(skb1,FREE_READ);
				dup_dumped=1;
				skb1=NULL;
				break;
			}

			/*
			 * Found where it fits
			 */

			if (after(th->seq+1, skb1->h.th->seq))
			{
				skb_append(skb1,skb);
				break;
			}

			/*
			 * See if we've hit the start. If so insert.
			 */
			if (skb1 == skb_peek(&sk->receive_queue))
			{
				skb_queue_head(&sk->receive_queue, skb);
				break;
			}
		}
	}

	/*
	 * Figure out what the ack value for this frame is
	 * (th->ack_seq is reused here as "sequence just past this
	 * frame"; SYN and FIN each take a sequence number).
	 */

	th->ack_seq = th->seq + skb->len;
	if (th->syn)
		th->ack_seq++;
	if (th->fin)
		th->ack_seq++;

	/* Sanity check: acked_seq must never fall behind copied_seq. */
	if (before(sk->acked_seq, sk->copied_seq))
	{
		printk("*** tcp.c:tcp_data bug acked < copied\n");
		sk->acked_seq = sk->copied_seq;
	}

	/*
	 * Now figure out if we can ack anything.
	 */

	if ((!dup_dumped && (skb1 == NULL || skb1->acked)) || before(th->seq, sk->acked_seq+1))
	{
		if (before(th->seq, sk->acked_seq+1))
		{
			int newwindow;

			/* Frame is in sequence: advance acked_seq and shrink
			   the offered window by the newly covered bytes. */
			if (after(th->ack_seq, sk->acked_seq))
			{
				newwindow = sk->window-(th->ack_seq - sk->acked_seq);
				if (newwindow < 0)
					newwindow = 0;
				sk->window = newwindow;
				sk->acked_seq = th->ack_seq;
			}
			skb->acked = 1;

			/*
			 * When we ack the fin, we turn on the RCV_SHUTDOWN flag.
			 */

			if (skb->h.th->fin)
			{
				if (!sk->dead)
					sk->state_change(sk);
				sk->shutdown |= RCV_SHUTDOWN;
			}

			/* This frame may have filled a hole: sweep forward
			   over any queued frames that are now contiguous. */
			for(skb2 = skb->next;
				skb2 != (struct sk_buff *)&sk->receive_queue;
				skb2 = skb2->next)
			{
				if (before(skb2->h.th->seq, sk->acked_seq+1))
				{
					if (after(skb2->h.th->ack_seq, sk->acked_seq))
					{
						newwindow = sk->window -
							(skb2->h.th->ack_seq - sk->acked_seq);
						if (newwindow < 0)
							newwindow = 0;
						sk->window = newwindow;
						sk->acked_seq = skb2->h.th->ack_seq;
					}
					skb2->acked = 1;
					/*
					 * When we ack the fin, we turn on
					 * the RCV_SHUTDOWN flag.
					 */
					if (skb2->h.th->fin)
					{
						sk->shutdown |= RCV_SHUTDOWN;
						if (!sk->dead)
							sk->state_change(sk);
					}

					/*
					 * Force an immediate ack.
					 */

					sk->ack_backlog = sk->max_ack_backlog;
				}
				else
				{
					break;
				}
			}

			/*
			 * This also takes care of updating the window.
			 * This if statement needs to be simplified.
			 *
			 * NOTE(review): the "immediate ack" branch below is
			 * empty - the tcp_send_ack call is commented out and
			 * the ack is apparently left to the code further
			 * down. Confirm before relying on this.
			 */
			if (!sk->delay_acks ||
				sk->ack_backlog >= sk->max_ack_backlog ||
				sk->bytes_rcv > sk->max_unacked || th->fin) {
	/*			tcp_send_ack(sk->sent_seq, sk->acked_seq,sk,th, saddr); */
			}
			else
			{
				/* Delayed ack: count it and arm the ack timer. */
				sk->ack_backlog++;
				if(sk->debug)
					printk("Ack queued.\n");
				reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
			}
		}
	}

	/*
	 * If we've missed a packet, send an ack.
	 * Also start a timer to send another.
	 */

	if (!skb->acked)
	{

		/*
		 * This is important. If we don't have much room left,
		 * we need to throw out a few packets so we have a good
		 * window. Note that mtu is used, not mss, because mss is really
		 * for the send side. He could be sending us stuff as large as mtu.
		 */

		while (sk->prot->rspace(sk) < sk->mtu)
		{
			skb1 = skb_peek(&sk->receive_queue);
			if (skb1 == NULL)
			{
				printk("INET: tcp.c:tcp_data memory leak detected.\n");
				break;
			}

			/*
			 * Don't throw out something that has been acked.
			 */

			if (skb1->acked)
			{
				break;
			}

			skb_unlink(skb1);
			kfree_skb(skb1, FREE_READ);
		}
		/* Out-of-order frame: ack what we have (a duplicate ack to
		   the sender) and arm a timer to ack again. */
		tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
		sk->ack_backlog++;
		reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
	}
	else
	{
		/*
		 * NOTE(review): original comment said "We missed a packet"
		 * but this branch runs when the frame WAS acked in
		 * sequence; it sends an immediate ack even when one was
		 * just queued above - looks redundant with the delayed-ack
		 * path. Confirm before changing.
		 */
		tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
	}

	/*
	 * Now tell the user we may have some data.
	 */

	if (!sk->dead)
	{
		if(sk->debug)
			printk("Data wakeup.\n");
		sk->data_ready(sk,0);
	}
	return(0);
}
3235
3236
3237 static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
3238 {
3239 unsigned long ptr = ntohs(th->urg_ptr);
3240
3241 if (ptr)
3242 ptr--;
3243 ptr += th->seq;
3244
3245 /* ignore urgent data that we've already seen and read */
3246 if (after(sk->copied_seq+1, ptr))
3247 return;
3248
3249 /* do we already have a newer (or duplicate) urgent pointer? */
3250 if (sk->urg_data && !after(ptr, sk->urg_seq))
3251 return;
3252
3253 /* tell the world about our new urgent pointer */
3254 if (sk->proc != 0) {
3255 if (sk->proc > 0) {
3256 kill_proc(sk->proc, SIGURG, 1);
3257 } else {
3258 kill_pg(-sk->proc, SIGURG, 1);
3259 }
3260 }
3261 sk->urg_data = URG_NOTYET;
3262 sk->urg_seq = ptr;
3263 }
3264
3265 static inline int tcp_urg(struct sock *sk, struct tcphdr *th,
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
3266 unsigned long saddr, unsigned long len)
3267 {
3268 unsigned long ptr;
3269
3270 /* check if we get a new urgent pointer */
3271 if (th->urg)
3272 tcp_check_urg(sk,th);
3273
3274 /* do we wait for any urgent data? */
3275 if (sk->urg_data != URG_NOTYET)
3276 return 0;
3277
3278 /* is the urgent pointer pointing into this packet? */
3279 ptr = sk->urg_seq - th->seq + th->doff*4;
3280 if (ptr >= len)
3281 return 0;
3282
3283 /* ok, got the correct packet, update info */
3284 sk->urg_data = URG_VALID | *(ptr + (unsigned char *) th);
3285 if (!sk->dead)
3286 sk->data_ready(sk,0);
3287 return 0;
3288 }
3289
3290
/*
 * This deals with incoming fins. 'Linus at 9 O'clock' 8-)
 *
 * If we are ESTABLISHED, a received fin moves us to CLOSE-WAIT
 * (and thence onto LAST-ACK and finally, CLOSED, we never enter
 * TIME-WAIT)
 *
 * If we are in FINWAIT-1, a received FIN indicates simultanious
 * close and we go into CLOSING (and later onto TIME-WAIT)
 *
 * If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT.
 *
 * Always returns 0; the ack itself is produced by tcp_data()/the
 * ack_backlog machinery.
 */

static int tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th,
	 unsigned long saddr, struct device *dev)
{
	/* Sequence number just past the FIN: payload, SYN and FIN each
	   occupy sequence space. */
	sk->fin_seq = th->seq + skb->len + th->syn + th->fin;

	if (!sk->dead)
	{
		sk->state_change(sk);
	}

	switch(sk->state)
	{
		case TCP_SYN_RECV:
		case TCP_SYN_SENT:
		case TCP_ESTABLISHED:
			/*
			 * move to CLOSE_WAIT, tcp_data() already handled
			 * sending the ack.
			 */
			reset_timer(sk, TIME_CLOSE, TCP_TIMEOUT_LEN);
			/*sk->fin_seq = th->seq+1;*/
			tcp_statistics.TcpCurrEstab--;
			sk->state = TCP_CLOSE_WAIT;
			/* A FIN+RST means the peer is gone for good. */
			if (th->rst)
				sk->shutdown = SHUTDOWN_MASK;
			break;

		case TCP_CLOSE_WAIT:
		case TCP_CLOSING:
			/*
			 * received a retransmission of the FIN, do
			 * nothing.
			 */
			break;
		case TCP_TIME_WAIT:
			/*
			 * received a retransmission of the FIN,
			 * restart the TIME_WAIT timer.
			 */
			reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
			return(0);
		case TCP_FIN_WAIT1:
			/*
			 * This case occurs when a simultanious close
			 * happens, we must ack the received FIN and
			 * enter the CLOSING state.
			 *
			 * XXX timeout not set properly
			 */

			reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
			/*sk->fin_seq = th->seq+1;*/
			sk->state = TCP_CLOSING;
			break;
		case TCP_FIN_WAIT2:
			/*
			 * received a FIN -- send ACK and enter TIME_WAIT
			 */
			reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
			/*sk->fin_seq = th->seq+1;*/
			sk->state = TCP_TIME_WAIT;
			break;
		case TCP_CLOSE:
			/*
			 * already in CLOSE
			 */
			break;
		default:
			sk->state = TCP_LAST_ACK;

			/* Start the timers. */
			reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
			return(0);
	}
	/* The FIN consumes a sequence number and must be acked; bump the
	   backlog so an ack goes out. */
	sk->ack_backlog++;

	return(0);
}
3383
3384
/*
 * This will accept the next outstanding connection.
 *
 * Each completed connection sits on the listening socket's receive
 * queue as an skb whose skb->sk is the new socket. Blocks until one
 * arrives unless O_NONBLOCK is set. On failure returns NULL with
 * sk->err set (EINVAL, EAGAIN or ERESTARTSYS).
 */
static struct sock *
tcp_accept(struct sock *sk, int flags)
{
	struct sock *newsk;
	struct sk_buff *skb;

	/*
	 * We need to make sure that this socket is listening,
	 * and that it has something pending.
	 */

	if (sk->state != TCP_LISTEN)
	{
		sk->err = EINVAL;
		return(NULL);
	}

	/* Avoid the race. (cli until we either have an skb or have
	   slept; sk->inuse locks the socket against the bottom half.) */
	cli();
	sk->inuse = 1;

	while((skb = skb_dequeue(&sk->receive_queue)) == NULL)
	{
		if (flags & O_NONBLOCK)
		{
			sti();
			release_sock(sk);
			sk->err = EAGAIN;
			return(NULL);
		}

		/* Drop the lock before sleeping so the connection can
		   actually come in; re-take it after waking. */
		release_sock(sk);
		interruptible_sleep_on(sk->sleep);
		if (current->signal & ~current->blocked)
		{
			/* Interrupted by a signal (socket already released
			   above). */
			sti();
			sk->err = ERESTARTSYS;
			return(NULL);
		}
		sk->inuse = 1;
	}
	sti();

	/*
	 * Now all we need to do is return skb->sk.
	 */

	newsk = skb->sk;

	kfree_skb(skb, FREE_READ);
	sk->ack_backlog--;
	release_sock(sk);
	return(newsk);
}
3440
3441
3442 /*
3443 * This will initiate an outgoing connection.
3444 */
3445 
3446 static int tcp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len)
/*
 * Active open: validate the destination, pick an initial sequence
 * number, build and transmit the SYN (with an MSS option), move the
 * socket to TCP_SYN_SENT and arm the retransmit timer.
 *
 * Returns 0 on success, or a negative errno:
 *   -EISCONN      socket not in TCP_CLOSE
 *   -EINVAL       address too short
 *   -EAFNOSUPPORT wrong address family
 *   -ENETUNREACH  broadcast destination, or no route from build_header
 *   -EBUSY        connecting a socket back to itself
 *   -ENOMEM       no buffer memory for the SYN
 */
3447 {
3448 struct sk_buff *buff;
3449 struct device *dev=NULL;
3450 unsigned char *ptr;
3451 int tmp;
3452 struct tcphdr *t1;
3453 struct rtable *rt;
3454 
3455 if (sk->state != TCP_CLOSE)
3456 return(-EISCONN);
3457 
3458 if (addr_len < 8)
3459 return(-EINVAL);
3460 
3461 if (usin->sin_family && usin->sin_family != AF_INET)
3462 return(-EAFNOSUPPORT);
3463 
3464 /*
3465 * connect() to INADDR_ANY means loopback (BSD'ism).
3466 */
3467 
3468 if(usin->sin_addr.s_addr==INADDR_ANY)
3469 usin->sin_addr.s_addr=ip_my_addr();
3470 
3471 /*
3472 * Don't want a TCP connection going to a broadcast address
3473 */
3474 
3475 if (ip_chk_addr(usin->sin_addr.s_addr) == IS_BROADCAST)
3476 {
3477 return -ENETUNREACH;
3478 }
3479 
3480 /*
3481 * Connect back to the same socket: Blows up so disallow it
3482 */
3483 
3484 if(sk->saddr == usin->sin_addr.s_addr && sk->num==ntohs(usin->sin_port))
3485 return -EBUSY;
3486 
3487 sk->inuse = 1;
3488 sk->daddr = usin->sin_addr.s_addr;
/* Clock-driven initial sequence number (pre-random-ISN era). */
3489 sk->write_seq = jiffies * SEQ_TICK - seq_offset;
3490 sk->window_seq = sk->write_seq;
3491 sk->rcv_ack_seq = sk->write_seq -1;
3492 sk->err = 0;
3493 sk->dummy_th.dest = usin->sin_port;
/* Drop the lock across the possibly-sleeping GFP_KERNEL allocation. */
3494 release_sock(sk);
3495 
3496 buff = sk->prot->wmalloc(sk,MAX_SYN_SIZE,0, GFP_KERNEL);
3497 if (buff == NULL)
3498 {
3499 return(-ENOMEM);
3500 }
3501 sk->inuse = 1;
3502 buff->len = 24;
3503 buff->sk = sk;
3504 buff->free = 1;
3505 buff->localroute = sk->localroute;
3506 
3507 t1 = (struct tcphdr *) buff->data;
3508 
3509 /*
3510 * Put in the IP header and routing stuff.
3511 */
3512 
/* NOTE(review): rt is only consulted for window/MSS hints below and is
 * never released here — presumably this route API is not refcounted;
 * confirm against ip_rt_route(). */
3513 rt=ip_rt_route(sk->daddr, NULL, NULL);
3514 
3515 
3516 /*
3517 * We need to build the routing stuff fromt the things saved in skb.
3518 */
3519 
/* On success build_header fills in dev, which the MTU code below relies on. */
3520 tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
3521 IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);
3522 if (tmp < 0)
3523 {
3524 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
3525 release_sock(sk);
3526 return(-ENETUNREACH);
3527 }
3528 
3529 buff->len += tmp;
3530 t1 = (struct tcphdr *)((char *)t1 +tmp);
3531 
3532 memcpy(t1,(void *)&(sk->dummy_th), sizeof(*t1));
/* NOTE(review): ntohl used where htonl is meant — identical result on
 * every supported arch, but htonl would state the intent. */
3533 t1->seq = ntohl(sk->write_seq++);
3534 sk->sent_seq = sk->write_seq;
3535 buff->h.seq = sk->write_seq;
3536 t1->ack = 0;
3537 t1->window = 2;
3538 t1->res1=0;
3539 t1->res2=0;
3540 t1->rst = 0;
3541 t1->urg = 0;
3542 t1->psh = 0;
3543 t1->syn = 1;
3544 t1->urg_ptr = 0;
3545 t1->doff = 6;
3546 /* use 512 or whatever user asked for */
3547 
3548 if(rt!=NULL && (rt->rt_flags&RTF_WINDOW))
3549 sk->window_clamp=rt->rt_window;
3550 else
3551 sk->window_clamp=0;
3552 
/* MSS selection: user override, then route hint, then 576 for
 * off-subnet peers / MAX_WINDOW on-subnet. */
3553 if (sk->user_mss)
3554 sk->mtu = sk->user_mss;
3555 else if(rt!=NULL && (rt->rt_flags&RTF_MTU))
3556 sk->mtu = rt->rt_mss;
3557 else
3558 {
3559 #ifdef CONFIG_INET_SNARL
3560 if ((sk->saddr ^ sk->daddr) & default_mask(sk->saddr))
3561 #else
3562 if ((sk->saddr ^ sk->daddr) & dev->pa_mask)
3563 #endif
3564 sk->mtu = 576 - HEADER_SIZE;
3565 else
3566 sk->mtu = MAX_WINDOW;
3567 }
3568 /*
3569 * but not bigger than device MTU
3570 */
3571 
3572 if(sk->mtu <32)
3573 sk->mtu = 32; /* Sanity limit */
3574 
3575 sk->mtu = min(sk->mtu, dev->mtu - HEADER_SIZE);
3576 
3577 /*
3578 * Put in the TCP options to say MTU.
3579 */
3580 
/* Single MSS option: kind 2, length 4, 16-bit MSS in network order. */
3581 ptr = (unsigned char *)(t1+1);
3582 ptr[0] = 2;
3583 ptr[1] = 4;
3584 ptr[2] = (sk->mtu) >> 8;
3585 ptr[3] = (sk->mtu) & 0xff;
3586 tcp_send_check(t1, sk->saddr, sk->daddr,
3587 sizeof(struct tcphdr) + 4, sk);
3588 
3589 /*
3590 * This must go first otherwise a really quick response will get reset.
3591 */
3592 
3593 sk->state = TCP_SYN_SENT;
3594 /* sk->rtt = TCP_CONNECT_TIME;*/
3595 sk->rto = TCP_TIMEOUT_INIT;
3596 reset_timer(sk, TIME_WRITE, sk->rto); /* Timer for repeating the SYN until an answer */
3597 sk->retransmits = TCP_RETR2 - TCP_SYN_RETRIES;
3598 
3599 sk->prot->queue_xmit(sk, dev, buff, 0);
3600 tcp_statistics.TcpActiveOpens++;
3601 tcp_statistics.TcpOutSegs++;
3602 
3603 release_sock(sk);
3604 return(0);
3605 }
3606
3607
3608 /* This functions checks to see if the tcp header is actually acceptable. */
3609 static int
3610 tcp_sequence(struct sock *sk, struct tcphdr *th, short len,
/*
 * Sequence-number acceptability test for an incoming segment.
 *
 * len is the total length including the TCP header (data length is
 * len - 4*doff).  Returns 1 when at least part of the segment falls
 * inside the receive window (or when we reply with a reset while
 * unsynchronized), 0 when the segment should be dropped.
 *
 * Side effects on the reject path: a RST segment is silently dropped;
 * in SYN_SENT/SYN_RECV we send a reset to kill the bogus remote
 * connection; otherwise we send a bare ACK to resynchronize the peer.
 */
3611 struct options *opt, unsigned long saddr, struct device *dev)
3612 {
3613 unsigned long next_seq;
3614 
/* next_seq starts as the segment's sequence-space length: payload
 * bytes plus one for a FIN. */
3615 next_seq = len - 4*th->doff;
3616 if (th->fin)
3617 next_seq++;
3618 /* if we have a zero window, we can't have any data in the packet.. */
3619 if (next_seq && !sk->window)
3620 goto ignore_it;
3621 next_seq += th->seq;
3622 
3623 /*
3624 * This isn't quite right. sk->acked_seq could be more recent
3625 * than sk->window. This is however close enough. We will accept
3626 * slightly more packets than we should, but it should not cause
3627 * problems unless someone is trying to forge packets.
3628 */
3629 
3630 /* have we already seen all of this packet? */
3631 if (!after(next_seq+1, sk->acked_seq))
3632 goto ignore_it;
3633 /* or does it start beyond the window? */
3634 if (!before(th->seq, sk->acked_seq + sk->window + 1))
3635 goto ignore_it;
3636 
3637 /* ok, at least part of this packet would seem interesting.. */
3638 return 1;
3639 
3640 ignore_it:
3641 if (th->rst)
3642 return 0;
3643 
3644 /*
3645 * Send a reset if we get something not ours and we are
3646 * unsynchronized. Note: We don't do anything to our end. We
3647 * are just killing the bogus remote connection then we will
3648 * connect again and it will work (with luck).
3649 */
3650 
3651 if (sk->state==TCP_SYN_SENT || sk->state==TCP_SYN_RECV) {
3652 tcp_reset(sk->saddr,sk->daddr,th,sk->prot,NULL,dev, sk->ip_tos,sk->ip_ttl);
3653 return 1;
3654 }
3655 
3656 /* Try to resync things. */
3657 tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
3658 return 0;
3659 }
3660
3661
3662 #ifdef TCP_FASTPATH
3663 /*
3664 * Is the end of the queue clear of fragments as yet unmerged into the data stream
3665 * Yes if
3666 * a) The queue is empty
3667 * b) The last frame on the queue has the acked flag set
3668 */
3669
3670 static inline int tcp_clean_end(struct sock *sk)
/*
 * Fastpath helper: is the tail of sk's receive queue clear of
 * fragments not yet merged into the data stream?
 *
 * Returns 1 when (a) the queue is empty, or (b) the last frame on the
 * queue has its acked flag set; returns 0 otherwise.
 *
 * BUG FIX: the original fell off the end of this non-void function on
 * the "not clean" path, yielding an undefined return value (C11
 * 6.9.1p12) and making the fastpath decision in tcp_rcv() random.
 */
3671 {
3672 struct sk_buff *skb=skb_peek(&sk->receive_queue);
3673 if(skb==NULL || sk->receive_queue.prev->acked)
3674 return 1;
return 0;
3675 }
3676
3677 #endif
3678
3679 int
3680 tcp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt,
/*
 * Main TCP input routine, called from the IP layer (and again from the
 * backlog with redo != 0 after a socket lock is released).
 *
 * Validates the checksum, finds the owning socket, charges the buffer
 * to it, runs the optional established-state fastpath, then dispatches
 * on sk->state.  Returns 0 in all cases; the skb is either queued to
 * the socket, queued to the backlog, or freed here.
 *
 * NOTE(review): the protocol parameter is unused in this function.
 * redo presumably means "second pass from the backlog, checksum and
 * byte-order fixups already done" — confirm against release_sock().
 */
3681 unsigned long daddr, unsigned short len,
3682 unsigned long saddr, int redo, struct inet_protocol * protocol)
3683 {
3684 struct tcphdr *th;
3685 struct sock *sk;
3686 
3687 if (!skb)
3688 {
3689 return(0);
3690 }
3691 
3692 if (!dev)
3693 {
3694 return(0);
3695 }
3696 
3697 tcp_statistics.TcpInSegs++;
3698 
/* Ignore frames not addressed to this host (promiscuous captures etc). */
3699 if(skb->pkt_type!=PACKET_HOST)
3700 {
3701 kfree_skb(skb,FREE_READ);
3702 return(0);
3703 }
3704 
3705 th = skb->h.th;
3706 
3707 /*
3708 * Find the socket.
3709 */
3710 
3711 sk = get_sock(&tcp_prot, th->dest, saddr, th->source, daddr);
3712 
3713 /*
3714 * If this socket has got a reset its to all intents and purposes 
3715 * really dead 
3716 */
3717 
3718 if (sk!=NULL && sk->zapped)
3719 sk=NULL;
3720 
3721 if (!redo)
3722 {
3723 if (tcp_check(th, len, saddr, daddr ))
3724 {
3725 skb->sk = NULL;
3726 kfree_skb(skb,FREE_READ);
3727 /*
3728 * We don't release the socket because it was
3729 * never marked in use.
3730 */
3731 return(0);
3732 }
/* Sequence number converted to host order once, here — the redo pass
 * must not repeat this. */
3733 th->seq = ntohl(th->seq);
3734 
3735 /* See if we know about the socket. */
3736 if (sk == NULL)
3737 {
3738 if (!th->rst)
3739 tcp_reset(daddr, saddr, th, &tcp_prot, opt,dev,skb->ip_hdr->tos,255);
3740 skb->sk = NULL;
3741 kfree_skb(skb, FREE_READ);
3742 return(0);
3743 }
3744 
3745 skb->len = len;
3746 skb->sk = sk;
3747 skb->acked = 0;
3748 skb->used = 0;
3749 skb->free = 0;
3750 skb->saddr = daddr;
3751 skb->daddr = saddr;
3752 
3753 /* We may need to add it to the backlog here. */
3754 cli();
3755 if (sk->inuse)
3756 {
3757 skb_queue_head(&sk->back_log, skb);
3758 sti();
3759 return(0);
3760 }
3761 sk->inuse = 1;
3762 sti();
3763 }
3764 else
3765 {
3766 if (!sk)
3767 {
3768 return(0);
3769 }
3770 }
3771 
3772 
3773 if (!sk->prot)
3774 {
3775 return(0);
3776 }
3777 
3778 
3779 /*
3780 * Charge the memory to the socket.
3781 */
3782 
3783 if (sk->rmem_alloc + skb->mem_len >= sk->rcvbuf)
3784 {
3785 skb->sk = NULL;
3786 kfree_skb(skb, FREE_READ);
3787 release_sock(sk);
3788 return(0);
3789 }
3790 
3791 sk->rmem_alloc += skb->mem_len;
3792 
3793 #ifdef TCP_FASTPATH
3794 /*
3795 * Incoming data stream fastpath. 
3796 *
3797 * We try to optimise two things.
3798 * 1) Spot general data arriving without funny options and skip extra checks and the switch.
3799 * 2) Spot the common case in raw data receive streams of a packet that has no funny options,
3800 * fits exactly on the end of the current queue and may or may not have the ack bit set.
3801 *
3802 * Case two especially is done inline in this routine so there are no long jumps causing heavy
3803 * cache thrashing, no function call overhead (except for the ack sending if needed) and for
3804 * speed although further optimizing here is possible. 
3805 */
3806 
3807 /* Im trusting gcc to optimise this sensibly... might need judicious application of a software mallet */
3808 if(!(sk->shutdown & RCV_SHUTDOWN) && sk->state==TCP_ESTABLISHED && !th->urg && !th->syn && !th->fin && !th->rst)
3809 {
3810 /* Packets in order. Fits window */
3811 if(th->seq == sk->acked_seq+1 && sk->window && tcp_clean_end(sk))
3812 {
3813 /* Ack is harder */
3814 if(th->ack && !tcp_ack(sk, th, saddr, len))
3815 {
3816 kfree_skb(skb, FREE_READ);
3817 release_sock(sk);
3818 return 0; 
3819 }
3820 /*
3821 * Set up variables
3822 */
3823 skb->len -= (th->doff *4);
3824 sk->bytes_rcv += skb->len;
3825 tcp_rx_hit2++;
3826 if(skb->len)
3827 {
3828 skb_queue_tail(&sk->receive_queue,skb); /* We already know where to put it */
3829 if(sk->window >= skb->len)
3830 sk->window-=skb->len; /* We know its effect on the window */
3831 else
3832 sk->window=0;
3833 sk->acked_seq = th->seq+skb->len; /* Easy */
3834 skb->acked=1; /* Guaranteed true */
3835 if(!sk->delay_acks || sk->ack_backlog >= sk->max_ack_backlog || 
3836 sk->bytes_rcv > sk->max_unacked)
3837 {
3838 tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th , saddr);
3839 }
3840 else
3841 {
3842 sk->ack_backlog++;
3843 reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
3844 }
3845 if(!sk->dead)
3846 sk->data_ready(sk,0);
/* NOTE(review): the fastpath returns without release_sock(); the pure-ACK
 * (skb->len == 0) case falls through to the generic path below. */
3847 return 0;
3848 }
3849 }
3850 /*
3851 * More generic case of arriving data stream in ESTABLISHED
3852 */
3853 tcp_rx_hit1++;
3854 if(!tcp_sequence(sk, th, len, opt, saddr, dev))
3855 {
3856 kfree_skb(skb, FREE_READ);
3857 release_sock(sk);
3858 return 0;
3859 }
3860 if(th->ack && !tcp_ack(sk, th, saddr, len))
3861 {
3862 kfree_skb(skb, FREE_READ);
3863 release_sock(sk);
3864 return 0;
3865 }
3866 if(tcp_data(skb, sk, saddr, len))
3867 kfree_skb(skb, FREE_READ);
3868 release_sock(sk);
3869 return 0;
3870 }
3871 tcp_rx_miss++;
3872 #endif
3873 
3874 /*
3875 * Now deal with all cases.
3876 */
3877 
3878 switch(sk->state) 
3879 {
3880 
3881 /*
3882 * This should close the system down if it's waiting
3883 * for an ack that is never going to be sent.
3884 */
3885 case TCP_LAST_ACK:
3886 if (th->rst)
3887 {
3888 sk->zapped=1;
3889 sk->err = ECONNRESET;
3890 sk->state = TCP_CLOSE;
3891 sk->shutdown = SHUTDOWN_MASK;
3892 if (!sk->dead)
3893 {
3894 sk->state_change(sk);
3895 }
3896 kfree_skb(skb, FREE_READ);
3897 release_sock(sk);
3898 return(0);
3899 }
3900 
/* fall through: non-RST segments in LAST_ACK are handled like the
 * other synchronized states below. */
3901 case TCP_ESTABLISHED:
3902 case TCP_CLOSE_WAIT:
3903 case TCP_CLOSING:
3904 case TCP_FIN_WAIT1:
3905 case TCP_FIN_WAIT2:
3906 case TCP_TIME_WAIT:
3907 if (!tcp_sequence(sk, th, len, opt, saddr,dev)) 
3908 {
3909 kfree_skb(skb, FREE_READ);
3910 release_sock(sk);
3911 return(0);
3912 }
3913 
3914 if (th->rst)
3915 {
3916 tcp_statistics.TcpEstabResets++;
3917 tcp_statistics.TcpCurrEstab--;
3918 sk->zapped=1;
3919 /* This means the thing should really be closed. */
3920 sk->err = ECONNRESET;
3921 if (sk->state == TCP_CLOSE_WAIT)
3922 {
3923 sk->err = EPIPE;
3924 }
3925 
3926 /*
3927 * A reset with a fin just means that
3928 * the data was not all read.
3929 */
3930 sk->state = TCP_CLOSE;
3931 sk->shutdown = SHUTDOWN_MASK;
3932 if (!sk->dead) 
3933 {
3934 sk->state_change(sk);
3935 }
3936 kfree_skb(skb, FREE_READ);
3937 release_sock(sk);
3938 return(0);
3939 }
/* A SYN inside an established connection is fatal: reset both ends. */
3940 if (th->syn) 
3941 {
3942 tcp_statistics.TcpCurrEstab--;
3943 tcp_statistics.TcpEstabResets++;
3944 sk->err = ECONNRESET;
3945 sk->state = TCP_CLOSE;
3946 sk->shutdown = SHUTDOWN_MASK;
3947 tcp_reset(daddr, saddr, th, sk->prot, opt,dev, sk->ip_tos,sk->ip_ttl);
3948 if (!sk->dead) {
3949 sk->state_change(sk);
3950 }
3951 kfree_skb(skb, FREE_READ);
3952 release_sock(sk);
3953 return(0);
3954 }
3955 
3956 if (th->ack && !tcp_ack(sk, th, saddr, len)) {
3957 kfree_skb(skb, FREE_READ);
3958 release_sock(sk);
3959 return(0);
3960 }
3961 
3962 if (tcp_urg(sk, th, saddr, len)) {
3963 kfree_skb(skb, FREE_READ);
3964 release_sock(sk);
3965 return(0);
3966 }
3967 
3968 
/* tcp_data() returning nonzero means the skb was not consumed. */
3969 if (tcp_data(skb, sk, saddr, len)) {
3970 kfree_skb(skb, FREE_READ);
3971 release_sock(sk);
3972 return(0);
3973 }
3974 
3975 if (th->fin && tcp_fin(skb, sk, th, saddr, dev)) {
3976 kfree_skb(skb, FREE_READ);
3977 release_sock(sk);
3978 return(0);
3979 }
3980 
3981 release_sock(sk);
3982 return(0);
3983 
3984 case TCP_CLOSE:
3985 if (sk->dead || sk->daddr) {
3986 kfree_skb(skb, FREE_READ);
3987 release_sock(sk);
3988 return(0);
3989 }
3990 
3991 if (!th->rst) {
3992 if (!th->ack)
3993 th->ack_seq = 0;
3994 tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
3995 }
3996 kfree_skb(skb, FREE_READ);
3997 release_sock(sk);
3998 return(0);
3999 
4000 case TCP_LISTEN:
4001 if (th->rst) {
4002 kfree_skb(skb, FREE_READ);
4003 release_sock(sk);
4004 return(0);
4005 }
/* An ACK to a listener is bogus — answer with a reset. */
4006 if (th->ack) {
4007 tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
4008 kfree_skb(skb, FREE_READ);
4009 release_sock(sk);
4010 return(0);
4011 }
4012 
4013 if (th->syn) 
4014 {
4015 /*
4016 * Now we just put the whole thing including
4017 * the header and saddr, and protocol pointer
4018 * into the buffer. We can't respond until the
4019 * user tells us to accept the connection.
4020 */
4021 tcp_conn_request(sk, skb, daddr, saddr, opt, dev);
4022 release_sock(sk);
4023 return(0);
4024 }
4025 
4026 kfree_skb(skb, FREE_READ);
4027 release_sock(sk);
4028 return(0);
4029 
4030 case TCP_SYN_RECV:
4031 if (th->syn) {
4032 /* Probably a retransmitted syn */
4033 kfree_skb(skb, FREE_READ);
4034 release_sock(sk);
4035 return(0);
4036 }
4037 
4038 
/* fall through: non-SYN segments in SYN_RECV take the sequence check
 * of the default case, then the ack processing below. */
4039 default:
4040 if (!tcp_sequence(sk, th, len, opt, saddr,dev)) 
4041 {
4042 kfree_skb(skb, FREE_READ);
4043 release_sock(sk);
4044 return(0);
4045 }
4046 
/* fall through into SYN_SENT handling (shared RST/ACK logic). */
4047 case TCP_SYN_SENT:
4048 if (th->rst) 
4049 {
4050 tcp_statistics.TcpAttemptFails++;
4051 sk->err = ECONNREFUSED;
4052 sk->state = TCP_CLOSE;
4053 sk->shutdown = SHUTDOWN_MASK;
4054 sk->zapped = 1;
4055 if (!sk->dead) 
4056 {
4057 sk->state_change(sk);
4058 }
4059 kfree_skb(skb, FREE_READ);
4060 release_sock(sk);
4061 return(0);
4062 }
4063 if (!th->ack) 
4064 {
4065 if (th->syn) 
4066 {
4067 sk->state = TCP_SYN_RECV;
4068 }
4069 kfree_skb(skb, FREE_READ);
4070 release_sock(sk);
4071 return(0);
4072 }
4073 
4074 switch(sk->state) 
4075 {
4076 case TCP_SYN_SENT:
4077 if (!tcp_ack(sk, th, saddr, len)) 
4078 {
4079 tcp_statistics.TcpAttemptFails++;
4080 tcp_reset(daddr, saddr, th,
4081 sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
4082 kfree_skb(skb, FREE_READ);
4083 release_sock(sk);
4084 return(0);
4085 }
4086 
4087 /*
4088 * If the syn bit is also set, switch to
4089 * tcp_syn_recv, and then to established.
4090 */
4091 if (!th->syn) 
4092 {
4093 kfree_skb(skb, FREE_READ);
4094 release_sock(sk);
4095 return(0);
4096 }
4097 
4098 /* Ack the syn and fall through. */
4099 sk->acked_seq = th->seq+1;
4100 sk->fin_seq = th->seq;
4101 tcp_send_ack(sk->sent_seq, th->seq+1,
4102 sk, th, sk->daddr);
4103 
4104 case TCP_SYN_RECV:
4105 if (!tcp_ack(sk, th, saddr, len)) 
4106 {
4107 tcp_statistics.TcpAttemptFails++;
4108 tcp_reset(daddr, saddr, th,
4109 sk->prot, opt, dev,sk->ip_tos,sk->ip_ttl);
4110 kfree_skb(skb, FREE_READ);
4111 release_sock(sk);
4112 return(0);
4113 }
4114 
4115 tcp_statistics.TcpCurrEstab++;
4116 sk->state = TCP_ESTABLISHED;
4117 
4118 /*
4119 * Now we need to finish filling out
4120 * some of the tcp header.
4121 * 
4122 * We need to check for mtu info. 
4123 */
4124 tcp_options(sk, th);
4125 sk->dummy_th.dest = th->source;
4126 sk->copied_seq = sk->acked_seq-1;
4127 if (!sk->dead) 
4128 {
4129 sk->state_change(sk);
4130 }
4131 
4132 /*
4133 * We've already processed his first
4134 * ack. In just about all cases that
4135 * will have set max_window. This is
4136 * to protect us against the possibility
4137 * that the initial window he sent was 0.
4138 * This must occur after tcp_options, which
4139 * sets sk->mtu.
4140 */
4141 if (sk->max_window == 0) 
4142 {
4143 sk->max_window = 32;
4144 sk->mss = min(sk->max_window, sk->mtu);
4145 }
4146 
4147 /*
4148 * Now process the rest like we were
4149 * already in the established state.
4150 */
4151 if (th->urg) 
4152 {
4153 if (tcp_urg(sk, th, saddr, len)) 
4154 { 
4155 kfree_skb(skb, FREE_READ);
4156 release_sock(sk);
4157 return(0);
4158 }
4159 }
4160 if (tcp_data(skb, sk, saddr, len))
4161 kfree_skb(skb, FREE_READ);
4162 
4163 if (th->fin)
4164 tcp_fin(skb, sk, th, saddr, dev);
4165 release_sock(sk);
4166 return(0);
4167 }
4168 
/* Remaining states (from the default label above): ordinary urgent,
 * data and FIN processing. */
4169 if (th->urg) 
4170 {
4171 if (tcp_urg(sk, th, saddr, len)) 
4172 {
4173 kfree_skb(skb, FREE_READ);
4174 release_sock(sk);
4175 return(0);
4176 }
4177 }
4178 if (tcp_data(skb, sk, saddr, len)) 
4179 {
4180 kfree_skb(skb, FREE_READ);
4181 release_sock(sk);
4182 return(0);
4183 }
4184 
4185 if (!th->fin) 
4186 {
4187 release_sock(sk);
4188 return(0);
4189 }
4190 tcp_fin(skb, sk, th, saddr, dev);
4191 release_sock(sk);
4192 return(0);
4193 }
4194 }
4195
4196
4197 /*
4198 * This routine sends a packet with an out of date sequence
4199 * number. It assumes the other end will try to ack it.
4200 */
4201
4202 static void tcp_write_wakeup(struct sock *sk)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
4203 {
4204 struct sk_buff *buff;
4205 struct tcphdr *t1;
4206 struct device *dev=NULL;
4207 int tmp;
4208
4209 if (sk->zapped)
4210 return; /* Afer a valid reset we can send no more */
4211
4212 /*
4213 * Write data can still be transmitted/retransmitted in the
4214 * following states. If any other state is encountered, return.
4215 */
4216
4217 if (sk->state != TCP_ESTABLISHED &&
4218 sk->state != TCP_CLOSE_WAIT &&
4219 sk->state != TCP_FIN_WAIT1 &&
4220 sk->state != TCP_LAST_ACK &&
4221 sk->state != TCP_CLOSING
4222 ) {
4223 return;
4224 }
4225
4226 buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
4227 if (buff == NULL)
4228 return;
4229
4230 buff->len = sizeof(struct tcphdr);
4231 buff->free = 1;
4232 buff->sk = sk;
4233 buff->localroute = sk->localroute;
4234
4235 t1 = (struct tcphdr *) buff->data;
4236
4237 /* Put in the IP header and routing stuff. */
4238 tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
4239 IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
4240 if (tmp < 0)
4241 {
4242 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
4243 return;
4244 }
4245
4246 buff->len += tmp;
4247 t1 = (struct tcphdr *)((char *)t1 +tmp);
4248
4249 memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
4250
4251 /*
4252 * Use a previous sequence.
4253 * This should cause the other end to send an ack.
4254 */
4255 t1->seq = htonl(sk->sent_seq-1);
4256 t1->ack = 1;
4257 t1->res1= 0;
4258 t1->res2= 0;
4259 t1->rst = 0;
4260 t1->urg = 0;
4261 t1->psh = 0;
4262 t1->fin = 0;
4263 t1->syn = 0;
4264 t1->ack_seq = ntohl(sk->acked_seq);
4265 t1->window = ntohs(tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
4266 t1->doff = sizeof(*t1)/4;
4267 tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
4268
4269 /* Send it and free it.
4270 * This will prevent the timer from automatically being restarted.
4271 */
4272 sk->prot->queue_xmit(sk, dev, buff, 1);
4273 tcp_statistics.TcpOutSegs++;
4274 }
4275
void
4277 tcp_send_probe0(struct sock *sk)
/*
 * Zero-window probe timer handler: send one probe via
 * tcp_write_wakeup(), then back off exponentially (rto doubles, capped
 * at 120 seconds) and rearm the TIME_PROBE0 timer.  Does nothing after
 * a valid reset (sk->zapped).
 */
4278 {
4279 if (sk->zapped)
4280 return; /* After a valid reset we can send no more */
4281 
4282 tcp_write_wakeup(sk);
4283 
4284 sk->backoff++;
4285 sk->rto = min(sk->rto << 1, 120*HZ);
4286 reset_timer (sk, TIME_PROBE0, sk->rto);
4287 sk->retransmits++;
4288 sk->prot->retransmits ++;
4289 }
4290
4291 /*
4292 * Socket option code for TCP.
4293 */
4294
4295 int tcp_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
4296 {
4297 int val,err;
4298
4299 if(level!=SOL_TCP)
4300 return ip_setsockopt(sk,level,optname,optval,optlen);
4301
4302 if (optval == NULL)
4303 return(-EINVAL);
4304
4305 err=verify_area(VERIFY_READ, optval, sizeof(int));
4306 if(err)
4307 return err;
4308
4309 val = get_fs_long((unsigned long *)optval);
4310
4311 switch(optname)
4312 {
4313 case TCP_MAXSEG:
4314 /* if(val<200||val>2048 || val>sk->mtu) */
4315 /*
4316 * values greater than interface MTU won't take effect. however at
4317 * the point when this call is done we typically don't yet know
4318 * which interface is going to be used
4319 */
4320 if(val<1||val>MAX_WINDOW)
4321 return -EINVAL;
4322 sk->user_mss=val;
4323 return 0;
4324 case TCP_NODELAY:
4325 sk->nonagle=(val==0)?0:1;
4326 return 0;
4327 default:
4328 return(-ENOPROTOOPT);
4329 }
4330 }
4331
int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen)
/*
 * Read back a TCP-level socket option; non-SOL_TCP levels are passed
 * down to ip_getsockopt().  Supports TCP_MAXSEG (sk->user_mss) and
 * TCP_NODELAY (sk->nonagle).  Writes sizeof(int) to *optlen and the
 * value to *optval, both user-space pointers validated first.
 *
 * Returns 0 on success, -ENOPROTOOPT for unknown options, or a
 * verify_area() fault code.
 */
4333 {
4334 int val,err;
4335 
4336 if(level!=SOL_TCP)
4337 return ip_getsockopt(sk,level,optname,optval,optlen);
4338 
4339 switch(optname)
4340 {
4341 case TCP_MAXSEG:
4342 val=sk->user_mss;
4343 break;
4344 case TCP_NODELAY:
4345 val=sk->nonagle; /* Until Johannes stuff is in */
4346 break;
4347 default:
4348 return(-ENOPROTOOPT);
4349 }
/* Copy results out to user space via validated put_fs_long stores. */
4350 err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
4351 if(err)
4352 return err;
4353 put_fs_long(sizeof(int),(unsigned long *) optlen);
4354 
4355 err=verify_area(VERIFY_WRITE, optval, sizeof(int));
4356 if(err)
4357 return err;
4358 put_fs_long(val,(unsigned long *)optval);
4359 
4360 return(0);
4361 }
4362
4363
/*
 * TCP's protocol operations table.  Positional initializer: each slot
 * is labelled below with the operation it supplies (slot names follow
 * the assigned functions; confirm order against struct proto).
 */
struct proto tcp_prot = {
4365 sock_wmalloc, /* write-buffer alloc */
4366 sock_rmalloc, /* read-buffer alloc */
4367 sock_wfree, /* write-buffer free */
4368 sock_rfree, /* read-buffer free */
4369 sock_rspace, /* read space available */
4370 sock_wspace, /* write space available */
4371 tcp_close,
4372 tcp_read,
4373 tcp_write,
4374 tcp_sendto,
4375 tcp_recvfrom,
4376 ip_build_header,
4377 tcp_connect,
4378 tcp_accept,
4379 ip_queue_xmit,
4380 tcp_retransmit,
4381 tcp_write_wakeup,
4382 tcp_read_wakeup,
4383 tcp_rcv,
4384 tcp_select,
4385 tcp_ioctl,
4386 NULL, /* no init hook */
4387 tcp_shutdown,
4388 tcp_setsockopt,
4389 tcp_getsockopt,
4390 128, /* presumably max_header — TODO confirm against struct proto */
4391 0, /* retransmit counter, starts at zero */
4392 {NULL,}, /* sock_array hash of bound sockets */
4393 "TCP" /* protocol name */
4394 };